Diffstat (limited to 'arch/powerpc/kernel')
150 files changed, 65628 insertions, 0 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile new file mode 100644 index 00000000..f5808a35 --- /dev/null +++ b/arch/powerpc/kernel/Makefile @@ -0,0 +1,165 @@ +# +# Makefile for the linux kernel. +# + +CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' + +subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror + +ifeq ($(CONFIG_PPC64),y) +CFLAGS_prom_init.o += -mno-minimal-toc +endif +ifeq ($(CONFIG_PPC32),y) +CFLAGS_prom_init.o += -fPIC +CFLAGS_btext.o += -fPIC +endif + +ifdef CONFIG_FUNCTION_TRACER +# Do not trace early boot code +CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog +# do not trace tracer code +CFLAGS_REMOVE_ftrace.o = -pg -mno-sched-epilog +# timers used by tracing +CFLAGS_REMOVE_time.o = -pg -mno-sched-epilog +endif + +obj-y := cputable.o ptrace.o syscalls.o \ + irq.o align.o signal_32.o pmc.o vdso.o \ + init_task.o process.o systbl.o idle.o \ + signal.o sysfs.o cacheinfo.o time.o \ + prom.o traps.o setup-common.o \ + udbg.o misc.o io.o dma.o \ + misc_$(CONFIG_WORD_SIZE).o vdso32/ +obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ + signal_64.o ptrace32.o \ + paca.o nvram_64.o firmware.o +obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o +obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o +obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power7.o +obj64-$(CONFIG_RELOCATABLE) += reloc_64.o +obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o +obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o +obj-$(CONFIG_PPC64) += vdso64/ +obj-$(CONFIG_ALTIVEC) += vecemu.o +obj-$(CONFIG_PPC_970_NAP) += idle_power4.o +obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o +obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o +obj-$(CONFIG_PPC_CLOCK) += clock.o +procfs-y := proc_powerpc.o +obj-$(CONFIG_PROC_FS) += $(procfs-y) +rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o +obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y) +obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o +obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o +obj-$(CONFIG_RTAS_PROC) += rtas-proc.o +obj-$(CONFIG_LPARCFG) += lparcfg.o +obj-$(CONFIG_IBMVIO) += vio.o +obj-$(CONFIG_IBMEBUS) += ibmebus.o +obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_FA_DUMP) += fadump.o +ifeq ($(CONFIG_PPC32),y) +obj-$(CONFIG_E500) += idle_e500.o +endif +obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o +obj-$(CONFIG_TAU) += tau_6xx.o +obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o +ifeq ($(CONFIG_FSL_BOOKE),y) +obj-$(CONFIG_HIBERNATION) += swsusp_booke.o +else +obj-$(CONFIG_HIBERNATION) += swsusp_$(CONFIG_WORD_SIZE).o +endif +obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o +obj-$(CONFIG_MODULES) += module.o module_$(CONFIG_WORD_SIZE).o +obj-$(CONFIG_44x) += cpu_setup_44x.o +obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o dbell.o +obj-$(CONFIG_PPC_BOOK3E_64) += dbell.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o + +extra-y := head_$(CONFIG_WORD_SIZE).o +extra-$(CONFIG_40x) := head_40x.o +extra-$(CONFIG_44x) := head_44x.o +extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o +extra-$(CONFIG_8xx) := head_8xx.o +extra-y += vmlinux.lds + +obj-$(CONFIG_RELOCATABLE_PPC32) += reloc_32.o + +obj-$(CONFIG_PPC32) += entry_32.o setup_32.o +obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o +obj-$(CONFIG_KGDB) += kgdb.o +obj-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o +obj-$(CONFIG_MODULES) += ppc_ksyms.o +obj-$(CONFIG_BOOTX_TEXT) += btext.o 
+obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o +obj-$(CONFIG_STACKTRACE) += stacktrace.o +obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o + +pci64-$(CONFIG_PPC64) += pci_dn.o isa-bridge.o +obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \ + pci-common.o pci_of_scan.o +obj-$(CONFIG_PCI_MSI) += msi.o +obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \ + machine_kexec_$(CONFIG_WORD_SIZE).o +obj-$(CONFIG_AUDIT) += audit.o +obj64-$(CONFIG_AUDIT) += compat_audit.o + +obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o + +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o +obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o + +obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o + +ifneq ($(CONFIG_PPC_INDIRECT_IO),y) +obj-y += iomap.o +endif + +obj-$(CONFIG_PPC64) += $(obj64-y) +obj-$(CONFIG_PPC32) += $(obj32-y) + +ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),) +obj-y += ppc_save_regs.o +endif + +obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o + +# Disable GCOV in odd or sensitive code +GCOV_PROFILE_prom_init.o := n +GCOV_PROFILE_ftrace.o := n +GCOV_PROFILE_machine_kexec_64.o := n +GCOV_PROFILE_machine_kexec_32.o := n +GCOV_PROFILE_kprobes.o := n + +extra-$(CONFIG_PPC_FPU) += fpu.o +extra-$(CONFIG_ALTIVEC) += vector.o +extra-$(CONFIG_PPC64) += entry_64.o + +extra-y += systbl_chk.i +$(obj)/systbl.o: systbl_chk + +quiet_cmd_systbl_chk = CALL $< + cmd_systbl_chk = $(CONFIG_SHELL) $< $(obj)/systbl_chk.i + +PHONY += systbl_chk +systbl_chk: $(src)/systbl_chk.sh $(obj)/systbl_chk.i + $(call cmd,systbl_chk) + +ifeq ($(CONFIG_PPC_OF_BOOT_TRAMPOLINE),y) +$(obj)/built-in.o: prom_init_check + +quiet_cmd_prom_init_check = CALL $< + cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" "$(obj)/prom_init.o" + +PHONY += prom_init_check +prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o + $(call cmd,prom_init_check) +endif + +clean-files := vmlinux.lds diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c new file mode 100644 index 00000000..ee5b690a --- /dev/null +++ b/arch/powerpc/kernel/align.c @@ -0,0 +1,973 @@ +/* align.c - handle alignment exceptions for the Power PC. + * + * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au> + * Copyright (c) 1998-1999 TiVo, Inc. + * PowerPC 403GCX modifications. + * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu> + * PowerPC 403GCX/405GP modifications. + * Copyright (c) 2001-2002 PPC64 team, IBM Corp + * 64-bit and Power4 support + * Copyright (c) 2005 Benjamin Herrenschmidt, IBM Corp + * <benh@kernel.crashing.org> + * Merge ppc32 and ppc64 implementations + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <asm/processor.h> +#include <asm/uaccess.h> +#include <asm/cache.h> +#include <asm/cputable.h> +#include <asm/emulated_ops.h> +#include <asm/switch_to.h> + +struct aligninfo { + unsigned char len; + unsigned char flags; +}; + +#define IS_XFORM(inst) (((inst) >> 26) == 31) +#define IS_DSFORM(inst) (((inst) >> 26) >= 56) + +#define INVALID { 0, 0 } + +/* Bits in the flags field */ +#define LD 0 /* load */ +#define ST 1 /* store */ +#define SE 2 /* sign-extend value, or FP ld/st as word */ +#define F 4 /* to/from fp regs */ +#define U 8 /* update index register */ +#define M 0x10 /* multiple load/store */ +#define SW 0x20 /* byte swap */ +#define S 0x40 /* single-precision fp or... */ +#define SX 0x40 /* ... byte count in XER */ +#define HARD 0x80 /* string, stwcx. */ +#define E4 0x40 /* SPE endianness is word */ +#define E8 0x80 /* SPE endianness is double word */ +#define SPLT 0x80 /* VSX SPLAT load */ + +/* DSISR bits reported for a DCBZ instruction: */ +#define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */ + +#define SWAP(a, b) (t = (a), (a) = (b), (b) = t) + +/* + * The PowerPC stores certain bits of the instruction that caused the + * alignment exception in the DSISR register. This array maps those + * bits to information about the operand length and what the + * instruction would do. + */ +static struct aligninfo aligninfo[128] = { + { 4, LD }, /* 00 0 0000: lwz / lwarx */ + INVALID, /* 00 0 0001 */ + { 4, ST }, /* 00 0 0010: stw */ + INVALID, /* 00 0 0011 */ + { 2, LD }, /* 00 0 0100: lhz */ + { 2, LD+SE }, /* 00 0 0101: lha */ + { 2, ST }, /* 00 0 0110: sth */ + { 4, LD+M }, /* 00 0 0111: lmw */ + { 4, LD+F+S }, /* 00 0 1000: lfs */ + { 8, LD+F }, /* 00 0 1001: lfd */ + { 4, ST+F+S }, /* 00 0 1010: stfs */ + { 8, ST+F }, /* 00 0 1011: stfd */ + INVALID, /* 00 0 1100 */ + { 8, LD }, /* 00 0 1101: ld/ldu/lwa */ + INVALID, /* 00 0 1110 */ + { 8, ST }, /* 00 0 1111: std/stdu */ + { 4, LD+U }, /* 00 1 0000: lwzu */ + INVALID, /* 00 1 0001 */ + { 4, ST+U }, /* 00 1 0010: stwu */ + INVALID, /* 00 1 0011 */ + { 2, LD+U }, /* 00 1 0100: lhzu */ + { 2, LD+SE+U }, /* 00 1 0101: lhau */ + { 2, ST+U }, /* 00 1 0110: sthu */ + { 4, ST+M }, /* 00 1 0111: stmw */ + { 4, LD+F+S+U }, /* 00 1 1000: lfsu */ + { 8, LD+F+U }, /* 00 1 1001: lfdu */ + { 4, ST+F+S+U }, /* 00 1 1010: stfsu */ + { 8, ST+F+U }, /* 00 1 1011: stfdu */ + { 16, LD+F }, /* 00 1 1100: lfdp */ + INVALID, /* 00 1 1101 */ + { 16, ST+F }, /* 00 1 1110: stfdp */ + INVALID, /* 00 1 1111 */ + { 8, LD }, /* 01 0 0000: ldx */ + INVALID, /* 01 0 0001 */ + { 8, ST }, /* 01 0 0010: stdx */ + INVALID, /* 01 0 0011 */ + INVALID, /* 01 0 0100 */ + { 4, LD+SE }, /* 01 0 0101: lwax */ + INVALID, /* 01 0 0110 */ + INVALID, /* 01 0 0111 */ + { 4, LD+M+HARD+SX }, /* 01 0 1000: lswx */ + { 4, LD+M+HARD }, /* 01 0 1001: lswi */ + { 4, ST+M+HARD+SX }, /* 01 0 1010: stswx */ + { 4, ST+M+HARD }, /* 01 0 1011: stswi */ + INVALID, /* 01 0 1100 */ + { 8, LD+U }, /* 01 0 1101: ldu */ + INVALID, /* 01 0 1110 */ + { 8, ST+U }, /* 01 0 1111: stdu */ + { 8, LD+U }, /* 01 1 0000: ldux */ + INVALID, /* 01 1 0001 */ + { 8, ST+U }, /* 01 1 0010: stdux */ + INVALID, /* 01 1 0011 */ + INVALID, /* 01 1 0100 */ + { 4, LD+SE+U }, /* 01 1 0101: lwaux */ + INVALID, /* 01 1 0110 */ + INVALID, /* 01 1 0111 */ + INVALID, /* 01 1 1000 */ + INVALID, /* 01 1 1001 */ + INVALID, /* 01 1 1010 */ + INVALID, /* 01 1 1011 */ + INVALID, /* 01 1 1100 */ + INVALID, /* 01 1 1101 */ + INVALID, /* 01 1 1110 */ + 
INVALID, /* 01 1 1111 */ + INVALID, /* 10 0 0000 */ + INVALID, /* 10 0 0001 */ + INVALID, /* 10 0 0010: stwcx. */ + INVALID, /* 10 0 0011 */ + INVALID, /* 10 0 0100 */ + INVALID, /* 10 0 0101 */ + INVALID, /* 10 0 0110 */ + INVALID, /* 10 0 0111 */ + { 4, LD+SW }, /* 10 0 1000: lwbrx */ + INVALID, /* 10 0 1001 */ + { 4, ST+SW }, /* 10 0 1010: stwbrx */ + INVALID, /* 10 0 1011 */ + { 2, LD+SW }, /* 10 0 1100: lhbrx */ + { 4, LD+SE }, /* 10 0 1101 lwa */ + { 2, ST+SW }, /* 10 0 1110: sthbrx */ + INVALID, /* 10 0 1111 */ + INVALID, /* 10 1 0000 */ + INVALID, /* 10 1 0001 */ + INVALID, /* 10 1 0010 */ + INVALID, /* 10 1 0011 */ + INVALID, /* 10 1 0100 */ + INVALID, /* 10 1 0101 */ + INVALID, /* 10 1 0110 */ + INVALID, /* 10 1 0111 */ + INVALID, /* 10 1 1000 */ + INVALID, /* 10 1 1001 */ + INVALID, /* 10 1 1010 */ + INVALID, /* 10 1 1011 */ + INVALID, /* 10 1 1100 */ + INVALID, /* 10 1 1101 */ + INVALID, /* 10 1 1110 */ + { 0, ST+HARD }, /* 10 1 1111: dcbz */ + { 4, LD }, /* 11 0 0000: lwzx */ + INVALID, /* 11 0 0001 */ + { 4, ST }, /* 11 0 0010: stwx */ + INVALID, /* 11 0 0011 */ + { 2, LD }, /* 11 0 0100: lhzx */ + { 2, LD+SE }, /* 11 0 0101: lhax */ + { 2, ST }, /* 11 0 0110: sthx */ + INVALID, /* 11 0 0111 */ + { 4, LD+F+S }, /* 11 0 1000: lfsx */ + { 8, LD+F }, /* 11 0 1001: lfdx */ + { 4, ST+F+S }, /* 11 0 1010: stfsx */ + { 8, ST+F }, /* 11 0 1011: stfdx */ + { 16, LD+F }, /* 11 0 1100: lfdpx */ + { 4, LD+F+SE }, /* 11 0 1101: lfiwax */ + { 16, ST+F }, /* 11 0 1110: stfdpx */ + { 4, ST+F }, /* 11 0 1111: stfiwx */ + { 4, LD+U }, /* 11 1 0000: lwzux */ + INVALID, /* 11 1 0001 */ + { 4, ST+U }, /* 11 1 0010: stwux */ + INVALID, /* 11 1 0011 */ + { 2, LD+U }, /* 11 1 0100: lhzux */ + { 2, LD+SE+U }, /* 11 1 0101: lhaux */ + { 2, ST+U }, /* 11 1 0110: sthux */ + INVALID, /* 11 1 0111 */ + { 4, LD+F+S+U }, /* 11 1 1000: lfsux */ + { 8, LD+F+U }, /* 11 1 1001: lfdux */ + { 4, ST+F+S+U }, /* 11 1 1010: stfsux */ + { 8, ST+F+U }, /* 11 1 1011: stfdux */ + INVALID, /* 11 1 1100 */ + { 4, LD+F }, /* 11 1 1101: lfiwzx */ + INVALID, /* 11 1 1110 */ + INVALID, /* 11 1 1111 */ +}; + +/* + * Create a DSISR value from the instruction + */ +static inline unsigned make_dsisr(unsigned instr) +{ + unsigned dsisr; + + + /* bits 6:15 --> 22:31 */ + dsisr = (instr & 0x03ff0000) >> 16; + + if (IS_XFORM(instr)) { + /* bits 29:30 --> 15:16 */ + dsisr |= (instr & 0x00000006) << 14; + /* bit 25 --> 17 */ + dsisr |= (instr & 0x00000040) << 8; + /* bits 21:24 --> 18:21 */ + dsisr |= (instr & 0x00000780) << 3; + } else { + /* bit 5 --> 17 */ + dsisr |= (instr & 0x04000000) >> 12; + /* bits 1: 4 --> 18:21 */ + dsisr |= (instr & 0x78000000) >> 17; + /* bits 30:31 --> 12:13 */ + if (IS_DSFORM(instr)) + dsisr |= (instr & 0x00000003) << 18; + } + + return dsisr; +} + +/* + * The dcbz (data cache block zero) instruction + * gives an alignment fault if used on non-cacheable + * memory. We handle the fault mainly for the + * case when we are running with the cache disabled + * for debugging. 
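 * (Worked example, not part of the original comment: with a 32-byte cache line, a dcbz that faults at address 0x1015 zeroes the whole block 0x1000-0x101f, because emulate_dcbz() below rounds regs->dar down with "regs->dar & -size".)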
+ */ +static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr) +{ + long __user *p; + int i, size; + +#ifdef __powerpc64__ + size = ppc64_caches.dline_size; +#else + size = L1_CACHE_BYTES; +#endif + p = (long __user *) (regs->dar & -size); + if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, size)) + return -EFAULT; + for (i = 0; i < size / sizeof(long); ++i) + if (__put_user_inatomic(0, p+i)) + return -EFAULT; + return 1; +} + +/* + * Emulate load & store multiple instructions + * On 64-bit machines, these instructions only affect/use the + * bottom 4 bytes of each register, and the loads clear the + * top 4 bytes of the affected register. + */ +#ifdef CONFIG_PPC64 +#define REG_BYTE(rp, i) *((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4) +#else +#define REG_BYTE(rp, i) *((u8 *)(rp) + (i)) +#endif + +#define SWIZ_PTR(p) ((unsigned char __user *)((p) ^ swiz)) + +static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr, + unsigned int reg, unsigned int nb, + unsigned int flags, unsigned int instr, + unsigned long swiz) +{ + unsigned long *rptr; + unsigned int nb0, i, bswiz; + unsigned long p; + + /* + * We do not try to emulate 8-byte multiples as they aren't really + * available in our operating environments and we don't try to + * emulate multiple operations in kernel land as they should never + * be used/generated there, at least not on unaligned boundaries + */ + if (unlikely((nb > 4) || !user_mode(regs))) + return 0; + + /* lmw, stmw, lswi/x, stswi/x */ + nb0 = 0; + if (flags & HARD) { + if (flags & SX) { + nb = regs->xer & 127; + if (nb == 0) + return 1; + } else { + unsigned long pc = regs->nip ^ (swiz & 4); + + if (__get_user_inatomic(instr, + (unsigned int __user *)pc)) + return -EFAULT; + if (swiz == 0 && (flags & SW)) + instr = cpu_to_le32(instr); + nb = (instr >> 11) & 0x1f; + if (nb == 0) + nb = 32; + } + if (nb + reg * 4 > 128) { + nb0 = nb + reg * 4 - 128; + nb = 128 - reg * 4; + } + } else { + /* lmw, stmw */ + nb = (32 - reg) * 4; + } + + if (!access_ok((flags & ST ? VERIFY_WRITE: VERIFY_READ), addr, nb+nb0)) + return -EFAULT; /* bad address */ + + rptr = &regs->gpr[reg]; + p = (unsigned long) addr; + bswiz = (flags & SW)? 3: 0; + + if (!(flags & ST)) { + /* + * This zeroes the top 4 bytes of the affected registers + * in 64-bit mode, and also zeroes out any remaining + * bytes of the last register for lsw*. + */ + memset(rptr, 0, ((nb + 3) / 4) * sizeof(unsigned long)); + if (nb0 > 0) + memset(&regs->gpr[0], 0, + ((nb0 + 3) / 4) * sizeof(unsigned long)); + + for (i = 0; i < nb; ++i, ++p) + if (__get_user_inatomic(REG_BYTE(rptr, i ^ bswiz), + SWIZ_PTR(p))) + return -EFAULT; + if (nb0 > 0) { + rptr = &regs->gpr[0]; + addr += nb; + for (i = 0; i < nb0; ++i, ++p) + if (__get_user_inatomic(REG_BYTE(rptr, + i ^ bswiz), + SWIZ_PTR(p))) + return -EFAULT; + } + + } else { + for (i = 0; i < nb; ++i, ++p) + if (__put_user_inatomic(REG_BYTE(rptr, i ^ bswiz), + SWIZ_PTR(p))) + return -EFAULT; + if (nb0 > 0) { + rptr = &regs->gpr[0]; + addr += nb; + for (i = 0; i < nb0; ++i, ++p) + if (__put_user_inatomic(REG_BYTE(rptr, + i ^ bswiz), + SWIZ_PTR(p))) + return -EFAULT; + } + } + return 1; +}
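(Illustrative aside, not part of the patch.) A minimal user-space sketch of the REG_BYTE() indexing used by emulate_multiple() above: byte i of the accessed stream goes to byte offset (i & 3) + 4 of 64-bit word i >> 2, i.e. the low 32 bits of each register image on a big-endian layout. Here u8 is swapped for uint8_t, and the register file and byte values are made up:

#include <stdio.h>
#include <inttypes.h>
#include <stdint.h>

/* Same indexing as the kernel's 64-bit REG_BYTE() */
#define REG_BYTE(rp, i) (*((uint8_t *)((rp) + ((i) >> 2)) + ((i) & 3) + 4))

int main(void)
{
	uint64_t gpr[2] = { 0, 0 };		/* stand-in register file */
	const uint8_t stream[8] = { 0x11, 0x22, 0x33, 0x44,
				    0x55, 0x66, 0x77, 0x88 };
	int i;

	/* emulate an 8-byte lmw-style load into two registers */
	for (i = 0; i < 8; i++)
		REG_BYTE(gpr, i) = stream[i];

	/* On a big-endian host: gpr[0] = 0x0000000011223344,
	 * gpr[1] = 0x0000000055667788; the top halves stay zero,
	 * matching the "loads clear the top 4 bytes" comment. */
	printf("gpr[0] = 0x%016" PRIx64 "\n", gpr[0]);
	printf("gpr[1] = 0x%016" PRIx64 "\n", gpr[1]);
	return 0;
}

Built on a little-endian machine the program still runs, but the bytes land in the high halves; the indexing only makes sense for the big-endian case the CONFIG_PPC64 variant of the macro is written for.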
+ +/* + * Emulate floating-point pair loads and stores. + * Only POWER6 has these instructions, and it does true little-endian, + * so we don't need the address swizzling. + */ +static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg, + unsigned int flags) +{ + char *ptr0 = (char *) &current->thread.TS_FPR(reg); + char *ptr1 = (char *) &current->thread.TS_FPR(reg+1); + int i, ret, sw = 0; + + if (!(flags & F)) + return 0; + if (reg & 1) + return 0; /* invalid form: FRS/FRT must be even */ + if (flags & SW) + sw = 7; + ret = 0; + for (i = 0; i < 8; ++i) { + if (!(flags & ST)) { + ret |= __get_user(ptr0[i^sw], addr + i); + ret |= __get_user(ptr1[i^sw], addr + i + 8); + } else { + ret |= __put_user(ptr0[i^sw], addr + i); + ret |= __put_user(ptr1[i^sw], addr + i + 8); + } + } + if (ret) + return -EFAULT; + return 1; /* exception handled and fixed up */ +} + +#ifdef CONFIG_SPE + +static struct aligninfo spe_aligninfo[32] = { + { 8, LD+E8 }, /* 0 00 00: evldd[x] */ + { 8, LD+E4 }, /* 0 00 01: evldw[x] */ + { 8, LD }, /* 0 00 10: evldh[x] */ + INVALID, /* 0 00 11 */ + { 2, LD }, /* 0 01 00: evlhhesplat[x] */ + INVALID, /* 0 01 01 */ + { 2, LD }, /* 0 01 10: evlhhousplat[x] */ + { 2, LD+SE }, /* 0 01 11: evlhhossplat[x] */ + { 4, LD }, /* 0 10 00: evlwhe[x] */ + INVALID, /* 0 10 01 */ + { 4, LD }, /* 0 10 10: evlwhou[x] */ + { 4, LD+SE }, /* 0 10 11: evlwhos[x] */ + { 4, LD+E4 }, /* 0 11 00: evlwwsplat[x] */ + INVALID, /* 0 11 01 */ + { 4, LD }, /* 0 11 10: evlwhsplat[x] */ + INVALID, /* 0 11 11 */ + + { 8, ST+E8 }, /* 1 00 00: evstdd[x] */ + { 8, ST+E4 }, /* 1 00 01: evstdw[x] */ + { 8, ST }, /* 1 00 10: evstdh[x] */ + INVALID, /* 1 00 11 */ + INVALID, /* 1 01 00 */ + INVALID, /* 1 01 01 */ + INVALID, /* 1 01 10 */ + INVALID, /* 1 01 11 */ + { 4, ST }, /* 1 10 00: evstwhe[x] */ + INVALID, /* 1 10 01 */ + { 4, ST }, /* 1 10 10: evstwho[x] */ + INVALID, /* 1 10 11 */ + { 4, ST+E4 }, /* 1 11 00: evstwwe[x] */ + INVALID, /* 1 11 01 */ + { 4, ST+E4 }, /* 1 11 10: evstwwo[x] */ + INVALID, /* 1 11 11 */ +}; + +#define EVLDD 0x00 +#define EVLDW 0x01 +#define EVLDH 0x02 +#define EVLHHESPLAT 0x04 +#define EVLHHOUSPLAT 0x06 +#define EVLHHOSSPLAT 0x07 +#define EVLWHE 0x08 +#define EVLWHOU 0x0A +#define EVLWHOS 0x0B +#define EVLWWSPLAT 0x0C +#define EVLWHSPLAT 0x0E +#define EVSTDD 0x10 +#define EVSTDW 0x11 +#define EVSTDH 0x12 +#define EVSTWHE 0x18 +#define EVSTWHO 0x1A +#define EVSTWWE 0x1C +#define EVSTWWO 0x1E + +/* + * Emulate SPE loads and stores. + * Only Book-E has these instructions, and it does true little-endian, + * so we don't need the address swizzling. + */ +static int emulate_spe(struct pt_regs *regs, unsigned int reg, + unsigned int instr) +{ + int t, ret; + union { + u64 ll; + u32 w[2]; + u16 h[4]; + u8 v[8]; + } data, temp; + unsigned char __user *p, *addr; + unsigned long *evr = &current->thread.evr[reg]; + unsigned int nb, flags; + + instr = (instr >> 1) & 0x1f; + + /* DAR has the operand effective address */ + addr = (unsigned char __user *)regs->dar; + + nb = spe_aligninfo[instr].len; + flags = spe_aligninfo[instr].flags; + + /* Verify the address of the operand */ + if (unlikely(user_mode(regs) && + !access_ok((flags & ST ? 
VERIFY_WRITE : VERIFY_READ), + addr, nb))) + return -EFAULT; + + /* userland only */ + if (unlikely(!user_mode(regs))) + return 0; + + flush_spe_to_thread(current); + + /* If we are loading, get the data from user space, else + * get it from register values + */ + if (flags & ST) { + data.ll = 0; + switch (instr) { + case EVSTDD: + case EVSTDW: + case EVSTDH: + data.w[0] = *evr; + data.w[1] = regs->gpr[reg]; + break; + case EVSTWHE: + data.h[2] = *evr >> 16; + data.h[3] = regs->gpr[reg] >> 16; + break; + case EVSTWHO: + data.h[2] = *evr & 0xffff; + data.h[3] = regs->gpr[reg] & 0xffff; + break; + case EVSTWWE: + data.w[1] = *evr; + break; + case EVSTWWO: + data.w[1] = regs->gpr[reg]; + break; + default: + return -EINVAL; + } + } else { + temp.ll = data.ll = 0; + ret = 0; + p = addr; + + switch (nb) { + case 8: + ret |= __get_user_inatomic(temp.v[0], p++); + ret |= __get_user_inatomic(temp.v[1], p++); + ret |= __get_user_inatomic(temp.v[2], p++); + ret |= __get_user_inatomic(temp.v[3], p++); + case 4: + ret |= __get_user_inatomic(temp.v[4], p++); + ret |= __get_user_inatomic(temp.v[5], p++); + case 2: + ret |= __get_user_inatomic(temp.v[6], p++); + ret |= __get_user_inatomic(temp.v[7], p++); + if (unlikely(ret)) + return -EFAULT; + } + + switch (instr) { + case EVLDD: + case EVLDW: + case EVLDH: + data.ll = temp.ll; + break; + case EVLHHESPLAT: + data.h[0] = temp.h[3]; + data.h[2] = temp.h[3]; + break; + case EVLHHOUSPLAT: + case EVLHHOSSPLAT: + data.h[1] = temp.h[3]; + data.h[3] = temp.h[3]; + break; + case EVLWHE: + data.h[0] = temp.h[2]; + data.h[2] = temp.h[3]; + break; + case EVLWHOU: + case EVLWHOS: + data.h[1] = temp.h[2]; + data.h[3] = temp.h[3]; + break; + case EVLWWSPLAT: + data.w[0] = temp.w[1]; + data.w[1] = temp.w[1]; + break; + case EVLWHSPLAT: + data.h[0] = temp.h[2]; + data.h[1] = temp.h[2]; + data.h[2] = temp.h[3]; + data.h[3] = temp.h[3]; + break; + default: + return -EINVAL; + } + } + + if (flags & SW) { + switch (flags & 0xf0) { + case E8: + SWAP(data.v[0], data.v[7]); + SWAP(data.v[1], data.v[6]); + SWAP(data.v[2], data.v[5]); + SWAP(data.v[3], data.v[4]); + break; + case E4: + + SWAP(data.v[0], data.v[3]); + SWAP(data.v[1], data.v[2]); + SWAP(data.v[4], data.v[7]); + SWAP(data.v[5], data.v[6]); + break; + /* Its half word endian */ + default: + SWAP(data.v[0], data.v[1]); + SWAP(data.v[2], data.v[3]); + SWAP(data.v[4], data.v[5]); + SWAP(data.v[6], data.v[7]); + break; + } + } + + if (flags & SE) { + data.w[0] = (s16)data.h[1]; + data.w[1] = (s16)data.h[3]; + } + + /* Store result to memory or update registers */ + if (flags & ST) { + ret = 0; + p = addr; + switch (nb) { + case 8: + ret |= __put_user_inatomic(data.v[0], p++); + ret |= __put_user_inatomic(data.v[1], p++); + ret |= __put_user_inatomic(data.v[2], p++); + ret |= __put_user_inatomic(data.v[3], p++); + case 4: + ret |= __put_user_inatomic(data.v[4], p++); + ret |= __put_user_inatomic(data.v[5], p++); + case 2: + ret |= __put_user_inatomic(data.v[6], p++); + ret |= __put_user_inatomic(data.v[7], p++); + } + if (unlikely(ret)) + return -EFAULT; + } else { + *evr = data.w[0]; + regs->gpr[reg] = data.w[1]; + } + + return 1; +} +#endif /* CONFIG_SPE */ + +#ifdef CONFIG_VSX +/* + * Emulate VSX instructions... 
+ */ +static int emulate_vsx(unsigned char __user *addr, unsigned int reg, + unsigned int areg, struct pt_regs *regs, + unsigned int flags, unsigned int length, + unsigned int elsize) +{ + char *ptr; + unsigned long *lptr; + int ret = 0; + int sw = 0; + int i, j; + + flush_vsx_to_thread(current); + + if (reg < 32) + ptr = (char *) &current->thread.TS_FPR(reg); + else + ptr = (char *) &current->thread.vr[reg - 32]; + + lptr = (unsigned long *) ptr; + + if (flags & SW) + sw = elsize-1; + + for (j = 0; j < length; j += elsize) { + for (i = 0; i < elsize; ++i) { + if (flags & ST) + ret |= __put_user(ptr[i^sw], addr + i); + else + ret |= __get_user(ptr[i^sw], addr + i); + } + ptr += elsize; + addr += elsize; + } + + if (!ret) { + if (flags & U) + regs->gpr[areg] = regs->dar; + + /* Splat load copies the same data to top and bottom 8 bytes */ + if (flags & SPLT) + lptr[1] = lptr[0]; + /* For 8 byte loads, zero the top 8 bytes */ + else if (!(flags & ST) && (8 == length)) + lptr[1] = 0; + } else + return -EFAULT; + + return 1; +} +#endif + +/* + * Called on alignment exception. Attempts to fixup + * + * Return 1 on success + * Return 0 if unable to handle the interrupt + * Return -EFAULT if data address is bad + */ + +int fix_alignment(struct pt_regs *regs) +{ + unsigned int instr, nb, flags, instruction = 0; + unsigned int reg, areg; + unsigned int dsisr; + unsigned char __user *addr; + unsigned long p, swiz; + int ret, t; + union { + u64 ll; + double dd; + unsigned char v[8]; + struct { + unsigned hi32; + int low32; + } x32; + struct { + unsigned char hi48[6]; + short low16; + } x16; + } data; + + /* + * We require a complete register set, if not, then our assembly + * is broken + */ + CHECK_FULL_REGS(regs); + + dsisr = regs->dsisr; + + /* Some processors don't provide us with a DSISR we can use here, + * let's make one up from the instruction + */ + if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) { + unsigned long pc = regs->nip; + + if (cpu_has_feature(CPU_FTR_PPC_LE) && (regs->msr & MSR_LE)) + pc ^= 4; + if (unlikely(__get_user_inatomic(instr, + (unsigned int __user *)pc))) + return -EFAULT; + if (cpu_has_feature(CPU_FTR_REAL_LE) && (regs->msr & MSR_LE)) + instr = cpu_to_le32(instr); + dsisr = make_dsisr(instr); + instruction = instr; + } + + /* extract the operation and registers from the dsisr */ + reg = (dsisr >> 5) & 0x1f; /* source/dest register */ + areg = dsisr & 0x1f; /* register to update */ + +#ifdef CONFIG_SPE + if ((instr >> 26) == 0x4) { + PPC_WARN_ALIGNMENT(spe, regs); + return emulate_spe(regs, reg, instr); + } +#endif + + instr = (dsisr >> 10) & 0x7f; + instr |= (dsisr >> 13) & 0x60; + + /* Lookup the operation in our table */ + nb = aligninfo[instr].len; + flags = aligninfo[instr].flags; + + /* Byteswap little endian loads and stores */ + swiz = 0; + if (regs->msr & MSR_LE) { + flags ^= SW; + /* + * So-called "PowerPC little endian" mode works by + * swizzling addresses rather than by actually doing + * any byte-swapping. To emulate this, we XOR each + * byte address with 7. We also byte-swap, because + * the processor's address swizzling depends on the + * operand size (it xors the address with 7 for bytes, + * 6 for halfwords, 4 for words, 0 for doublewords) but + * we will xor with 7 and load/store each byte separately. 
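 * (Worked example, not part of the original comment: a 4-byte load at effective address 0x1001 in LE mode reads the bytes at 0x1001^7 = 0x1006, 0x1002^7 = 0x1005, 0x1003^7 = 0x1004 and 0x1004^7 = 0x1003, then byte-swaps the assembled word; together these reproduce the hardware's address swizzling.)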
+ */ + if (cpu_has_feature(CPU_FTR_PPC_LE)) + swiz = 7; + } + + /* DAR has the operand effective address */ + addr = (unsigned char __user *)regs->dar; + +#ifdef CONFIG_VSX + if ((instruction & 0xfc00003e) == 0x7c000018) { + unsigned int elsize; + + /* Additional register addressing bit (64 VSX vs 32 FPR/GPR) */ + reg |= (instruction & 0x1) << 5; + /* Simple inline decoder instead of a table */ + /* VSX has only 8 and 16 byte memory accesses */ + nb = 8; + if (instruction & 0x200) + nb = 16; + + /* Vector stores in little-endian mode swap individual + elements, so process them separately */ + elsize = 4; + if (instruction & 0x80) + elsize = 8; + + flags = 0; + if (regs->msr & MSR_LE) + flags |= SW; + if (instruction & 0x100) + flags |= ST; + if (instruction & 0x040) + flags |= U; + /* splat load needs a special decoder */ + if ((instruction & 0x400) == 0){ + flags |= SPLT; + nb = 8; + } + PPC_WARN_ALIGNMENT(vsx, regs); + return emulate_vsx(addr, reg, areg, regs, flags, nb, elsize); + } +#endif + /* A size of 0 indicates an instruction we don't support, with + * the exception of DCBZ which is handled as a special case here + */ + if (instr == DCBZ) { + PPC_WARN_ALIGNMENT(dcbz, regs); + return emulate_dcbz(regs, addr); + } + if (unlikely(nb == 0)) + return 0; + + /* Load/Store Multiple instructions are handled in their own + * function + */ + if (flags & M) { + PPC_WARN_ALIGNMENT(multiple, regs); + return emulate_multiple(regs, addr, reg, nb, + flags, instr, swiz); + } + + /* Verify the address of the operand */ + if (unlikely(user_mode(regs) && + !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ), + addr, nb))) + return -EFAULT; + + /* Force the fprs into the save area so we can reference them */ + if (flags & F) { + /* userland only */ + if (unlikely(!user_mode(regs))) + return 0; + flush_fp_to_thread(current); + } + + /* Special case for 16-byte FP loads and stores */ + if (nb == 16) { + PPC_WARN_ALIGNMENT(fp_pair, regs); + return emulate_fp_pair(addr, reg, flags); + } + + PPC_WARN_ALIGNMENT(unaligned, regs); + + /* If we are loading, get the data from user space, else + * get it from register values + */ + if (!(flags & ST)) { + data.ll = 0; + ret = 0; + p = (unsigned long) addr; + switch (nb) { + case 8: + ret |= __get_user_inatomic(data.v[0], SWIZ_PTR(p++)); + ret |= __get_user_inatomic(data.v[1], SWIZ_PTR(p++)); + ret |= __get_user_inatomic(data.v[2], SWIZ_PTR(p++)); + ret |= __get_user_inatomic(data.v[3], SWIZ_PTR(p++)); + case 4: + ret |= __get_user_inatomic(data.v[4], SWIZ_PTR(p++)); + ret |= __get_user_inatomic(data.v[5], SWIZ_PTR(p++)); + case 2: + ret |= __get_user_inatomic(data.v[6], SWIZ_PTR(p++)); + ret |= __get_user_inatomic(data.v[7], SWIZ_PTR(p++)); + if (unlikely(ret)) + return -EFAULT; + } + } else if (flags & F) { + data.dd = current->thread.TS_FPR(reg); + if (flags & S) { + /* Single-precision FP store requires conversion... 
*/ +#ifdef CONFIG_PPC_FPU + preempt_disable(); + enable_kernel_fp(); + cvt_df(&data.dd, (float *)&data.v[4]); + preempt_enable(); +#else + return 0; +#endif + } + } else + data.ll = regs->gpr[reg]; + + if (flags & SW) { + switch (nb) { + case 8: + SWAP(data.v[0], data.v[7]); + SWAP(data.v[1], data.v[6]); + SWAP(data.v[2], data.v[5]); + SWAP(data.v[3], data.v[4]); + break; + case 4: + SWAP(data.v[4], data.v[7]); + SWAP(data.v[5], data.v[6]); + break; + case 2: + SWAP(data.v[6], data.v[7]); + break; + } + } + + /* Perform other misc operations like sign extension + * or floating point single precision conversion + */ + switch (flags & ~(U|SW)) { + case LD+SE: /* sign extending integer loads */ + case LD+F+SE: /* sign extend for lfiwax */ + if ( nb == 2 ) + data.ll = data.x16.low16; + else /* nb must be 4 */ + data.ll = data.x32.low32; + break; + + /* Single-precision FP load requires conversion... */ + case LD+F+S: +#ifdef CONFIG_PPC_FPU + preempt_disable(); + enable_kernel_fp(); + cvt_fd((float *)&data.v[4], &data.dd); + preempt_enable(); +#else + return 0; +#endif + break; + } + + /* Store result to memory or update registers */ + if (flags & ST) { + ret = 0; + p = (unsigned long) addr; + switch (nb) { + case 8: + ret |= __put_user_inatomic(data.v[0], SWIZ_PTR(p++)); + ret |= __put_user_inatomic(data.v[1], SWIZ_PTR(p++)); + ret |= __put_user_inatomic(data.v[2], SWIZ_PTR(p++)); + ret |= __put_user_inatomic(data.v[3], SWIZ_PTR(p++)); + case 4: + ret |= __put_user_inatomic(data.v[4], SWIZ_PTR(p++)); + ret |= __put_user_inatomic(data.v[5], SWIZ_PTR(p++)); + case 2: + ret |= __put_user_inatomic(data.v[6], SWIZ_PTR(p++)); + ret |= __put_user_inatomic(data.v[7], SWIZ_PTR(p++)); + } + if (unlikely(ret)) + return -EFAULT; + } else if (flags & F) + current->thread.TS_FPR(reg) = data.dd; + else + regs->gpr[reg] = data.ll; + + /* Update RA as needed */ + if (flags & U) + regs->gpr[areg] = regs->dar; + + return 1; +} diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c new file mode 100644 index 00000000..34b8afe9 --- /dev/null +++ b/arch/powerpc/kernel/asm-offsets.c @@ -0,0 +1,619 @@ +/* + * This program is used to generate definitions needed by + * assembly language modules. + * + * We use the technique used in the OSF Mach kernel code: + * generate asm statements containing #defines, + * compile this file to assembler, and then extract the + * #defines from the assembly-language output. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/suspend.h> +#include <linux/hrtimer.h> +#ifdef CONFIG_PPC64 +#include <linux/time.h> +#include <linux/hardirq.h> +#endif +#include <linux/kbuild.h> + +#include <asm/io.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/rtas.h> +#include <asm/vdso_datapage.h> +#ifdef CONFIG_PPC64 +#include <asm/paca.h> +#include <asm/lppaca.h> +#include <asm/cache.h> +#include <asm/compat.h> +#include <asm/mmu.h> +#include <asm/hvcall.h> +#include <asm/xics.h> +#endif +#ifdef CONFIG_PPC_POWERNV +#include <asm/opal.h> +#endif +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_GUEST) +#include <linux/kvm_host.h> +#endif +#if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S) +#include <asm/kvm_book3s.h> +#endif + +#ifdef CONFIG_PPC32 +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#include "head_booke.h" +#endif +#endif + +#if defined(CONFIG_PPC_FSL_BOOK3E) +#include "../mm/mmu_decl.h" +#endif + +int main(void) +{ + DEFINE(THREAD, offsetof(struct task_struct, thread)); + DEFINE(MM, offsetof(struct task_struct, mm)); + DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id)); +#ifdef CONFIG_PPC64 + DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); + DEFINE(SIGSEGV, SIGSEGV); + DEFINE(NMI_MASK, NMI_MASK); + DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr)); +#else + DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); +#endif /* CONFIG_PPC64 */ + + DEFINE(KSP, offsetof(struct thread_struct, ksp)); + DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); + DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); +#ifdef CONFIG_BOOKE + DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0])); +#endif + DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode)); + DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0])); + DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr)); +#ifdef CONFIG_ALTIVEC + DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0])); + DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave)); + DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr)); + DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr)); +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX + DEFINE(THREAD_VSR0, offsetof(struct thread_struct, fpr)); + DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr)); +#endif /* CONFIG_VSX */ +#ifdef CONFIG_PPC64 + DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid)); +#else /* CONFIG_PPC64 */ + DEFINE(PGDIR, offsetof(struct thread_struct, pgdir)); +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) + DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0)); +#endif +#ifdef CONFIG_SPE + DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0])); + DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc)); + DEFINE(THREAD_SPEFSCR, offsetof(struct thread_struct, spefscr)); + DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe)); +#endif /* CONFIG_SPE */ +#endif /* CONFIG_PPC64 */ +#ifdef CONFIG_KVM_BOOK3S_32_HANDLER + DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); +#endif + + DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); + DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); + DEFINE(TI_PREEMPT, 
offsetof(struct thread_info, preempt_count)); + DEFINE(TI_TASK, offsetof(struct thread_info, task)); + DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); + +#ifdef CONFIG_PPC64 + DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size)); + DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size)); + DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page)); + DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size)); + DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size)); + DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); + /* paca */ + DEFINE(PACA_SIZE, sizeof(struct paca_struct)); + DEFINE(PACA_LOCK_TOKEN, offsetof(struct paca_struct, lock_token)); + DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); + DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start)); + DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack)); + DEFINE(PACACURRENT, offsetof(struct paca_struct, __current)); + DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr)); + DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr)); + DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1)); + DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc)); + DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase)); + DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); + DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); + DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened)); + DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); +#ifdef CONFIG_PPC_MM_SLICES + DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, + context.low_slices_psize)); + DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct, + context.high_slices_psize)); + DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); +#endif /* CONFIG_PPC_MM_SLICES */ + +#ifdef CONFIG_PPC_BOOK3E + DEFINE(PACAPGD, offsetof(struct paca_struct, pgd)); + DEFINE(PACA_KERNELPGD, offsetof(struct paca_struct, kernel_pgd)); + DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); + DEFINE(PACA_EXTLB, offsetof(struct paca_struct, extlb)); + DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); + DEFINE(PACA_EXCRIT, offsetof(struct paca_struct, excrit)); + DEFINE(PACA_EXDBG, offsetof(struct paca_struct, exdbg)); + DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack)); + DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack)); + DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack)); +#endif /* CONFIG_PPC_BOOK3E */ + +#ifdef CONFIG_PPC_STD_MMU_64 + DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real)); + DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr)); + DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); + DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); + DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp)); +#ifdef CONFIG_PPC_MM_SLICES + DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp)); +#else + DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp)); +#endif /* CONFIG_PPC_MM_SLICES */ + DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); + DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); + DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb)); + DEFINE(PACALPPACAPTR, offsetof(struct paca_struct, lppaca_ptr)); + DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr)); + DEFINE(SLBSHADOW_STACKVSID, + offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED 
- 1].vsid)); + DEFINE(SLBSHADOW_STACKESID, + offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid)); + DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area)); + DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0)); + DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1)); + DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int)); + DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int)); + DEFINE(LPPACA_PMCINUSE, offsetof(struct lppaca, pmcregs_in_use)); + DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx)); + DEFINE(LPPACA_YIELDCOUNT, offsetof(struct lppaca, yield_count)); + DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx)); +#endif /* CONFIG_PPC_STD_MMU_64 */ + DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); + DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); + DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); + DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime)); + DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user)); + DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); + DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); + DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); + DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost)); +#endif /* CONFIG_PPC64 */ + + /* RTAS */ + DEFINE(RTASBASE, offsetof(struct rtas_t, base)); + DEFINE(RTASENTRY, offsetof(struct rtas_t, entry)); + + /* Interrupt register frame */ + DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); + DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); +#ifdef CONFIG_PPC64 + /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ + DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); + DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); + + /* hcall statistics */ + DEFINE(HCALL_STAT_SIZE, sizeof(struct hcall_stats)); + DEFINE(HCALL_STAT_CALLS, offsetof(struct hcall_stats, num_calls)); + DEFINE(HCALL_STAT_TB, offsetof(struct hcall_stats, tb_total)); + DEFINE(HCALL_STAT_PURR, offsetof(struct hcall_stats, purr_total)); +#endif /* CONFIG_PPC64 */ + DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0])); + DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1])); + DEFINE(GPR2, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[2])); + DEFINE(GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[3])); + DEFINE(GPR4, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[4])); + DEFINE(GPR5, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[5])); + DEFINE(GPR6, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[6])); + DEFINE(GPR7, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[7])); + DEFINE(GPR8, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[8])); + DEFINE(GPR9, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[9])); + DEFINE(GPR10, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[10])); + DEFINE(GPR11, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[11])); + DEFINE(GPR12, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[12])); + DEFINE(GPR13, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[13])); +#ifndef CONFIG_PPC64 + DEFINE(GPR14, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[14])); + DEFINE(GPR15, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[15])); + DEFINE(GPR16, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[16])); + DEFINE(GPR17, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[17])); + 
DEFINE(GPR18, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[18])); + DEFINE(GPR19, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[19])); + DEFINE(GPR20, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[20])); + DEFINE(GPR21, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[21])); + DEFINE(GPR22, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[22])); + DEFINE(GPR23, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[23])); + DEFINE(GPR24, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[24])); + DEFINE(GPR25, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[25])); + DEFINE(GPR26, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[26])); + DEFINE(GPR27, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[27])); + DEFINE(GPR28, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[28])); + DEFINE(GPR29, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[29])); + DEFINE(GPR30, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[30])); + DEFINE(GPR31, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[31])); +#endif /* CONFIG_PPC64 */ + /* + * Note: these symbols include _ because they overlap with special + * register names + */ + DEFINE(_NIP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, nip)); + DEFINE(_MSR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, msr)); + DEFINE(_CTR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ctr)); + DEFINE(_LINK, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, link)); + DEFINE(_CCR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ccr)); + DEFINE(_XER, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, xer)); + DEFINE(_DAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar)); + DEFINE(_DSISR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr)); + DEFINE(ORIG_GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, orig_gpr3)); + DEFINE(RESULT, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, result)); + DEFINE(_TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap)); +#ifndef CONFIG_PPC64 + DEFINE(_MQ, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, mq)); + /* + * The PowerPC 400-class & Book-E processors have neither the DAR + * nor the DSISR SPRs. Hence, we overload them to hold the similar + * DEAR and ESR SPRs for such processors. For critical interrupts + * we use them to hold SRR0 and SRR1. + */ + DEFINE(_DEAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar)); + DEFINE(_ESR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr)); +#else /* CONFIG_PPC64 */ + DEFINE(SOFTE, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, softe)); + + /* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! 
*/ + DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)); + DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8); +#endif /* CONFIG_PPC64 */ + +#if defined(CONFIG_PPC32) +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) + DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE); + DEFINE(MAS0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0)); + /* we overload MMUCR for 44x on MAS0 since they are mutually exclusive */ + DEFINE(MMUCR, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0)); + DEFINE(MAS1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas1)); + DEFINE(MAS2, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas2)); + DEFINE(MAS3, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas3)); + DEFINE(MAS6, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas6)); + DEFINE(MAS7, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas7)); + DEFINE(_SRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr0)); + DEFINE(_SRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr1)); + DEFINE(_CSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr0)); + DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1)); + DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0)); + DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1)); + DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit)); +#endif +#endif + DEFINE(CLONE_VM, CLONE_VM); + DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); + +#ifndef CONFIG_PPC64 + DEFINE(MM_PGD, offsetof(struct mm_struct, pgd)); +#endif /* ! CONFIG_PPC64 */ + + /* About the CPU features table */ + DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features)); + DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup)); + DEFINE(CPU_SPEC_RESTORE, offsetof(struct cpu_spec, cpu_restore)); + + DEFINE(pbe_address, offsetof(struct pbe, address)); + DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); + DEFINE(pbe_next, offsetof(struct pbe, next)); + +#ifndef CONFIG_PPC64 + DEFINE(TASK_SIZE, TASK_SIZE); + DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28); +#endif /* ! 
CONFIG_PPC64 */ + + /* datapage offsets for use by vdso */ + DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct vdso_data, tb_orig_stamp)); + DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct vdso_data, tb_ticks_per_sec)); + DEFINE(CFG_TB_TO_XS, offsetof(struct vdso_data, tb_to_xs)); + DEFINE(CFG_STAMP_XSEC, offsetof(struct vdso_data, stamp_xsec)); + DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct vdso_data, tb_update_count)); + DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct vdso_data, tz_minuteswest)); + DEFINE(CFG_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); + DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32)); + DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec)); + DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); + DEFINE(STAMP_XTIME, offsetof(struct vdso_data, stamp_xtime)); + DEFINE(STAMP_SEC_FRAC, offsetof(struct vdso_data, stamp_sec_fraction)); + DEFINE(CFG_ICACHE_BLOCKSZ, offsetof(struct vdso_data, icache_block_size)); + DEFINE(CFG_DCACHE_BLOCKSZ, offsetof(struct vdso_data, dcache_block_size)); + DEFINE(CFG_ICACHE_LOGBLOCKSZ, offsetof(struct vdso_data, icache_log_block_size)); + DEFINE(CFG_DCACHE_LOGBLOCKSZ, offsetof(struct vdso_data, dcache_log_block_size)); +#ifdef CONFIG_PPC64 + DEFINE(CFG_SYSCALL_MAP64, offsetof(struct vdso_data, syscall_map_64)); + DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec)); + DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec)); + DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec)); + DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec)); + DEFINE(TSPC64_TV_SEC, offsetof(struct timespec, tv_sec)); + DEFINE(TSPC64_TV_NSEC, offsetof(struct timespec, tv_nsec)); + DEFINE(TSPC32_TV_SEC, offsetof(struct compat_timespec, tv_sec)); + DEFINE(TSPC32_TV_NSEC, offsetof(struct compat_timespec, tv_nsec)); +#else + DEFINE(TVAL32_TV_SEC, offsetof(struct timeval, tv_sec)); + DEFINE(TVAL32_TV_USEC, offsetof(struct timeval, tv_usec)); + DEFINE(TSPC32_TV_SEC, offsetof(struct timespec, tv_sec)); + DEFINE(TSPC32_TV_NSEC, offsetof(struct timespec, tv_nsec)); +#endif + /* timeval/timezone offsets for use by vdso */ + DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); + DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); + + /* Other bits used by the vdso */ + DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); + DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); + DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); + +#ifdef CONFIG_BUG + DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); +#endif + + DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE); + DEFINE(PTE_SIZE, sizeof(pte_t)); + +#ifdef CONFIG_KVM + DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); + DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); + DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); + DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); + DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr)); + DEFINE(VCPU_FPSCR, offsetof(struct kvm_vcpu, arch.fpscr)); +#ifdef CONFIG_ALTIVEC + DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr)); + DEFINE(VCPU_VSCR, offsetof(struct kvm_vcpu, arch.vscr)); +#endif +#ifdef CONFIG_VSX + DEFINE(VCPU_VSRS, offsetof(struct kvm_vcpu, arch.vsr)); +#endif + DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); + DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); + DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); + DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); + DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, 
arch.pc)); +#ifdef CONFIG_KVM_BOOK3S_64_HV + DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr)); + DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0)); + DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1)); + DEFINE(VCPU_SPRG0, offsetof(struct kvm_vcpu, arch.shregs.sprg0)); + DEFINE(VCPU_SPRG1, offsetof(struct kvm_vcpu, arch.shregs.sprg1)); + DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2)); + DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3)); +#endif + DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4)); + DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5)); + DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6)); + DEFINE(VCPU_SHARED_SPRG7, offsetof(struct kvm_vcpu_arch_shared, sprg7)); + DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); + DEFINE(VCPU_SHADOW_PID1, offsetof(struct kvm_vcpu, arch.shadow_pid1)); + DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared)); + DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); + DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); + + DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0)); + DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1)); + DEFINE(VCPU_SHARED_MAS2, offsetof(struct kvm_vcpu_arch_shared, mas2)); + DEFINE(VCPU_SHARED_MAS7_3, offsetof(struct kvm_vcpu_arch_shared, mas7_3)); + DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4)); + DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6)); + + /* book3s */ +#ifdef CONFIG_KVM_BOOK3S_64_HV + DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); + DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); + DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); + DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); + DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); + DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock)); + DEFINE(KVM_ONLINE_CPUS, offsetof(struct kvm, online_vcpus.counter)); + DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu)); + DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); + DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor)); + DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); + DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); + DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); +#endif +#ifdef CONFIG_PPC_BOOK3S + DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); + DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); + DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); + DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); + DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr)); + DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr)); + DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor)); + DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl)); + DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr)); + DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); + DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec)); + DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); + DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions)); + DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded)); + DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); + DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa)); + DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); + DEFINE(VCPU_PMC, 
offsetof(struct kvm_vcpu, arch.pmc)); + DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); + DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max)); + DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); + DEFINE(VCPU_LAST_CPU, offsetof(struct kvm_vcpu, arch.last_cpu)); + DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); + DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); + DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); + DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); + DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid)); + DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); + DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); + DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); + DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); + DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - + offsetof(struct kvmppc_vcpu_book3s, vcpu)); + DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); + DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv)); + DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); + +#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_KVM_BOOK3S_PR +# define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f)) +#else +# define SVCPU_FIELD(x, f) +#endif +# define HSTATE_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, kvm_hstate.f)) +#else /* 32-bit */ +# define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct kvmppc_book3s_shadow_vcpu, f)) +# define HSTATE_FIELD(x, f) DEFINE(x, offsetof(struct kvmppc_book3s_shadow_vcpu, hstate.f)) +#endif + + SVCPU_FIELD(SVCPU_CR, cr); + SVCPU_FIELD(SVCPU_XER, xer); + SVCPU_FIELD(SVCPU_CTR, ctr); + SVCPU_FIELD(SVCPU_LR, lr); + SVCPU_FIELD(SVCPU_PC, pc); + SVCPU_FIELD(SVCPU_R0, gpr[0]); + SVCPU_FIELD(SVCPU_R1, gpr[1]); + SVCPU_FIELD(SVCPU_R2, gpr[2]); + SVCPU_FIELD(SVCPU_R3, gpr[3]); + SVCPU_FIELD(SVCPU_R4, gpr[4]); + SVCPU_FIELD(SVCPU_R5, gpr[5]); + SVCPU_FIELD(SVCPU_R6, gpr[6]); + SVCPU_FIELD(SVCPU_R7, gpr[7]); + SVCPU_FIELD(SVCPU_R8, gpr[8]); + SVCPU_FIELD(SVCPU_R9, gpr[9]); + SVCPU_FIELD(SVCPU_R10, gpr[10]); + SVCPU_FIELD(SVCPU_R11, gpr[11]); + SVCPU_FIELD(SVCPU_R12, gpr[12]); + SVCPU_FIELD(SVCPU_R13, gpr[13]); + SVCPU_FIELD(SVCPU_FAULT_DSISR, fault_dsisr); + SVCPU_FIELD(SVCPU_FAULT_DAR, fault_dar); + SVCPU_FIELD(SVCPU_LAST_INST, last_inst); + SVCPU_FIELD(SVCPU_SHADOW_SRR1, shadow_srr1); +#ifdef CONFIG_PPC_BOOK3S_32 + SVCPU_FIELD(SVCPU_SR, sr); +#endif +#ifdef CONFIG_PPC64 + SVCPU_FIELD(SVCPU_SLB, slb); + SVCPU_FIELD(SVCPU_SLB_MAX, slb_max); +#endif + + HSTATE_FIELD(HSTATE_HOST_R1, host_r1); + HSTATE_FIELD(HSTATE_HOST_R2, host_r2); + HSTATE_FIELD(HSTATE_HOST_MSR, host_msr); + HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler); + HSTATE_FIELD(HSTATE_SCRATCH0, scratch0); + HSTATE_FIELD(HSTATE_SCRATCH1, scratch1); + HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); + HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5); + HSTATE_FIELD(HSTATE_NAPPING, napping); + +#ifdef CONFIG_KVM_BOOK3S_64_HV + HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); + HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); + HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); + HSTATE_FIELD(HSTATE_MMCR, host_mmcr); + HSTATE_FIELD(HSTATE_PMC, host_pmc); + HSTATE_FIELD(HSTATE_PURR, host_purr); + HSTATE_FIELD(HSTATE_SPURR, host_spurr); + HSTATE_FIELD(HSTATE_DSCR, host_dscr); + HSTATE_FIELD(HSTATE_DABR, dabr); + HSTATE_FIELD(HSTATE_DECEXP, dec_expires); + DEFINE(IPI_PRIORITY, IPI_PRIORITY); +#endif /* CONFIG_KVM_BOOK3S_64_HV */ 
+ +#else /* CONFIG_PPC_BOOK3S */ + DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); + DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); + DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); + DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); + DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); + DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); + DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); + DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); +#endif /* CONFIG_PPC_BOOK3S */ +#endif /* CONFIG_KVM */ + +#ifdef CONFIG_KVM_GUEST + DEFINE(KVM_MAGIC_SCRATCH1, offsetof(struct kvm_vcpu_arch_shared, + scratch1)); + DEFINE(KVM_MAGIC_SCRATCH2, offsetof(struct kvm_vcpu_arch_shared, + scratch2)); + DEFINE(KVM_MAGIC_SCRATCH3, offsetof(struct kvm_vcpu_arch_shared, + scratch3)); + DEFINE(KVM_MAGIC_INT, offsetof(struct kvm_vcpu_arch_shared, + int_pending)); + DEFINE(KVM_MAGIC_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); + DEFINE(KVM_MAGIC_CRITICAL, offsetof(struct kvm_vcpu_arch_shared, + critical)); + DEFINE(KVM_MAGIC_SR, offsetof(struct kvm_vcpu_arch_shared, sr)); +#endif + +#ifdef CONFIG_44x + DEFINE(PGD_T_LOG2, PGD_T_LOG2); + DEFINE(PTE_T_LOG2, PTE_T_LOG2); +#endif +#ifdef CONFIG_PPC_FSL_BOOK3E + DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam)); + DEFINE(TLBCAM_MAS0, offsetof(struct tlbcam, MAS0)); + DEFINE(TLBCAM_MAS1, offsetof(struct tlbcam, MAS1)); + DEFINE(TLBCAM_MAS2, offsetof(struct tlbcam, MAS2)); + DEFINE(TLBCAM_MAS3, offsetof(struct tlbcam, MAS3)); + DEFINE(TLBCAM_MAS7, offsetof(struct tlbcam, MAS7)); +#endif + +#if defined(CONFIG_KVM) && defined(CONFIG_SPE) + DEFINE(VCPU_EVR, offsetof(struct kvm_vcpu, arch.evr[0])); + DEFINE(VCPU_ACC, offsetof(struct kvm_vcpu, arch.acc)); + DEFINE(VCPU_SPEFSCR, offsetof(struct kvm_vcpu, arch.spefscr)); + DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr)); +#endif + +#ifdef CONFIG_KVM_EXIT_TIMING + DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, + arch.timing_exit.tv32.tbu)); + DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu, + arch.timing_exit.tv32.tbl)); + DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu, + arch.timing_last_enter.tv32.tbu)); + DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu, + arch.timing_last_enter.tv32.tbl)); +#endif + +#ifdef CONFIG_PPC_POWERNV + DEFINE(OPAL_MC_GPR3, offsetof(struct opal_machine_check_event, gpr3)); + DEFINE(OPAL_MC_SRR0, offsetof(struct opal_machine_check_event, srr0)); + DEFINE(OPAL_MC_SRR1, offsetof(struct opal_machine_check_event, srr1)); + DEFINE(PACA_OPAL_MC_EVT, offsetof(struct paca_struct, opal_mc_evt)); +#endif + + return 0; +} diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c new file mode 100644 index 00000000..a4dab7ca --- /dev/null +++ b/arch/powerpc/kernel/audit.c @@ -0,0 +1,83 @@ +#include <linux/init.h> +#include <linux/types.h> +#include <linux/audit.h> +#include <asm/unistd.h> + +static unsigned dir_class[] = { +#include <asm-generic/audit_dir_write.h> +~0U +}; + +static unsigned read_class[] = { +#include <asm-generic/audit_read.h> +~0U +}; + +static unsigned write_class[] = { +#include <asm-generic/audit_write.h> +~0U +}; + +static unsigned chattr_class[] = { +#include <asm-generic/audit_change_attr.h> +~0U +}; + +static unsigned signal_class[] = { +#include <asm-generic/audit_signal.h> +~0U +}; + +int audit_classify_arch(int arch) +{ +#ifdef CONFIG_PPC64 + if (arch == AUDIT_ARCH_PPC) + return 1; +#endif + return 0; +} + +int audit_classify_syscall(int 
abi, unsigned syscall)
+{
+#ifdef CONFIG_PPC64
+ extern int ppc32_classify_syscall(unsigned);
+ if (abi == AUDIT_ARCH_PPC)
+ return ppc32_classify_syscall(syscall);
+#endif
+ switch(syscall) {
+ case __NR_open:
+ return 2;
+ case __NR_openat:
+ return 3;
+ case __NR_socketcall:
+ return 4;
+ case __NR_execve:
+ return 5;
+ default:
+ return 0;
+ }
+}
+
+static int __init audit_classes_init(void)
+{
+#ifdef CONFIG_PPC64
+ extern __u32 ppc32_dir_class[];
+ extern __u32 ppc32_write_class[];
+ extern __u32 ppc32_read_class[];
+ extern __u32 ppc32_chattr_class[];
+ extern __u32 ppc32_signal_class[];
+ audit_register_class(AUDIT_CLASS_WRITE_32, ppc32_write_class);
+ audit_register_class(AUDIT_CLASS_READ_32, ppc32_read_class);
+ audit_register_class(AUDIT_CLASS_DIR_WRITE_32, ppc32_dir_class);
+ audit_register_class(AUDIT_CLASS_CHATTR_32, ppc32_chattr_class);
+ audit_register_class(AUDIT_CLASS_SIGNAL_32, ppc32_signal_class);
+#endif
+ audit_register_class(AUDIT_CLASS_WRITE, write_class);
+ audit_register_class(AUDIT_CLASS_READ, read_class);
+ audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
+ audit_register_class(AUDIT_CLASS_CHATTR, chattr_class);
+ audit_register_class(AUDIT_CLASS_SIGNAL, signal_class);
+ return 0;
+}
+
+__initcall(audit_classes_init);
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
new file mode 100644
index 00000000..ac8f5273
--- /dev/null
+++ b/arch/powerpc/kernel/btext.c
@@ -0,0 +1,922 @@
+/*
+ * Procedures for drawing on the screen early on in the boot process.
+ *
+ * Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ */
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/memblock.h>
+
+#include <asm/sections.h>
+#include <asm/prom.h>
+#include <asm/btext.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/udbg.h>
+
+#define NO_SCROLL
+
+#ifndef NO_SCROLL
+static void scrollscreen(void);
+#endif
+
+static void draw_byte(unsigned char c, long locX, long locY);
+static void draw_byte_32(unsigned char *bits, unsigned int *base, int rb);
+static void draw_byte_16(unsigned char *bits, unsigned int *base, int rb);
+static void draw_byte_8(unsigned char *bits, unsigned int *base, int rb);
+
+#define __force_data __attribute__((__section__(".data")))
+
+static int g_loc_X __force_data;
+static int g_loc_Y __force_data;
+static int g_max_loc_X __force_data;
+static int g_max_loc_Y __force_data;
+
+static int dispDeviceRowBytes __force_data;
+static int dispDeviceDepth __force_data;
+static int dispDeviceRect[4] __force_data;
+static unsigned char *dispDeviceBase __force_data;
+static unsigned char *logicalDisplayBase __force_data;
+
+unsigned long disp_BAT[2] __initdata = {0, 0};
+
+#define cmapsz (16*256)
+
+static unsigned char vga_font[cmapsz];
+
+int boot_text_mapped __force_data = 0;
+int force_printk_to_btext = 0;
+
+#ifdef CONFIG_PPC32
+/* Calc BAT values for mapping the display and store them
+ * in disp_BAT. Those values are then used from head.S to map
+ * the display during identify_machine() and MMU_Init()
+ *
+ * The display is mapped to virtual address 0xD0000000, rather
+ * than 1:1, because some CHRP machines put the frame buffer
+ * in the region starting at 0xC0000000 (PAGE_OFFSET).
+ * This mapping is temporary and will disappear as soon as the
+ * setup done by MMU_Init() is applied.
+ *
+ * For now, we align the BAT and then map 8MB on 601 and 16MB
+ * on other PPCs. This may cause trouble if the framebuffer
+ * is really badly aligned, but I haven't encountered that case
+ * yet.
+ */
+void __init btext_prepare_BAT(void)
+{
+ unsigned long vaddr = PAGE_OFFSET + 0x10000000;
+ unsigned long addr;
+ unsigned long lowbits;
+
+ addr = (unsigned long)dispDeviceBase;
+ if (!addr) {
+ boot_text_mapped = 0;
+ return;
+ }
+ if (PVR_VER(mfspr(SPRN_PVR)) != 1) {
+ /* 603, 604, G3, G4, ... */
+ lowbits = addr & ~0xFF000000UL;
+ addr &= 0xFF000000UL;
+ disp_BAT[0] = vaddr | (BL_16M<<2) | 2;
+ disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW);
+ } else {
+ /* 601 */
+ lowbits = addr & ~0xFF800000UL;
+ addr &= 0xFF800000UL;
+ disp_BAT[0] = vaddr | (_PAGE_NO_CACHE | PP_RWXX) | 4;
+ disp_BAT[1] = addr | BL_8M | 0x40;
+ }
+ logicalDisplayBase = (void *) (vaddr + lowbits);
+}
+#endif
+
+
+/* This function can be used to enable the early boot text when doing
+ * OF booting or within bootx init. It must be followed by a btext_unmap()
+ * call before the logical address becomes unusable.
+ */
+void __init btext_setup_display(int width, int height, int depth, int pitch,
+ unsigned long address)
+{
+ g_loc_X = 0;
+ g_loc_Y = 0;
+ g_max_loc_X = width / 8;
+ g_max_loc_Y = height / 16;
+ logicalDisplayBase = (unsigned char *)address;
+ dispDeviceBase = (unsigned char *)address;
+ dispDeviceRowBytes = pitch;
+ dispDeviceDepth = depth == 15 ? 16 : depth;
+ dispDeviceRect[0] = dispDeviceRect[1] = 0;
+ dispDeviceRect[2] = width;
+ dispDeviceRect[3] = height;
+ boot_text_mapped = 1;
+}
+
+void __init btext_unmap(void)
+{
+ boot_text_mapped = 0;
+}
+
+/* Here's a small text engine to use during early boot
+ * or for debugging purposes
+ *
+ * todo:
+ *
+ * - build some kind of vgacon with it to enable early printk
+ * - move to a separate file
+ * - add a few video driver hooks to keep in sync with display
+ * changes.
+ */
+
+static void map_boot_text(void)
+{
+ unsigned long base, offset, size;
+ unsigned char *vbase;
+
+ /* By default, we are no longer mapped */
+ boot_text_mapped = 0;
+ if (dispDeviceBase == 0)
+ return;
+ base = ((unsigned long) dispDeviceBase) & 0xFFFFF000UL;
+ offset = ((unsigned long) dispDeviceBase) - base;
+ size = dispDeviceRowBytes * dispDeviceRect[3] + offset
+ + dispDeviceRect[0];
+ vbase = __ioremap(base, size, _PAGE_NO_CACHE);
+ if (vbase == 0)
+ return;
+ logicalDisplayBase = vbase + offset;
+ boot_text_mapped = 1;
+}
+
+int btext_initialize(struct device_node *np)
+{
+ unsigned int width, height, depth, pitch;
+ unsigned long address = 0;
+ const u32 *prop;
+
+ prop = of_get_property(np, "linux,bootx-width", NULL);
+ if (prop == NULL)
+ prop = of_get_property(np, "width", NULL);
+ if (prop == NULL)
+ return -EINVAL;
+ width = *prop;
+ prop = of_get_property(np, "linux,bootx-height", NULL);
+ if (prop == NULL)
+ prop = of_get_property(np, "height", NULL);
+ if (prop == NULL)
+ return -EINVAL;
+ height = *prop;
+ prop = of_get_property(np, "linux,bootx-depth", NULL);
+ if (prop == NULL)
+ prop = of_get_property(np, "depth", NULL);
+ if (prop == NULL)
+ return -EINVAL;
+ depth = *prop;
+ pitch = width * ((depth + 7) / 8);
+ prop = of_get_property(np, "linux,bootx-linebytes", NULL);
+ if (prop == NULL)
+ prop = of_get_property(np, "linebytes", NULL);
+ if (prop && *prop != 0xffffffffu)
+ pitch = *prop;
+ if (pitch == 1)
+ pitch = 0x1000;
+ prop = of_get_property(np, "linux,bootx-addr", NULL);
+ if (prop == NULL)
+ prop = of_get_property(np, "address", NULL);
+ if (prop)
+ address = *prop;
+
+ /* FIXME: Add support for PCI reg properties. 
Right now, only + * reliable on macs + */ + if (address == 0) + return -EINVAL; + + g_loc_X = 0; + g_loc_Y = 0; + g_max_loc_X = width / 8; + g_max_loc_Y = height / 16; + dispDeviceBase = (unsigned char *)address; + dispDeviceRowBytes = pitch; + dispDeviceDepth = depth == 15 ? 16 : depth; + dispDeviceRect[0] = dispDeviceRect[1] = 0; + dispDeviceRect[2] = width; + dispDeviceRect[3] = height; + + map_boot_text(); + + return 0; +} + +int __init btext_find_display(int allow_nonstdout) +{ + const char *name; + struct device_node *np = NULL; + int rc = -ENODEV; + + name = of_get_property(of_chosen, "linux,stdout-path", NULL); + if (name != NULL) { + np = of_find_node_by_path(name); + if (np != NULL) { + if (strcmp(np->type, "display") != 0) { + printk("boot stdout isn't a display !\n"); + of_node_put(np); + np = NULL; + } + } + } + if (np) + rc = btext_initialize(np); + if (rc == 0 || !allow_nonstdout) + return rc; + + for_each_node_by_type(np, "display") { + if (of_get_property(np, "linux,opened", NULL)) { + printk("trying %s ...\n", np->full_name); + rc = btext_initialize(np); + printk("result: %d\n", rc); + } + if (rc == 0) + break; + } + return rc; +} + +/* Calc the base address of a given point (x,y) */ +static unsigned char * calc_base(int x, int y) +{ + unsigned char *base; + + base = logicalDisplayBase; + if (base == 0) + base = dispDeviceBase; + base += (x + dispDeviceRect[0]) * (dispDeviceDepth >> 3); + base += (y + dispDeviceRect[1]) * dispDeviceRowBytes; + return base; +} + +/* Adjust the display to a new resolution */ +void btext_update_display(unsigned long phys, int width, int height, + int depth, int pitch) +{ + if (dispDeviceBase == 0) + return; + + /* check it's the same frame buffer (within 256MB) */ + if ((phys ^ (unsigned long)dispDeviceBase) & 0xf0000000) + return; + + dispDeviceBase = (__u8 *) phys; + dispDeviceRect[0] = 0; + dispDeviceRect[1] = 0; + dispDeviceRect[2] = width; + dispDeviceRect[3] = height; + dispDeviceDepth = depth; + dispDeviceRowBytes = pitch; + if (boot_text_mapped) { + iounmap(logicalDisplayBase); + boot_text_mapped = 0; + } + map_boot_text(); + g_loc_X = 0; + g_loc_Y = 0; + g_max_loc_X = width / 8; + g_max_loc_Y = height / 16; +} +EXPORT_SYMBOL(btext_update_display); + +void btext_clearscreen(void) +{ + unsigned int *base = (unsigned int *)calc_base(0, 0); + unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * + (dispDeviceDepth >> 3)) >> 2; + int i,j; + + for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1]); i++) + { + unsigned int *ptr = base; + for(j=width; j; --j) + *(ptr++) = 0; + base += (dispDeviceRowBytes >> 2); + } +} + +void btext_flushscreen(void) +{ + unsigned int *base = (unsigned int *)calc_base(0, 0); + unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * + (dispDeviceDepth >> 3)) >> 2; + int i,j; + + for (i=0; i < (dispDeviceRect[3] - dispDeviceRect[1]); i++) + { + unsigned int *ptr = base; + for(j = width; j > 0; j -= 8) { + __asm__ __volatile__ ("dcbst 0,%0" :: "r" (ptr)); + ptr += 8; + } + base += (dispDeviceRowBytes >> 2); + } + __asm__ __volatile__ ("sync" ::: "memory"); +} + +void btext_flushline(void) +{ + unsigned int *base = (unsigned int *)calc_base(0, g_loc_Y << 4); + unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * + (dispDeviceDepth >> 3)) >> 2; + int i,j; + + for (i=0; i < 16; i++) + { + unsigned int *ptr = base; + for(j = width; j > 0; j -= 8) { + __asm__ __volatile__ ("dcbst 0,%0" :: "r" (ptr)); + ptr += 8; + } + base += (dispDeviceRowBytes >> 2); + } + __asm__ __volatile__ ("sync" 
::: "memory"); +} + + +#ifndef NO_SCROLL +static void scrollscreen(void) +{ + unsigned int *src = (unsigned int *)calc_base(0,16); + unsigned int *dst = (unsigned int *)calc_base(0,0); + unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * + (dispDeviceDepth >> 3)) >> 2; + int i,j; + + for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1] - 16); i++) + { + unsigned int *src_ptr = src; + unsigned int *dst_ptr = dst; + for(j=width; j; --j) + *(dst_ptr++) = *(src_ptr++); + src += (dispDeviceRowBytes >> 2); + dst += (dispDeviceRowBytes >> 2); + } + for (i=0; i<16; i++) + { + unsigned int *dst_ptr = dst; + for(j=width; j; --j) + *(dst_ptr++) = 0; + dst += (dispDeviceRowBytes >> 2); + } +} +#endif /* ndef NO_SCROLL */ + +void btext_drawchar(char c) +{ + int cline = 0; +#ifdef NO_SCROLL + int x; +#endif + if (!boot_text_mapped) + return; + + switch (c) { + case '\b': + if (g_loc_X > 0) + --g_loc_X; + break; + case '\t': + g_loc_X = (g_loc_X & -8) + 8; + break; + case '\r': + g_loc_X = 0; + break; + case '\n': + g_loc_X = 0; + g_loc_Y++; + cline = 1; + break; + default: + draw_byte(c, g_loc_X++, g_loc_Y); + } + if (g_loc_X >= g_max_loc_X) { + g_loc_X = 0; + g_loc_Y++; + cline = 1; + } +#ifndef NO_SCROLL + while (g_loc_Y >= g_max_loc_Y) { + scrollscreen(); + g_loc_Y--; + } +#else + /* wrap around from bottom to top of screen so we don't + waste time scrolling each line. -- paulus. */ + if (g_loc_Y >= g_max_loc_Y) + g_loc_Y = 0; + if (cline) { + for (x = 0; x < g_max_loc_X; ++x) + draw_byte(' ', x, g_loc_Y); + } +#endif +} + +void btext_drawstring(const char *c) +{ + if (!boot_text_mapped) + return; + while (*c) + btext_drawchar(*c++); +} + +void btext_drawtext(const char *c, unsigned int len) +{ + if (!boot_text_mapped) + return; + while (len--) + btext_drawchar(*c++); +} + +void btext_drawhex(unsigned long v) +{ + if (!boot_text_mapped) + return; +#ifdef CONFIG_PPC64 + btext_drawchar(hex_asc_hi(v >> 56)); + btext_drawchar(hex_asc_lo(v >> 56)); + btext_drawchar(hex_asc_hi(v >> 48)); + btext_drawchar(hex_asc_lo(v >> 48)); + btext_drawchar(hex_asc_hi(v >> 40)); + btext_drawchar(hex_asc_lo(v >> 40)); + btext_drawchar(hex_asc_hi(v >> 32)); + btext_drawchar(hex_asc_lo(v >> 32)); +#endif + btext_drawchar(hex_asc_hi(v >> 24)); + btext_drawchar(hex_asc_lo(v >> 24)); + btext_drawchar(hex_asc_hi(v >> 16)); + btext_drawchar(hex_asc_lo(v >> 16)); + btext_drawchar(hex_asc_hi(v >> 8)); + btext_drawchar(hex_asc_lo(v >> 8)); + btext_drawchar(hex_asc_hi(v)); + btext_drawchar(hex_asc_lo(v)); + btext_drawchar(' '); +} + +static void draw_byte(unsigned char c, long locX, long locY) +{ + unsigned char *base = calc_base(locX << 3, locY << 4); + unsigned char *font = &vga_font[((unsigned int)c) * 16]; + int rb = dispDeviceRowBytes; + + switch(dispDeviceDepth) { + case 24: + case 32: + draw_byte_32(font, (unsigned int *)base, rb); + break; + case 15: + case 16: + draw_byte_16(font, (unsigned int *)base, rb); + break; + case 8: + draw_byte_8(font, (unsigned int *)base, rb); + break; + } +} + +static unsigned int expand_bits_8[16] = { + 0x00000000, + 0x000000ff, + 0x0000ff00, + 0x0000ffff, + 0x00ff0000, + 0x00ff00ff, + 0x00ffff00, + 0x00ffffff, + 0xff000000, + 0xff0000ff, + 0xff00ff00, + 0xff00ffff, + 0xffff0000, + 0xffff00ff, + 0xffffff00, + 0xffffffff +}; + +static unsigned int expand_bits_16[4] = { + 0x00000000, + 0x0000ffff, + 0xffff0000, + 0xffffffff +}; + + +static void draw_byte_32(unsigned char *font, unsigned int *base, int rb) +{ + int l, bits; + int fg = 0xFFFFFFFFUL; + int bg = 0x00000000UL; + + for (l 
= 0; l < 16; ++l) + { + bits = *font++; + base[0] = (-(bits >> 7) & fg) ^ bg; + base[1] = (-((bits >> 6) & 1) & fg) ^ bg; + base[2] = (-((bits >> 5) & 1) & fg) ^ bg; + base[3] = (-((bits >> 4) & 1) & fg) ^ bg; + base[4] = (-((bits >> 3) & 1) & fg) ^ bg; + base[5] = (-((bits >> 2) & 1) & fg) ^ bg; + base[6] = (-((bits >> 1) & 1) & fg) ^ bg; + base[7] = (-(bits & 1) & fg) ^ bg; + base = (unsigned int *) ((char *)base + rb); + } +} + +static void draw_byte_16(unsigned char *font, unsigned int *base, int rb) +{ + int l, bits; + int fg = 0xFFFFFFFFUL; + int bg = 0x00000000UL; + unsigned int *eb = (int *)expand_bits_16; + + for (l = 0; l < 16; ++l) + { + bits = *font++; + base[0] = (eb[bits >> 6] & fg) ^ bg; + base[1] = (eb[(bits >> 4) & 3] & fg) ^ bg; + base[2] = (eb[(bits >> 2) & 3] & fg) ^ bg; + base[3] = (eb[bits & 3] & fg) ^ bg; + base = (unsigned int *) ((char *)base + rb); + } +} + +static void draw_byte_8(unsigned char *font, unsigned int *base, int rb) +{ + int l, bits; + int fg = 0x0F0F0F0FUL; + int bg = 0x00000000UL; + unsigned int *eb = (int *)expand_bits_8; + + for (l = 0; l < 16; ++l) + { + bits = *font++; + base[0] = (eb[bits >> 4] & fg) ^ bg; + base[1] = (eb[bits & 0xf] & fg) ^ bg; + base = (unsigned int *) ((char *)base + rb); + } +} + +static unsigned char vga_font[cmapsz] = { +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd, +0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xff, +0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe, +0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, +0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, +0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, +0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00, +0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd, +0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x1e, 0x0e, +0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30, +0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x63, +0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8, +0xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0e, +0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, +0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xdb, +0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00, +0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6, +0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x18, 0x3c, +0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0, +0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c, +0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c, +0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, +0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c, +0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18, +0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, +0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, +0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x18, +0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, +0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, +0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe, +0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, +0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18, +0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, +0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, +0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x18, 0x18, 0x00, 0x00, 0x00, 
0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, +0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, +0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xde, 0xde, +0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, +0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0, +0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x6c, +0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, +0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, +0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x0c, +0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60, +0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xe7, +0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, +0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c, +0x0c, 0x0e, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c, +0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, +0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, +0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18, +0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, +0xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30, +0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, +0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, +0x30, 
0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x60, +0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc, +0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00, 0x00, 0x00, 0xe0, 0x60, +0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06, +0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0xe0, 0x60, +0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb, +0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66, +0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60, +0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x30, +0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3, +0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18, +0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x18, +0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, +0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, +0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00, +0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe, +0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, +0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 
0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, +0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06, +0x3c, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe, +0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, +0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18, +0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x66, +0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, +0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, 0x00, +0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b, +0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x6c, +0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00, +0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, +0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, +0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00, +0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, +0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, +0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18, +0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, +0x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00, +0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, +0xd8, 0x70, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, +0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, +0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, +0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c, +0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, +0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06, +0x0c, 0x1f, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, +0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, +0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36, +0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x44, 0x11, 0x44, +0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, +0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, +0x55, 0xaa, 0x55, 0xaa, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, +0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, +0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, +0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, +0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, +0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, +0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, +0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xf0, 0xf0, 0xf0, +0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, +0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, +0x0f, 0x0f, 0x0f, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0, +0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8, +0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66, +0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, +0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66, +0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60, +0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, +0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18, +0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x30, +0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x1b, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00, +0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c, +0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0c, 0x0c, +0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00, +0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, +}; + +void __init udbg_init_btext(void) +{ + /* If btext is enabled, we might have a BAT setup for early display, + * thus we do enable some very basic udbg output + */ + udbg_putc = btext_drawchar; +} diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c new file mode 100644 index 00000000..92c6b008 --- /dev/null +++ b/arch/powerpc/kernel/cacheinfo.c @@ -0,0 +1,838 @@ +/* + * Processor cache information made available to userspace via sysfs; + * intended to be compatible with x86 intel_cacheinfo implementation. + * + * Copyright 2008 IBM Corporation + * Author: Nathan Lynch + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + */ + +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/kobject.h> +#include <linux/list.h> +#include <linux/notifier.h> +#include <linux/of.h> +#include <linux/percpu.h> +#include <linux/slab.h> +#include <asm/prom.h> + +#include "cacheinfo.h" + +/* per-cpu object for tracking: + * - a "cache" kobject for the top-level directory + * - a list of "index" objects representing the cpu's local cache hierarchy + */ +struct cache_dir { + struct kobject *kobj; /* bare (not embedded) kobject for cache + * directory */ + struct cache_index_dir *index; /* list of index objects */ +}; + +/* "index" object: each cpu's cache directory has an index + * subdirectory corresponding to a cache object associated with the + * cpu. This object's lifetime is managed via the embedded kobject. + */ +struct cache_index_dir { + struct kobject kobj; + struct cache_index_dir *next; /* next index in parent directory */ + struct cache *cache; +}; + +/* Template for determining which OF properties to query for a given + * cache type */ +struct cache_type_info { + const char *name; + const char *size_prop; + + /* Allow for both [di]-cache-line-size and + * [di]-cache-block-size properties. 
According to the PowerPC + * Processor binding, -line-size should be provided if it + * differs from the cache block size (that which is operated + * on by cache instructions), so we look for -line-size first. + * See cache_get_line_size(). */ + + const char *line_size_props[2]; + const char *nr_sets_prop; +}; + +/* These are used to index the cache_type_info array. */ +#define CACHE_TYPE_UNIFIED 0 +#define CACHE_TYPE_INSTRUCTION 1 +#define CACHE_TYPE_DATA 2 + +static const struct cache_type_info cache_type_info[] = { + { + /* PowerPC Processor binding says the [di]-cache-* + * must be equal on unified caches, so just use + * d-cache properties. */ + .name = "Unified", + .size_prop = "d-cache-size", + .line_size_props = { "d-cache-line-size", + "d-cache-block-size", }, + .nr_sets_prop = "d-cache-sets", + }, + { + .name = "Instruction", + .size_prop = "i-cache-size", + .line_size_props = { "i-cache-line-size", + "i-cache-block-size", }, + .nr_sets_prop = "i-cache-sets", + }, + { + .name = "Data", + .size_prop = "d-cache-size", + .line_size_props = { "d-cache-line-size", + "d-cache-block-size", }, + .nr_sets_prop = "d-cache-sets", + }, +}; + +/* Cache object: each instance of this corresponds to a distinct cache + * in the system. There are separate objects for Harvard caches: one + * each for instruction and data, and each refers to the same OF node. + * The refcount of the OF node is elevated for the lifetime of the + * cache object. A cache object is released when its shared_cpu_map + * is cleared (see cache_cpu_clear). + * + * A cache object is on two lists: an unsorted global list + * (cache_list) of cache objects; and a singly-linked list + * representing the local cache hierarchy, which is ordered by level + * (e.g. L1d -> L1i -> L2 -> L3). + */ +struct cache { + struct device_node *ofnode; /* OF node for this cache, may be cpu */ + struct cpumask shared_cpu_map; /* online CPUs using this cache */ + int type; /* split cache disambiguation */ + int level; /* level not explicit in device tree */ + struct list_head list; /* global list of cache objects */ + struct cache *next_local; /* next cache of >= level */ +}; + +static DEFINE_PER_CPU(struct cache_dir *, cache_dir_pcpu); + +/* traversal/modification of this list occurs only at cpu hotplug time; + * access is serialized by cpu hotplug locking + */ +static LIST_HEAD(cache_list); + +static struct cache_index_dir *kobj_to_cache_index_dir(struct kobject *k) +{ + return container_of(k, struct cache_index_dir, kobj); +} + +static const char *cache_type_string(const struct cache *cache) +{ + return cache_type_info[cache->type].name; +} + +static void __cpuinit cache_init(struct cache *cache, int type, int level, struct device_node *ofnode) +{ + cache->type = type; + cache->level = level; + cache->ofnode = of_node_get(ofnode); + INIT_LIST_HEAD(&cache->list); + list_add(&cache->list, &cache_list); +} + +static struct cache *__cpuinit new_cache(int type, int level, struct device_node *ofnode) +{ + struct cache *cache; + + cache = kzalloc(sizeof(*cache), GFP_KERNEL); + if (cache) + cache_init(cache, type, level, ofnode); + + return cache; +} + +static void release_cache_debugcheck(struct cache *cache) +{ + struct cache *iter; + + list_for_each_entry(iter, &cache_list, list) + WARN_ONCE(iter->next_local == cache, + "cache for %s(%s) refers to cache for %s(%s)\n", + iter->ofnode->full_name, + cache_type_string(iter), + cache->ofnode->full_name, + cache_type_string(cache)); +} + +static void release_cache(struct cache *cache) +{ + if (!cache) + 
return; + + pr_debug("freeing L%d %s cache for %s\n", cache->level, + cache_type_string(cache), cache->ofnode->full_name); + + release_cache_debugcheck(cache); + list_del(&cache->list); + of_node_put(cache->ofnode); + kfree(cache); +} + +static void cache_cpu_set(struct cache *cache, int cpu) +{ + struct cache *next = cache; + + while (next) { + WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map), + "CPU %i already accounted in %s(%s)\n", + cpu, next->ofnode->full_name, + cache_type_string(next)); + cpumask_set_cpu(cpu, &next->shared_cpu_map); + next = next->next_local; + } +} + +static int cache_size(const struct cache *cache, unsigned int *ret) +{ + const char *propname; + const u32 *cache_size; + + propname = cache_type_info[cache->type].size_prop; + + cache_size = of_get_property(cache->ofnode, propname, NULL); + if (!cache_size) + return -ENODEV; + + *ret = *cache_size; + return 0; +} + +static int cache_size_kb(const struct cache *cache, unsigned int *ret) +{ + unsigned int size; + + if (cache_size(cache, &size)) + return -ENODEV; + + *ret = size / 1024; + return 0; +} + +/* not cache_line_size() because that's a macro in include/linux/cache.h */ +static int cache_get_line_size(const struct cache *cache, unsigned int *ret) +{ + const u32 *line_size; + int i, lim; + + lim = ARRAY_SIZE(cache_type_info[cache->type].line_size_props); + + for (i = 0; i < lim; i++) { + const char *propname; + + propname = cache_type_info[cache->type].line_size_props[i]; + line_size = of_get_property(cache->ofnode, propname, NULL); + if (line_size) + break; + } + + if (!line_size) + return -ENODEV; + + *ret = *line_size; + return 0; +} + +static int cache_nr_sets(const struct cache *cache, unsigned int *ret) +{ + const char *propname; + const u32 *nr_sets; + + propname = cache_type_info[cache->type].nr_sets_prop; + + nr_sets = of_get_property(cache->ofnode, propname, NULL); + if (!nr_sets) + return -ENODEV; + + *ret = *nr_sets; + return 0; +} + +static int cache_associativity(const struct cache *cache, unsigned int *ret) +{ + unsigned int line_size; + unsigned int nr_sets; + unsigned int size; + + if (cache_nr_sets(cache, &nr_sets)) + goto err; + + /* If the cache is fully associative, there is no need to + * check the other properties. 
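+ * A sets count of 1 is the conventional device-tree encoding of full
+ * associativity, and we report 0 ways for it.  Otherwise the value
+ * computed below is plain arithmetic on the reported geometry; with
+ * hypothetical numbers, a 32768-byte cache with 64 sets and 128-byte
+ * lines comes out as (32768 / 64) / 128 = 4 ways.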
+ */ + if (nr_sets == 1) { + *ret = 0; + return 0; + } + + if (cache_get_line_size(cache, &line_size)) + goto err; + if (cache_size(cache, &size)) + goto err; + + if (!(nr_sets > 0 && size > 0 && line_size > 0)) + goto err; + + *ret = (size / nr_sets) / line_size; + return 0; +err: + return -ENODEV; +} + +/* helper for dealing with split caches */ +static struct cache *cache_find_first_sibling(struct cache *cache) +{ + struct cache *iter; + + if (cache->type == CACHE_TYPE_UNIFIED) + return cache; + + list_for_each_entry(iter, &cache_list, list) + if (iter->ofnode == cache->ofnode && iter->next_local == cache) + return iter; + + return cache; +} + +/* return the first cache on a local list matching node */ +static struct cache *cache_lookup_by_node(const struct device_node *node) +{ + struct cache *cache = NULL; + struct cache *iter; + + list_for_each_entry(iter, &cache_list, list) { + if (iter->ofnode != node) + continue; + cache = cache_find_first_sibling(iter); + break; + } + + return cache; +} + +static bool cache_node_is_unified(const struct device_node *np) +{ + return of_get_property(np, "cache-unified", NULL); +} + +static struct cache *__cpuinit cache_do_one_devnode_unified(struct device_node *node, int level) +{ + struct cache *cache; + + pr_debug("creating L%d ucache for %s\n", level, node->full_name); + + cache = new_cache(CACHE_TYPE_UNIFIED, level, node); + + return cache; +} + +static struct cache *__cpuinit cache_do_one_devnode_split(struct device_node *node, int level) +{ + struct cache *dcache, *icache; + + pr_debug("creating L%d dcache and icache for %s\n", level, + node->full_name); + + dcache = new_cache(CACHE_TYPE_DATA, level, node); + icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node); + + if (!dcache || !icache) + goto err; + + dcache->next_local = icache; + + return dcache; +err: + release_cache(dcache); + release_cache(icache); + return NULL; +} + +static struct cache *__cpuinit cache_do_one_devnode(struct device_node *node, int level) +{ + struct cache *cache; + + if (cache_node_is_unified(node)) + cache = cache_do_one_devnode_unified(node, level); + else + cache = cache_do_one_devnode_split(node, level); + + return cache; +} + +static struct cache *__cpuinit cache_lookup_or_instantiate(struct device_node *node, int level) +{ + struct cache *cache; + + cache = cache_lookup_by_node(node); + + WARN_ONCE(cache && cache->level != level, + "cache level mismatch on lookup (got %d, expected %d)\n", + cache->level, level); + + if (!cache) + cache = cache_do_one_devnode(node, level); + + return cache; +} + +static void __cpuinit link_cache_lists(struct cache *smaller, struct cache *bigger) +{ + while (smaller->next_local) { + if (smaller->next_local == bigger) + return; /* already linked */ + smaller = smaller->next_local; + } + + smaller->next_local = bigger; +} + +static void __cpuinit do_subsidiary_caches_debugcheck(struct cache *cache) +{ + WARN_ON_ONCE(cache->level != 1); + WARN_ON_ONCE(strcmp(cache->ofnode->type, "cpu")); +} + +static void __cpuinit do_subsidiary_caches(struct cache *cache) +{ + struct device_node *subcache_node; + int level = cache->level; + + do_subsidiary_caches_debugcheck(cache); + + while ((subcache_node = of_find_next_cache_node(cache->ofnode))) { + struct cache *subcache; + + level++; + subcache = cache_lookup_or_instantiate(subcache_node, level); + of_node_put(subcache_node); + if (!subcache) + break; + + link_cache_lists(cache, subcache); + cache = subcache; + } +} + +static struct cache *__cpuinit cache_chain_instantiate(unsigned int 
cpu_id) +{ + struct device_node *cpu_node; + struct cache *cpu_cache = NULL; + + pr_debug("creating cache object(s) for CPU %i\n", cpu_id); + + cpu_node = of_get_cpu_node(cpu_id, NULL); + WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id); + if (!cpu_node) + goto out; + + cpu_cache = cache_lookup_or_instantiate(cpu_node, 1); + if (!cpu_cache) + goto out; + + do_subsidiary_caches(cpu_cache); + + cache_cpu_set(cpu_cache, cpu_id); +out: + of_node_put(cpu_node); + + return cpu_cache; +} + +static struct cache_dir *__cpuinit cacheinfo_create_cache_dir(unsigned int cpu_id) +{ + struct cache_dir *cache_dir; + struct device *dev; + struct kobject *kobj = NULL; + + dev = get_cpu_device(cpu_id); + WARN_ONCE(!dev, "no dev for CPU %i\n", cpu_id); + if (!dev) + goto err; + + kobj = kobject_create_and_add("cache", &dev->kobj); + if (!kobj) + goto err; + + cache_dir = kzalloc(sizeof(*cache_dir), GFP_KERNEL); + if (!cache_dir) + goto err; + + cache_dir->kobj = kobj; + + WARN_ON_ONCE(per_cpu(cache_dir_pcpu, cpu_id) != NULL); + + per_cpu(cache_dir_pcpu, cpu_id) = cache_dir; + + return cache_dir; +err: + kobject_put(kobj); + return NULL; +} + +static void cache_index_release(struct kobject *kobj) +{ + struct cache_index_dir *index; + + index = kobj_to_cache_index_dir(kobj); + + pr_debug("freeing index directory for L%d %s cache\n", + index->cache->level, cache_type_string(index->cache)); + + kfree(index); +} + +static ssize_t cache_index_show(struct kobject *k, struct attribute *attr, char *buf) +{ + struct kobj_attribute *kobj_attr; + + kobj_attr = container_of(attr, struct kobj_attribute, attr); + + return kobj_attr->show(k, kobj_attr, buf); +} + +static struct cache *index_kobj_to_cache(struct kobject *k) +{ + struct cache_index_dir *index; + + index = kobj_to_cache_index_dir(k); + + return index->cache; +} + +static ssize_t size_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + unsigned int size_kb; + struct cache *cache; + + cache = index_kobj_to_cache(k); + + if (cache_size_kb(cache, &size_kb)) + return -ENODEV; + + return sprintf(buf, "%uK\n", size_kb); +} + +static struct kobj_attribute cache_size_attr = + __ATTR(size, 0444, size_show, NULL); + + +static ssize_t line_size_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + unsigned int line_size; + struct cache *cache; + + cache = index_kobj_to_cache(k); + + if (cache_get_line_size(cache, &line_size)) + return -ENODEV; + + return sprintf(buf, "%u\n", line_size); +} + +static struct kobj_attribute cache_line_size_attr = + __ATTR(coherency_line_size, 0444, line_size_show, NULL); + +static ssize_t nr_sets_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + unsigned int nr_sets; + struct cache *cache; + + cache = index_kobj_to_cache(k); + + if (cache_nr_sets(cache, &nr_sets)) + return -ENODEV; + + return sprintf(buf, "%u\n", nr_sets); +} + +static struct kobj_attribute cache_nr_sets_attr = + __ATTR(number_of_sets, 0444, nr_sets_show, NULL); + +static ssize_t associativity_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + unsigned int associativity; + struct cache *cache; + + cache = index_kobj_to_cache(k); + + if (cache_associativity(cache, &associativity)) + return -ENODEV; + + return sprintf(buf, "%u\n", associativity); +} + +static struct kobj_attribute cache_assoc_attr = + __ATTR(ways_of_associativity, 0444, associativity_show, NULL); + +static ssize_t type_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + struct cache *cache; + + cache = 
index_kobj_to_cache(k); + + return sprintf(buf, "%s\n", cache_type_string(cache)); +} + +static struct kobj_attribute cache_type_attr = + __ATTR(type, 0444, type_show, NULL); + +static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + struct cache_index_dir *index; + struct cache *cache; + + index = kobj_to_cache_index_dir(k); + cache = index->cache; + + return sprintf(buf, "%d\n", cache->level); +} + +static struct kobj_attribute cache_level_attr = + __ATTR(level, 0444, level_show, NULL); + +static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + struct cache_index_dir *index; + struct cache *cache; + int len; + int n = 0; + + index = kobj_to_cache_index_dir(k); + cache = index->cache; + len = PAGE_SIZE - 2; + + if (len > 1) { + n = cpumask_scnprintf(buf, len, &cache->shared_cpu_map); + buf[n++] = '\n'; + buf[n] = '\0'; + } + return n; +} + +static struct kobj_attribute cache_shared_cpu_map_attr = + __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL); + +/* Attributes which should always be created -- the kobject/sysfs core + * does this automatically via kobj_type->default_attrs. This is the + * minimum data required to uniquely identify a cache. + */ +static struct attribute *cache_index_default_attrs[] = { + &cache_type_attr.attr, + &cache_level_attr.attr, + &cache_shared_cpu_map_attr.attr, + NULL, +}; + +/* Attributes which should be created if the cache device node has the + * right properties -- see cacheinfo_create_index_opt_attrs + */ +static struct kobj_attribute *cache_index_opt_attrs[] = { + &cache_size_attr, + &cache_line_size_attr, + &cache_nr_sets_attr, + &cache_assoc_attr, +}; + +static const struct sysfs_ops cache_index_ops = { + .show = cache_index_show, +}; + +static struct kobj_type cache_index_type = { + .release = cache_index_release, + .sysfs_ops = &cache_index_ops, + .default_attrs = cache_index_default_attrs, +}; + +static void __cpuinit cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) +{ + const char *cache_name; + const char *cache_type; + struct cache *cache; + char *buf; + int i; + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return; + + cache = dir->cache; + cache_name = cache->ofnode->full_name; + cache_type = cache_type_string(cache); + + /* We don't want to create an attribute that can't provide a + * meaningful value. Check the return value of each optional + * attribute's ->show method before registering the + * attribute. 
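+ *
+ * (Illustrative sketch, not part of the original patch: on a
+ * device-tree node that carries no "d-cache-size" property,
+ * size_show() returns -ENODEV, so the probe below
+ *
+ *	rc = attr->show(&dir->kobj, attr, buf);
+ *	if (rc <= 0)
+ *		continue;
+ *
+ * simply skips the "size" file for that index directory instead of
+ * registering an attribute that would fail every read.)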
+ */ + for (i = 0; i < ARRAY_SIZE(cache_index_opt_attrs); i++) { + struct kobj_attribute *attr; + ssize_t rc; + + attr = cache_index_opt_attrs[i]; + + rc = attr->show(&dir->kobj, attr, buf); + if (rc <= 0) { + pr_debug("not creating %s attribute for " + "%s(%s) (rc = %zd)\n", + attr->attr.name, cache_name, + cache_type, rc); + continue; + } + if (sysfs_create_file(&dir->kobj, &attr->attr)) + pr_debug("could not create %s attribute for %s(%s)\n", + attr->attr.name, cache_name, cache_type); + } + + kfree(buf); +} + +static void __cpuinit cacheinfo_create_index_dir(struct cache *cache, int index, struct cache_dir *cache_dir) +{ + struct cache_index_dir *index_dir; + int rc; + + index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL); + if (!index_dir) + goto err; + + index_dir->cache = cache; + + rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type, + cache_dir->kobj, "index%d", index); + if (rc) + goto err; + + index_dir->next = cache_dir->index; + cache_dir->index = index_dir; + + cacheinfo_create_index_opt_attrs(index_dir); + + return; +err: + kfree(index_dir); +} + +static void __cpuinit cacheinfo_sysfs_populate(unsigned int cpu_id, struct cache *cache_list) +{ + struct cache_dir *cache_dir; + struct cache *cache; + int index = 0; + + cache_dir = cacheinfo_create_cache_dir(cpu_id); + if (!cache_dir) + return; + + cache = cache_list; + while (cache) { + cacheinfo_create_index_dir(cache, index, cache_dir); + index++; + cache = cache->next_local; + } +} + +void __cpuinit cacheinfo_cpu_online(unsigned int cpu_id) +{ + struct cache *cache; + + cache = cache_chain_instantiate(cpu_id); + if (!cache) + return; + + cacheinfo_sysfs_populate(cpu_id, cache); +} + +#ifdef CONFIG_HOTPLUG_CPU /* functions needed for cpu offline */ + +static struct cache *cache_lookup_by_cpu(unsigned int cpu_id) +{ + struct device_node *cpu_node; + struct cache *cache; + + cpu_node = of_get_cpu_node(cpu_id, NULL); + WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id); + if (!cpu_node) + return NULL; + + cache = cache_lookup_by_node(cpu_node); + of_node_put(cpu_node); + + return cache; +} + +static void remove_index_dirs(struct cache_dir *cache_dir) +{ + struct cache_index_dir *index; + + index = cache_dir->index; + + while (index) { + struct cache_index_dir *next; + + next = index->next; + kobject_put(&index->kobj); + index = next; + } +} + +static void remove_cache_dir(struct cache_dir *cache_dir) +{ + remove_index_dirs(cache_dir); + + kobject_put(cache_dir->kobj); + + kfree(cache_dir); +} + +static void cache_cpu_clear(struct cache *cache, int cpu) +{ + while (cache) { + struct cache *next = cache->next_local; + + WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map), + "CPU %i not accounted in %s(%s)\n", + cpu, cache->ofnode->full_name, + cache_type_string(cache)); + + cpumask_clear_cpu(cpu, &cache->shared_cpu_map); + + /* Release the cache object if all the cpus using it + * are offline */ + if (cpumask_empty(&cache->shared_cpu_map)) + release_cache(cache); + + cache = next; + } +} + +void cacheinfo_cpu_offline(unsigned int cpu_id) +{ + struct cache_dir *cache_dir; + struct cache *cache; + + /* Prevent userspace from seeing inconsistent state - remove + * the sysfs hierarchy first */ + cache_dir = per_cpu(cache_dir_pcpu, cpu_id); + + /* careful, sysfs population may have failed */ + if (cache_dir) + remove_cache_dir(cache_dir); + + per_cpu(cache_dir_pcpu, cpu_id) = NULL; + + /* clear the CPU's bit in its cache chain, possibly freeing + * cache objects */ + cache = cache_lookup_by_cpu(cpu_id); + if 
(cache) + cache_cpu_clear(cache, cpu_id); +} +#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/powerpc/kernel/cacheinfo.h b/arch/powerpc/kernel/cacheinfo.h new file mode 100644 index 00000000..a7b74d36 --- /dev/null +++ b/arch/powerpc/kernel/cacheinfo.h @@ -0,0 +1,8 @@ +#ifndef _PPC_CACHEINFO_H +#define _PPC_CACHEINFO_H + +/* These are just hooks for sysfs.c to use. */ +extern void cacheinfo_cpu_online(unsigned int cpu_id); +extern void cacheinfo_cpu_offline(unsigned int cpu_id); + +#endif /* _PPC_CACHEINFO_H */ diff --git a/arch/powerpc/kernel/clock.c b/arch/powerpc/kernel/clock.c new file mode 100644 index 00000000..a764b477 --- /dev/null +++ b/arch/powerpc/kernel/clock.c @@ -0,0 +1,82 @@ +/* + * Dummy clk implementations for powerpc. + * These need to be overridden in platform code. + */ + +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/export.h> +#include <asm/clk_interface.h> + +struct clk_interface clk_functions; + +struct clk *clk_get(struct device *dev, const char *id) +{ + if (clk_functions.clk_get) + return clk_functions.clk_get(dev, id); + return ERR_PTR(-ENOSYS); +} +EXPORT_SYMBOL(clk_get); + +void clk_put(struct clk *clk) +{ + if (clk_functions.clk_put) + clk_functions.clk_put(clk); +} +EXPORT_SYMBOL(clk_put); + +int clk_enable(struct clk *clk) +{ + if (clk_functions.clk_enable) + return clk_functions.clk_enable(clk); + return -ENOSYS; +} +EXPORT_SYMBOL(clk_enable); + +void clk_disable(struct clk *clk) +{ + if (clk_functions.clk_disable) + clk_functions.clk_disable(clk); +} +EXPORT_SYMBOL(clk_disable); + +unsigned long clk_get_rate(struct clk *clk) +{ + if (clk_functions.clk_get_rate) + return clk_functions.clk_get_rate(clk); + return 0; +} +EXPORT_SYMBOL(clk_get_rate); + +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + if (clk_functions.clk_round_rate) + return clk_functions.clk_round_rate(clk, rate); + return -ENOSYS; +} +EXPORT_SYMBOL(clk_round_rate); + +int clk_set_rate(struct clk *clk, unsigned long rate) +{ + if (clk_functions.clk_set_rate) + return clk_functions.clk_set_rate(clk, rate); + return -ENOSYS; +} +EXPORT_SYMBOL(clk_set_rate); + +struct clk *clk_get_parent(struct clk *clk) +{ + if (clk_functions.clk_get_parent) + return clk_functions.clk_get_parent(clk); + return ERR_PTR(-ENOSYS); +} +EXPORT_SYMBOL(clk_get_parent); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + if (clk_functions.clk_set_parent) + return clk_functions.clk_set_parent(clk, parent); + return -ENOSYS; +} +EXPORT_SYMBOL(clk_set_parent); diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c new file mode 100644 index 00000000..108ff14e --- /dev/null +++ b/arch/powerpc/kernel/compat_audit.c @@ -0,0 +1,43 @@ +#undef __powerpc64__ +#include <asm/unistd.h> + +unsigned ppc32_dir_class[] = { +#include <asm-generic/audit_dir_write.h> +~0U +}; + +unsigned ppc32_chattr_class[] = { +#include <asm-generic/audit_change_attr.h> +~0U +}; + +unsigned ppc32_write_class[] = { +#include <asm-generic/audit_write.h> +~0U +}; + +unsigned ppc32_read_class[] = { +#include <asm-generic/audit_read.h> +~0U +}; + +unsigned ppc32_signal_class[] = { +#include <asm-generic/audit_signal.h> +~0U +}; + +int ppc32_classify_syscall(unsigned syscall) +{ + switch(syscall) { + case __NR_open: + return 2; + case __NR_openat: + return 3; + case __NR_socketcall: + return 4; + case __NR_execve: + return 5; + default: + return 1; + } +} diff --git a/arch/powerpc/kernel/cpu_setup_44x.S b/arch/powerpc/kernel/cpu_setup_44x.S new file mode 
100644 index 00000000..e32b4a9a --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_44x.S @@ -0,0 +1,74 @@ +/* + * This file contains low level CPU setup functions. + * Valentine Barshak <vbarshak@ru.mvista.com> + * MontaVista Software, Inc (c) 2007 + * + * Based on cpu_setup_6xx code by + * Benjamin Herrenschmidt <benh@kernel.crashing.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <asm/processor.h> +#include <asm/cputable.h> +#include <asm/ppc_asm.h> + +_GLOBAL(__setup_cpu_440ep) + b __init_fpu_44x +_GLOBAL(__setup_cpu_440epx) + mflr r4 + bl __init_fpu_44x + bl __plb_disable_wrp + bl __fixup_440A_mcheck + mtlr r4 + blr +_GLOBAL(__setup_cpu_440grx) + mflr r4 + bl __plb_disable_wrp + bl __fixup_440A_mcheck + mtlr r4 + blr +_GLOBAL(__setup_cpu_460ex) +_GLOBAL(__setup_cpu_460gt) +_GLOBAL(__setup_cpu_460sx) +_GLOBAL(__setup_cpu_apm821xx) + mflr r4 + bl __init_fpu_44x + bl __fixup_440A_mcheck + mtlr r4 + blr + +_GLOBAL(__setup_cpu_440x5) +_GLOBAL(__setup_cpu_440gx) +_GLOBAL(__setup_cpu_440spe) + b __fixup_440A_mcheck + +/* enable APU between CPU and FPU */ +_GLOBAL(__init_fpu_44x) + mfspr r3,SPRN_CCR0 + /* Clear DAPUIB flag in CCR0 */ + rlwinm r3,r3,0,12,10 + mtspr SPRN_CCR0,r3 + isync + blr + +/* + * Workaround for the incorrect write to DDR SDRAM errata. + * The write address can be corrupted during writes to + * DDR SDRAM when write pipelining is enabled on PLB0. + * Disable write pipelining here. + */ +#define DCRN_PLB4A0_ACR 0x81 + +_GLOBAL(__plb_disable_wrp) + mfdcr r3,DCRN_PLB4A0_ACR + /* clear WRP bit in PLB4A0_ACR */ + rlwinm r3,r3,0,8,6 + mtdcr DCRN_PLB4A0_ACR,r3 + isync + blr + diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S new file mode 100644 index 00000000..f8cd9fba --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -0,0 +1,489 @@ +/* + * This file contains low level CPU setup functions. + * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/cache.h> +#include <asm/mmu.h> + +_GLOBAL(__setup_cpu_603) + mflr r5 +BEGIN_MMU_FTR_SECTION + li r10,0 + mtspr SPRN_SPRG_603_LRU,r10 /* init SW LRU tracking */ +END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) +BEGIN_FTR_SECTION + bl __init_fpu_registers +END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE) + bl setup_common_caches + mtlr r5 + blr +_GLOBAL(__setup_cpu_604) + mflr r5 + bl setup_common_caches + bl setup_604_hid0 + mtlr r5 + blr +_GLOBAL(__setup_cpu_750) + mflr r5 + bl __init_fpu_registers + bl setup_common_caches + bl setup_750_7400_hid0 + mtlr r5 + blr +_GLOBAL(__setup_cpu_750cx) + mflr r5 + bl __init_fpu_registers + bl setup_common_caches + bl setup_750_7400_hid0 + bl setup_750cx + mtlr r5 + blr +_GLOBAL(__setup_cpu_750fx) + mflr r5 + bl __init_fpu_registers + bl setup_common_caches + bl setup_750_7400_hid0 + bl setup_750fx + mtlr r5 + blr +_GLOBAL(__setup_cpu_7400) + mflr r5 + bl __init_fpu_registers + bl setup_7400_workarounds + bl setup_common_caches + bl setup_750_7400_hid0 + mtlr r5 + blr +_GLOBAL(__setup_cpu_7410) + mflr r5 + bl __init_fpu_registers + bl setup_7410_workarounds + bl setup_common_caches + bl setup_750_7400_hid0 + li r3,0 + mtspr SPRN_L2CR2,r3 + mtlr r5 + blr +_GLOBAL(__setup_cpu_745x) + mflr r5 + bl setup_common_caches + bl setup_745x_specifics + mtlr r5 + blr + +/* Enable caches for 603's, 604, 750 & 7400 */ +setup_common_caches: + mfspr r11,SPRN_HID0 + andi. r0,r11,HID0_DCE + ori r11,r11,HID0_ICE|HID0_DCE + ori r8,r11,HID0_ICFI + bne 1f /* don't invalidate the D-cache */ + ori r8,r8,HID0_DCI /* unless it wasn't enabled */ +1: sync + mtspr SPRN_HID0,r8 /* enable and invalidate caches */ + sync + mtspr SPRN_HID0,r11 /* enable caches */ + sync + isync + blr + +/* 604, 604e, 604ev, ... + * Enable superscalar execution & branch history table + */ +setup_604_hid0: + mfspr r11,SPRN_HID0 + ori r11,r11,HID0_SIED|HID0_BHTE + ori r8,r11,HID0_BTCD + sync + mtspr SPRN_HID0,r8 /* flush branch target address cache */ + sync /* on 604e/604r */ + mtspr SPRN_HID0,r11 + sync + isync + blr + +/* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some + * erratas we work around here. + * Moto MPC710CE.pdf describes them, those are errata + * #3, #4 and #5 + * Note that we assume the firmware didn't choose to + * apply other workarounds (there are other ones documented + * in the .pdf). It appear that Apple firmware only works + * around #3 and with the same fix we use. We may want to + * check if the CPU is using 60x bus mode in which case + * the workaround for errata #4 is useless. Also, we may + * want to explicitly clear HID0_NOPDST as this is not + * needed once we have applied workaround #5 (though it's + * not set by Apple's firmware at least). + */ +setup_7400_workarounds: + mfpvr r3 + rlwinm r3,r3,0,20,31 + cmpwi 0,r3,0x0207 + ble 1f + blr +setup_7410_workarounds: + mfpvr r3 + rlwinm r3,r3,0,20,31 + cmpwi 0,r3,0x0100 + bnelr +1: + mfspr r11,SPRN_MSSSR0 + /* Errata #3: Set L1OPQ_SIZE to 0x10 */ + rlwinm r11,r11,0,9,6 + oris r11,r11,0x0100 + /* Errata #4: Set L2MQ_SIZE to 1 (check for MPX mode first ?) 
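+ *
+ * (How these field updates work: rlwinm rD,rS,0,MB,ME with MB > ME
+ * keeps every bit outside the wrapped range -- i.e. it clears bits
+ * ME+1 through MB-1, IBM numbering with bit 0 as the MSB -- and the
+ * oris that follows ORs the new field value in.  For errata #3 above,
+ * rlwinm r11,r11,0,9,6 clears the two-bit field at bits 7-8 and
+ * oris r11,r11,0x0100 then sets bit 7.)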
*/ + oris r11,r11,0x0002 + /* Errata #5: Set DRLT_SIZE to 0x01 */ + rlwinm r11,r11,0,5,2 + oris r11,r11,0x0800 + sync + mtspr SPRN_MSSSR0,r11 + sync + isync + blr + +/* 740/750/7400/7410 + * Enable Store Gathering (SGE), Address Brodcast (ABE), + * Branch History Table (BHTE), Branch Target ICache (BTIC) + * Dynamic Power Management (DPM), Speculative (SPD) + * Clear Instruction cache throttling (ICTC) + */ +setup_750_7400_hid0: + mfspr r11,SPRN_HID0 + ori r11,r11,HID0_SGE | HID0_ABE | HID0_BHTE | HID0_BTIC + oris r11,r11,HID0_DPM@h +BEGIN_FTR_SECTION + xori r11,r11,HID0_BTIC +END_FTR_SECTION_IFSET(CPU_FTR_NO_BTIC) +BEGIN_FTR_SECTION + xoris r11,r11,HID0_DPM@h /* disable dynamic power mgmt */ +END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM) + li r3,HID0_SPD + andc r11,r11,r3 /* clear SPD: enable speculative */ + li r3,0 + mtspr SPRN_ICTC,r3 /* Instruction Cache Throttling off */ + isync + mtspr SPRN_HID0,r11 + sync + isync + blr + +/* 750cx specific + * Looks like we have to disable NAP feature for some PLL settings... + * (waiting for confirmation) + */ +setup_750cx: + mfspr r10, SPRN_HID1 + rlwinm r10,r10,4,28,31 + cmpwi cr0,r10,7 + cmpwi cr1,r10,9 + cmpwi cr2,r10,11 + cror 4*cr0+eq,4*cr0+eq,4*cr1+eq + cror 4*cr0+eq,4*cr0+eq,4*cr2+eq + bnelr + lwz r6,CPU_SPEC_FEATURES(r4) + li r7,CPU_FTR_CAN_NAP + andc r6,r6,r7 + stw r6,CPU_SPEC_FEATURES(r4) + blr + +/* 750fx specific + */ +setup_750fx: + blr + +/* MPC 745x + * Enable Store Gathering (SGE), Branch Folding (FOLD) + * Branch History Table (BHTE), Branch Target ICache (BTIC) + * Dynamic Power Management (DPM), Speculative (SPD) + * Ensure our data cache instructions really operate. + * Timebase has to be running or we wouldn't have made it here, + * just ensure we don't disable it. + * Clear Instruction cache throttling (ICTC) + * Enable L2 HW prefetch + */ +setup_745x_specifics: + /* We check for the presence of an L3 cache setup by + * the firmware. If any, we disable NAP capability as + * it's known to be bogus on rev 2.1 and earlier + */ +BEGIN_FTR_SECTION + mfspr r11,SPRN_L3CR + andis. r11,r11,L3CR_L3E@h + beq 1f +END_FTR_SECTION_IFSET(CPU_FTR_L3CR) + lwz r6,CPU_SPEC_FEATURES(r4) + andi. r0,r6,CPU_FTR_L3_DISABLE_NAP + beq 1f + li r7,CPU_FTR_CAN_NAP + andc r6,r6,r7 + stw r6,CPU_SPEC_FEATURES(r4) +1: + mfspr r11,SPRN_HID0 + + /* All of the bits we have to set..... + */ + ori r11,r11,HID0_SGE | HID0_FOLD | HID0_BHTE + ori r11,r11,HID0_LRSTK | HID0_BTIC + oris r11,r11,HID0_DPM@h +BEGIN_MMU_FTR_SECTION + oris r11,r11,HID0_HIGH_BAT@h +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) +BEGIN_FTR_SECTION + xori r11,r11,HID0_BTIC +END_FTR_SECTION_IFSET(CPU_FTR_NO_BTIC) +BEGIN_FTR_SECTION + xoris r11,r11,HID0_DPM@h /* disable dynamic power mgmt */ +END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM) + + /* All of the bits we have to clear.... + */ + li r3,HID0_SPD | HID0_NOPDST | HID0_NOPTI + andc r11,r11,r3 /* clear SPD: enable speculative */ + li r3,0 + + mtspr SPRN_ICTC,r3 /* Instruction Cache Throttling off */ + isync + mtspr SPRN_HID0,r11 + sync + isync + + /* Enable L2 HW prefetch, if L2 is enabled + */ + mfspr r3,SPRN_L2CR + andis. r3,r3,L2CR_L2E@h + beqlr + mfspr r3,SPRN_MSSCR0 + ori r3,r3,3 + sync + mtspr SPRN_MSSCR0,r3 + sync + isync + blr + +/* + * Initialize the FPU registers. 
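+ * (All 32 FPRs are simply loaded from empty_zero_page, with the FPU
+ * temporarily enabled via MSR_FP -- see REST_32FPRS below.)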
This is needed to work around an errata + * in some 750 cpus where using a not yet initialized FPU register after + * power on reset may hang the CPU + */ +_GLOBAL(__init_fpu_registers) + mfmsr r10 + ori r11,r10,MSR_FP + mtmsr r11 + isync + addis r9,r3,empty_zero_page@ha + addi r9,r9,empty_zero_page@l + REST_32FPRS(0,r9) + sync + mtmsr r10 + isync + blr + + +/* Definitions for the table use to save CPU states */ +#define CS_HID0 0 +#define CS_HID1 4 +#define CS_HID2 8 +#define CS_MSSCR0 12 +#define CS_MSSSR0 16 +#define CS_ICTRL 20 +#define CS_LDSTCR 24 +#define CS_LDSTDB 28 +#define CS_SIZE 32 + + .data + .balign L1_CACHE_BYTES +cpu_state_storage: + .space CS_SIZE + .balign L1_CACHE_BYTES,0 + .text + +/* Called in normal context to backup CPU 0 state. This + * does not include cache settings. This function is also + * called for machine sleep. This does not include the MMU + * setup, BATs, etc... but rather the "special" registers + * like HID0, HID1, MSSCR0, etc... + */ +_GLOBAL(__save_cpu_setup) + /* Some CR fields are volatile, we back it up all */ + mfcr r7 + + /* Get storage ptr */ + lis r5,cpu_state_storage@h + ori r5,r5,cpu_state_storage@l + + /* Save HID0 (common to all CONFIG_6xx cpus) */ + mfspr r3,SPRN_HID0 + stw r3,CS_HID0(r5) + + /* Now deal with CPU type dependent registers */ + mfspr r3,SPRN_PVR + srwi r3,r3,16 + cmplwi cr0,r3,0x8000 /* 7450 */ + cmplwi cr1,r3,0x000c /* 7400 */ + cmplwi cr2,r3,0x800c /* 7410 */ + cmplwi cr3,r3,0x8001 /* 7455 */ + cmplwi cr4,r3,0x8002 /* 7457 */ + cmplwi cr5,r3,0x8003 /* 7447A */ + cmplwi cr6,r3,0x7000 /* 750FX */ + cmplwi cr7,r3,0x8004 /* 7448 */ + /* cr1 is 7400 || 7410 */ + cror 4*cr1+eq,4*cr1+eq,4*cr2+eq + /* cr0 is 74xx */ + cror 4*cr0+eq,4*cr0+eq,4*cr3+eq + cror 4*cr0+eq,4*cr0+eq,4*cr4+eq + cror 4*cr0+eq,4*cr0+eq,4*cr1+eq + cror 4*cr0+eq,4*cr0+eq,4*cr5+eq + cror 4*cr0+eq,4*cr0+eq,4*cr7+eq + bne 1f + /* Backup 74xx specific regs */ + mfspr r4,SPRN_MSSCR0 + stw r4,CS_MSSCR0(r5) + mfspr r4,SPRN_MSSSR0 + stw r4,CS_MSSSR0(r5) + beq cr1,1f + /* Backup 745x specific registers */ + mfspr r4,SPRN_HID1 + stw r4,CS_HID1(r5) + mfspr r4,SPRN_ICTRL + stw r4,CS_ICTRL(r5) + mfspr r4,SPRN_LDSTCR + stw r4,CS_LDSTCR(r5) + mfspr r4,SPRN_LDSTDB + stw r4,CS_LDSTDB(r5) +1: + bne cr6,1f + /* Backup 750FX specific registers */ + mfspr r4,SPRN_HID1 + stw r4,CS_HID1(r5) + /* If rev 2.x, backup HID2 */ + mfspr r3,SPRN_PVR + andi. r3,r3,0xff00 + cmpwi cr0,r3,0x0200 + bne 1f + mfspr r4,SPRN_HID2 + stw r4,CS_HID2(r5) +1: + mtcr r7 + blr + +/* Called with no MMU context (typically MSR:IR/DR off) to + * restore CPU state as backed up by the previous + * function. 
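+ * Since translation may be off, the code below forms a physical
+ * pointer to cpu_state_storage (note the -KERNELBASE offset in the
+ * lis).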
This does not include cache setting + */ +_GLOBAL(__restore_cpu_setup) + /* Some CR fields are volatile, we back it up all */ + mfcr r7 + + /* Get storage ptr */ + lis r5,(cpu_state_storage-KERNELBASE)@h + ori r5,r5,cpu_state_storage@l + + /* Restore HID0 */ + lwz r3,CS_HID0(r5) + sync + isync + mtspr SPRN_HID0,r3 + sync + isync + + /* Now deal with CPU type dependent registers */ + mfspr r3,SPRN_PVR + srwi r3,r3,16 + cmplwi cr0,r3,0x8000 /* 7450 */ + cmplwi cr1,r3,0x000c /* 7400 */ + cmplwi cr2,r3,0x800c /* 7410 */ + cmplwi cr3,r3,0x8001 /* 7455 */ + cmplwi cr4,r3,0x8002 /* 7457 */ + cmplwi cr5,r3,0x8003 /* 7447A */ + cmplwi cr6,r3,0x7000 /* 750FX */ + cmplwi cr7,r3,0x8004 /* 7448 */ + /* cr1 is 7400 || 7410 */ + cror 4*cr1+eq,4*cr1+eq,4*cr2+eq + /* cr0 is 74xx */ + cror 4*cr0+eq,4*cr0+eq,4*cr3+eq + cror 4*cr0+eq,4*cr0+eq,4*cr4+eq + cror 4*cr0+eq,4*cr0+eq,4*cr1+eq + cror 4*cr0+eq,4*cr0+eq,4*cr5+eq + cror 4*cr0+eq,4*cr0+eq,4*cr7+eq + bne 2f + /* Restore 74xx specific regs */ + lwz r4,CS_MSSCR0(r5) + sync + mtspr SPRN_MSSCR0,r4 + sync + isync + lwz r4,CS_MSSSR0(r5) + sync + mtspr SPRN_MSSSR0,r4 + sync + isync + bne cr2,1f + /* Clear 7410 L2CR2 */ + li r4,0 + mtspr SPRN_L2CR2,r4 +1: beq cr1,2f + /* Restore 745x specific registers */ + lwz r4,CS_HID1(r5) + sync + mtspr SPRN_HID1,r4 + isync + sync + lwz r4,CS_ICTRL(r5) + sync + mtspr SPRN_ICTRL,r4 + isync + sync + lwz r4,CS_LDSTCR(r5) + sync + mtspr SPRN_LDSTCR,r4 + isync + sync + lwz r4,CS_LDSTDB(r5) + sync + mtspr SPRN_LDSTDB,r4 + isync + sync +2: bne cr6,1f + /* Restore 750FX specific registers + * that is restore HID2 on rev 2.x and PLL config & switch + * to PLL 0 on all + */ + /* If rev 2.x, restore HID2 with low voltage bit cleared */ + mfspr r3,SPRN_PVR + andi. r3,r3,0xff00 + cmpwi cr0,r3,0x0200 + bne 4f + lwz r4,CS_HID2(r5) + rlwinm r4,r4,0,19,17 + mtspr SPRN_HID2,r4 + sync +4: + lwz r4,CS_HID1(r5) + rlwinm r5,r4,0,16,14 + mtspr SPRN_HID1,r5 + /* Wait for PLL to stabilize */ + mftbl r5 +3: mftbl r6 + sub r6,r6,r5 + cmplwi cr0,r6,10000 + ble 3b + /* Setup final PLL */ + mtspr SPRN_HID1,r4 +1: + mtcr r7 + blr + diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S new file mode 100644 index 00000000..ebc62f42 --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_a2.S @@ -0,0 +1,120 @@ +/* + * A2 specific assembly support code + * + * Copyright 2009 Ben Herrenschmidt, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/asm-offsets.h> +#include <asm/ppc_asm.h> +#include <asm/ppc-opcode.h> +#include <asm/processor.h> +#include <asm/reg_a2.h> +#include <asm/reg.h> +#include <asm/thread_info.h> + +/* + * Disable thdid and class fields in ERATs to bump PID to full 14 bits capacity. + * This also prevents external LPID accesses but that isn't a problem when not a + * guest. Under PV, this setting will be ignored and MMUCR will return the right + * number of PID bits we can use. + */ +#define MMUCR1_EXTEND_PID \ + (MMUCR1_ICTID | MMUCR1_ITTID | MMUCR1_DCTID | \ + MMUCR1_DTTID | MMUCR1_DCCD) + +/* + * Use extended PIDs if enabled. + * Don't clear the ERATs on context sync events and enable I & D LRU. + * Enable ERAT back invalidate when tlbwe overwrites an entry. 
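+ * (In the INITIAL_MMUCR1 value below: MMUCR1_CSINV_NEVER gives the
+ * "don't clear on context sync" behaviour, MMUCR1_IRRE/MMUCR1_DRRE
+ * correspond to the I & D LRU enable, and MMUCR1_TLBWE_BINV to the
+ * back invalidate on tlbwe overwrite.)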
+ */ +#define INITIAL_MMUCR1 \ + (MMUCR1_EXTEND_PID | MMUCR1_CSINV_NEVER | MMUCR1_IRRE | \ + MMUCR1_DRRE | MMUCR1_TLBWE_BINV) + +_GLOBAL(__setup_cpu_a2) + /* Some of these are actually thread local and some are + * core local but doing it always won't hurt + */ + +#ifdef CONFIG_PPC_ICSWX + /* Make sure ACOP starts out as zero */ + li r3,0 + mtspr SPRN_ACOP,r3 + + /* Skip the following if we are in Guest mode */ + mfmsr r3 + andis. r0,r3,MSR_GS@h + bne _icswx_skip_guest + + /* Enable icswx instruction */ + mfspr r3,SPRN_A2_CCR2 + ori r3,r3,A2_CCR2_ENABLE_ICSWX + mtspr SPRN_A2_CCR2,r3 + + /* Unmask all CTs in HACOP */ + li r3,-1 + mtspr SPRN_HACOP,r3 +_icswx_skip_guest: +#endif /* CONFIG_PPC_ICSWX */ + + /* Enable doorbell */ + mfspr r3,SPRN_A2_CCR2 + oris r3,r3,A2_CCR2_ENABLE_PC@h + mtspr SPRN_A2_CCR2,r3 + isync + + /* Setup CCR0 to disable power saving for now as it's busted + * in the current implementations. Setup CCR1 to wake on + * interrupts normally (we write the default value but who + * knows what FW may have clobbered...) + */ + li r3,0 + mtspr SPRN_A2_CCR0, r3 + LOAD_REG_IMMEDIATE(r3,0x0f0f0f0f) + mtspr SPRN_A2_CCR1, r3 + + /* Initialise MMUCR1 */ + lis r3,INITIAL_MMUCR1@h + ori r3,r3,INITIAL_MMUCR1@l + mtspr SPRN_MMUCR1,r3 + + /* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */ + LOAD_REG_IMMEDIATE(r3, 0x000a7531) + mtspr SPRN_MMUCR2,r3 + + /* Set MMUCR3 to write all thids bit to the TLB */ + LOAD_REG_IMMEDIATE(r3, 0x0000000f) + mtspr SPRN_MMUCR3,r3 + + /* Don't do ERAT stuff if running guest mode */ + mfmsr r3 + andis. r0,r3,MSR_GS@h + bne 1f + + /* Now set the I-ERAT watermark to 15 */ + lis r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h + mtspr SPRN_MMUCR0, r4 + li r4,A2_IERAT_SIZE-1 + PPC_ERATWE(r4,r4,3) + + /* Now set the D-ERAT watermark to 31 */ + lis r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h + mtspr SPRN_MMUCR0, r4 + li r4,A2_DERAT_SIZE-1 + PPC_ERATWE(r4,r4,3) + + /* And invalidate the beast just in case. That won't get rid of + * a bolted entry though it will be in LRU and so will go away eventually + * but let's not bother for now + */ + PPC_ERATILX(0,0,0) +1: + blr + +_GLOBAL(__restore_cpu_a2) + b __setup_cpu_a2 diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S new file mode 100644 index 00000000..8053db02 --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -0,0 +1,98 @@ +/* + * This file contains low level CPU setup functions. + * Kumar Gala <galak@kernel.crashing.org> + * Copyright 2009 Freescale Semiconductor, Inc. + * + * Based on cpu_setup_6xx code by + * Benjamin Herrenschmidt <benh@kernel.crashing.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <asm/processor.h> +#include <asm/cputable.h> +#include <asm/ppc_asm.h> + +_GLOBAL(__e500_icache_setup) + mfspr r0, SPRN_L1CSR1 + andi. r3, r0, L1CSR1_ICE + bnelr /* Already enabled */ + oris r0, r0, L1CSR1_CPE@h + ori r0, r0, (L1CSR1_ICFI | L1CSR1_ICLFR | L1CSR1_ICE) + mtspr SPRN_L1CSR1, r0 /* Enable I-Cache */ + isync + blr + +_GLOBAL(__e500_dcache_setup) + mfspr r0, SPRN_L1CSR0 + andi. r3, r0, L1CSR0_DCE + bnelr /* Already enabled */ + msync + isync + li r0, 0 + mtspr SPRN_L1CSR0, r0 /* Disable */ + msync + isync + li r0, (L1CSR0_DCFI | L1CSR0_CLFC) + mtspr SPRN_L1CSR0, r0 /* Invalidate */ + isync +1: mfspr r0, SPRN_L1CSR0 + andi. 
r3, r0, L1CSR0_CLFC + bne+ 1b /* Wait for lock bits reset */ + oris r0, r0, L1CSR0_CPE@h + ori r0, r0, L1CSR0_DCE + msync + isync + mtspr SPRN_L1CSR0, r0 /* Enable */ + isync + blr + +#ifdef CONFIG_PPC32 +_GLOBAL(__setup_cpu_e200) + /* enable dedicated debug exception handling resources (Debug APU) */ + mfspr r3,SPRN_HID0 + ori r3,r3,HID0_DAPUEN@l + mtspr SPRN_HID0,r3 + b __setup_e200_ivors +_GLOBAL(__setup_cpu_e500v1) +_GLOBAL(__setup_cpu_e500v2) + mflr r4 + bl __e500_icache_setup + bl __e500_dcache_setup + bl __setup_e500_ivors +#ifdef CONFIG_FSL_RIO + /* Ensure that RFXE is set */ + mfspr r3,SPRN_HID1 + oris r3,r3,HID1_RFXE@h + mtspr SPRN_HID1,r3 +#endif + mtlr r4 + blr +_GLOBAL(__setup_cpu_e500mc) + mflr r4 + bl __e500_icache_setup + bl __e500_dcache_setup + bl __setup_e500mc_ivors + mtlr r4 + blr +#endif +/* Right now, restore and setup are the same thing */ +_GLOBAL(__restore_cpu_e5500) +_GLOBAL(__setup_cpu_e5500) + mflr r4 + bl __e500_icache_setup + bl __e500_dcache_setup +#ifdef CONFIG_PPC_BOOK3E_64 + bl .__setup_base_ivors + bl .setup_perfmon_ivor + bl .setup_doorbell_ivors + bl .setup_ehv_ivors +#else + bl __setup_e500mc_ivors +#endif + mtlr r4 + blr diff --git a/arch/powerpc/kernel/cpu_setup_pa6t.S b/arch/powerpc/kernel/cpu_setup_pa6t.S new file mode 100644 index 00000000..d62cb9ca --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_pa6t.S @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2006-2007 PA Semi, Inc + * + * Maintained by: Olof Johansson <olof@lixom.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/cache.h> + +/* Right now, restore and setup are the same thing */ +_GLOBAL(__restore_cpu_pa6t) +_GLOBAL(__setup_cpu_pa6t) + /* Do nothing if not running in HV mode */ + mfmsr r0 + rldicl. r0,r0,4,63 + beqlr + + mfspr r0,SPRN_HID5 + ori r0,r0,0x38 + mtspr SPRN_HID5,r0 + + mfspr r0,SPRN_LPCR + ori r0,r0,0x7000 + mtspr SPRN_LPCR,r0 + + blr diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S new file mode 100644 index 00000000..76797c51 --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_power7.S @@ -0,0 +1,95 @@ +/* + * This file contains low level CPU setup functions. + * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/cache.h> + +/* Entry: r3 = crap, r4 = ptr to cputable entry + * + * Note that we can be called twice for pseudo-PVRs + */ +_GLOBAL(__setup_cpu_power7) + mflr r11 + bl __init_hvmode_206 + mtlr r11 + beqlr + li r0,0 + mtspr SPRN_LPID,r0 + bl __init_LPCR + bl __init_TLB + mtlr r11 + blr + +_GLOBAL(__restore_cpu_power7) + mflr r11 + mfmsr r3 + rldicl. r0,r3,4,63 + beqlr + li r0,0 + mtspr SPRN_LPID,r0 + bl __init_LPCR + bl __init_TLB + mtlr r11 + blr + +__init_hvmode_206: + /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */ + mfmsr r3 + rldicl. r0,r3,4,63 + bnelr + ld r5,CPU_SPEC_FEATURES(r4) + LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE) + xor r5,r5,r6 + std r5,CPU_SPEC_FEATURES(r4) + blr + +__init_LPCR: + /* Setup a sane LPCR: + * + * LPES = 0b01 (HSRR0/1 used for 0x500) + * PECE = 0b111 + * DPFD = 4 + * HDICE = 0 + * VC = 0b100 (VPM0=1, VPM1=0, ISL=0) + * VRMASD = 0b10000 (L=1, LP=00) + * + * Other bits untouched for now + */ + mfspr r3,SPRN_LPCR + li r5,1 + rldimi r3,r5, LPCR_LPES_SH, 64-LPCR_LPES_SH-2 + ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) + li r5,4 + rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3 + clrrdi r3,r3,1 /* clear HDICE */ + li r5,4 + rldimi r3,r5, LPCR_VC_SH, 0 + li r5,0x10 + rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5 + mtspr SPRN_LPCR,r3 + isync + blr + +__init_TLB: + /* Clear the TLB */ + li r6,128 + mtctr r6 + li r7,0xc00 /* IS field = 0b11 */ + ptesync +2: tlbiel r7 + addi r7,r7,0x1000 + bdnz 2b + ptesync +1: blr diff --git a/arch/powerpc/kernel/cpu_setup_ppc970.S b/arch/powerpc/kernel/cpu_setup_ppc970.S new file mode 100644 index 00000000..12fac8df --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_ppc970.S @@ -0,0 +1,210 @@ +/* + * This file contains low level CPU setup functions. + * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/cache.h> + +_GLOBAL(__cpu_preinit_ppc970) + /* Do nothing if not running in HV mode */ + mfmsr r0 + rldicl. 
r0,r0,4,63 + beqlr + + /* Make sure HID4:rm_ci is off before MMU is turned off, that large + * pages are enabled with HID4:61 and clear HID5:DCBZ_size and + * HID5:DCBZ32_ill + */ + li r0,0 + mfspr r3,SPRN_HID4 + rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */ + rldimi r3,r0,2,61 /* clear bit 61 (lg_pg_en) */ + sync + mtspr SPRN_HID4,r3 + isync + sync + mfspr r3,SPRN_HID5 + rldimi r3,r0,6,56 /* clear bits 56 & 57 (DCBZ*) */ + sync + mtspr SPRN_HID5,r3 + isync + sync + + /* Setup some basic HID1 features */ + mfspr r0,SPRN_HID1 + li r3,0x1200 /* enable i-fetch cacheability */ + sldi r3,r3,44 /* and prefetch */ + or r0,r0,r3 + mtspr SPRN_HID1,r0 + mtspr SPRN_HID1,r0 + isync + + /* Clear HIOR */ + li r0,0 + sync + mtspr SPRN_HIOR,0 /* Clear interrupt prefix */ + isync + blr + +/* Definitions for the table use to save CPU states */ +#define CS_HID0 0 +#define CS_HID1 8 +#define CS_HID4 16 +#define CS_HID5 24 +#define CS_SIZE 32 + + .data + .balign L1_CACHE_BYTES,0 +cpu_state_storage: + .space CS_SIZE + .balign L1_CACHE_BYTES,0 + .text + + +_GLOBAL(__setup_cpu_ppc970) + /* Do nothing if not running in HV mode */ + mfmsr r0 + rldicl. r0,r0,4,63 + beq no_hv_mode + + mfspr r0,SPRN_HID0 + li r11,5 /* clear DOZE and SLEEP */ + rldimi r0,r11,52,8 /* set NAP and DPM */ + li r11,0 + rldimi r0,r11,32,31 /* clear EN_ATTN */ + b load_hids /* Jump to shared code */ + + +_GLOBAL(__setup_cpu_ppc970MP) + /* Do nothing if not running in HV mode */ + mfmsr r0 + rldicl. r0,r0,4,63 + beq no_hv_mode + + mfspr r0,SPRN_HID0 + li r11,0x15 /* clear DOZE and SLEEP */ + rldimi r0,r11,52,6 /* set DEEPNAP, NAP and DPM */ + li r11,0 + rldimi r0,r11,32,31 /* clear EN_ATTN */ + +load_hids: + mtspr SPRN_HID0,r0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + sync + isync + + /* Try to set LPES = 01 in HID4 */ + mfspr r0,SPRN_HID4 + clrldi r0,r0,1 /* clear LPES0 */ + ori r0,r0,HID4_LPES1 /* set LPES1 */ + sync + mtspr SPRN_HID4,r0 + isync + + /* Save away cpu state */ + LOAD_REG_ADDR(r5,cpu_state_storage) + + /* Save HID0,1,4 and 5 */ + mfspr r3,SPRN_HID0 + std r3,CS_HID0(r5) + mfspr r3,SPRN_HID1 + std r3,CS_HID1(r5) + mfspr r4,SPRN_HID4 + std r4,CS_HID4(r5) + mfspr r3,SPRN_HID5 + std r3,CS_HID5(r5) + + /* See if we successfully set LPES1 to 1; if not we are in Apple mode */ + andi. r4,r4,HID4_LPES1 + bnelr + +no_hv_mode: + /* Disable CPU_FTR_HVMODE and exit, since we don't have HV mode */ + ld r5,CPU_SPEC_FEATURES(r4) + LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE) + andc r5,r5,r6 + std r5,CPU_SPEC_FEATURES(r4) + blr + +/* Called with no MMU context (typically MSR:IR/DR off) to + * restore CPU state as backed up by the previous + * function. This does not include cache setting + */ +_GLOBAL(__restore_cpu_ppc970) + /* Do nothing if not running in HV mode */ + mfmsr r0 + rldicl. 
r0,r0,4,63 + beqlr + + LOAD_REG_ADDR(r5,cpu_state_storage) + /* Before accessing memory, we make sure rm_ci is clear */ + li r0,0 + mfspr r3,SPRN_HID4 + rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */ + sync + mtspr SPRN_HID4,r3 + isync + sync + + /* Clear interrupt prefix */ + li r0,0 + sync + mtspr SPRN_HIOR,0 + isync + + /* Restore HID0 */ + ld r3,CS_HID0(r5) + sync + isync + mtspr SPRN_HID0,r3 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + sync + isync + + /* Restore HID1 */ + ld r3,CS_HID1(r5) + sync + isync + mtspr SPRN_HID1,r3 + mtspr SPRN_HID1,r3 + sync + isync + + /* Restore HID4 */ + ld r3,CS_HID4(r5) + sync + isync + mtspr SPRN_HID4,r3 + sync + isync + + /* Restore HID5 */ + ld r3,CS_HID5(r5) + sync + isync + mtspr SPRN_HID5,r3 + sync + isync + blr + diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c new file mode 100644 index 00000000..455faa38 --- /dev/null +++ b/arch/powerpc/kernel/cputable.c @@ -0,0 +1,2183 @@ +/* + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * Modifications for ppc64: + * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/threads.h> +#include <linux/init.h> +#include <linux/export.h> + +#include <asm/oprofile_impl.h> +#include <asm/cputable.h> +#include <asm/prom.h> /* for PTRRELOC on ARCH=ppc */ +#include <asm/mmu.h> +#include <asm/setup.h> + +struct cpu_spec* cur_cpu_spec = NULL; +EXPORT_SYMBOL(cur_cpu_spec); + +/* The platform string corresponding to the real PVR */ +const char *powerpc_base_platform; + +/* NOTE: + * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's + * the responsibility of the appropriate CPU save/restore functions to + * eventually copy these settings over. Those save/restore aren't yet + * part of the cputable though. 
That has to be fixed for both ppc32 + * and ppc64 + */ +#ifdef CONFIG_PPC32 +extern void __setup_cpu_e200(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_e500v1(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_e500v2(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_e500mc(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_440ep(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_440epx(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_440gx(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_440grx(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_440spe(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_440x5(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_460gt(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_460sx(unsigned long offset, struct cpu_spec *spec); +extern void __setup_cpu_apm821xx(unsigned long offset, struct cpu_spec *spec); +extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_750(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_750cx(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_750fx(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_7400(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_7410(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec); +#endif /* CONFIG_PPC32 */ +#ifdef CONFIG_PPC64 +extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_a2(unsigned long offset, struct cpu_spec* spec); +extern void __restore_cpu_pa6t(void); +extern void __restore_cpu_ppc970(void); +extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec); +extern void __restore_cpu_power7(void); +extern void __restore_cpu_a2(void); +#endif /* CONFIG_PPC64 */ +#if defined(CONFIG_E500) +extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); +extern void __restore_cpu_e5500(void); +#endif /* CONFIG_E500 */ + +/* This table only contains "desktop" CPUs; it needs to be filled with embedded + * ones as well... 
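+ *
+ * Matching is by PVR and the first hit wins, so entries with more
+ * specific pvr_mask/pvr_value pairs must precede broader ones, with
+ * the all-zero "default match" entry last.  A minimal sketch of the
+ * lookup, assuming the identify_cpu() logic later in this file:
+ *
+ *	for (s = cpu_specs; ; s++)
+ *		if ((pvr & s->pvr_mask) == s->pvr_value)
+ *			return s;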
+ */ +#define COMMON_USER (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \ + PPC_FEATURE_HAS_MMU) +#define COMMON_USER_PPC64 (COMMON_USER | PPC_FEATURE_64) +#define COMMON_USER_POWER4 (COMMON_USER_PPC64 | PPC_FEATURE_POWER4) +#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5 |\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP) +#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS|\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP) +#define COMMON_USER_POWER6 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \ + PPC_FEATURE_TRUE_LE | \ + PPC_FEATURE_PSERIES_PERFMON_COMPAT) +#define COMMON_USER_POWER7 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \ + PPC_FEATURE_TRUE_LE | \ + PPC_FEATURE_PSERIES_PERFMON_COMPAT) +#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ + PPC_FEATURE_TRUE_LE | \ + PPC_FEATURE_HAS_ALTIVEC_COMP) +#ifdef CONFIG_PPC_BOOK3E_64 +#define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE) +#else +#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ + PPC_FEATURE_BOOKE) +#endif + +static struct cpu_spec __initdata cpu_specs[] = { +#ifdef CONFIG_PPC_BOOK3S_64 + { /* Power3 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00400000, + .cpu_name = "POWER3 (630)", + .cpu_features = CPU_FTRS_POWER3, + .cpu_user_features = COMMON_USER_PPC64|PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power3", + .oprofile_type = PPC_OPROFILE_RS64, + .platform = "power3", + }, + { /* Power3+ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00410000, + .cpu_name = "POWER3 (630+)", + .cpu_features = CPU_FTRS_POWER3, + .cpu_user_features = COMMON_USER_PPC64|PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power3", + .oprofile_type = PPC_OPROFILE_RS64, + .platform = "power3", + }, + { /* Northstar */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00330000, + .cpu_name = "RS64-II (northstar)", + .cpu_features = CPU_FTRS_RS64, + .cpu_user_features = COMMON_USER_PPC64, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/rs64", + .oprofile_type = PPC_OPROFILE_RS64, + .platform = "rs64", + }, + { /* Pulsar */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00340000, + .cpu_name = "RS64-III (pulsar)", + .cpu_features = CPU_FTRS_RS64, + .cpu_user_features = COMMON_USER_PPC64, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/rs64", + .oprofile_type = PPC_OPROFILE_RS64, + .platform = "rs64", + }, + { /* I-star */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00360000, + .cpu_name = "RS64-III (icestar)", + .cpu_features = CPU_FTRS_RS64, + .cpu_user_features = COMMON_USER_PPC64, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/rs64", + .oprofile_type = PPC_OPROFILE_RS64, + .platform = "rs64", + }, + { /* S-star */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00370000, + .cpu_name = "RS64-IV (sstar)", + .cpu_features = CPU_FTRS_RS64, + .cpu_user_features = COMMON_USER_PPC64, + .mmu_features = 
MMU_FTR_HPTE_TABLE, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/rs64", + .oprofile_type = PPC_OPROFILE_RS64, + .platform = "rs64", + }, + { /* Power4 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00350000, + .cpu_name = "POWER4 (gp)", + .cpu_features = CPU_FTRS_POWER4, + .cpu_user_features = COMMON_USER_POWER4, + .mmu_features = MMU_FTRS_POWER4, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power4", + .oprofile_type = PPC_OPROFILE_POWER4, + .platform = "power4", + }, + { /* Power4+ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00380000, + .cpu_name = "POWER4+ (gq)", + .cpu_features = CPU_FTRS_POWER4, + .cpu_user_features = COMMON_USER_POWER4, + .mmu_features = MMU_FTRS_POWER4, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power4", + .oprofile_type = PPC_OPROFILE_POWER4, + .platform = "power4", + }, + { /* PPC970 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00390000, + .cpu_name = "PPC970", + .cpu_features = CPU_FTRS_PPC970, + .cpu_user_features = COMMON_USER_POWER4 | + PPC_FEATURE_HAS_ALTIVEC_COMP, + .mmu_features = MMU_FTRS_PPC970, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_ppc970, + .cpu_restore = __restore_cpu_ppc970, + .oprofile_cpu_type = "ppc64/970", + .oprofile_type = PPC_OPROFILE_POWER4, + .platform = "ppc970", + }, + { /* PPC970FX */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003c0000, + .cpu_name = "PPC970FX", + .cpu_features = CPU_FTRS_PPC970, + .cpu_user_features = COMMON_USER_POWER4 | + PPC_FEATURE_HAS_ALTIVEC_COMP, + .mmu_features = MMU_FTRS_PPC970, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_ppc970, + .cpu_restore = __restore_cpu_ppc970, + .oprofile_cpu_type = "ppc64/970", + .oprofile_type = PPC_OPROFILE_POWER4, + .platform = "ppc970", + }, + { /* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x00440100, + .cpu_name = "PPC970MP", + .cpu_features = CPU_FTRS_PPC970, + .cpu_user_features = COMMON_USER_POWER4 | + PPC_FEATURE_HAS_ALTIVEC_COMP, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_ppc970, + .cpu_restore = __restore_cpu_ppc970, + .oprofile_cpu_type = "ppc64/970MP", + .oprofile_type = PPC_OPROFILE_POWER4, + .platform = "ppc970", + }, + { /* PPC970MP */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00440000, + .cpu_name = "PPC970MP", + .cpu_features = CPU_FTRS_PPC970, + .cpu_user_features = COMMON_USER_POWER4 | + PPC_FEATURE_HAS_ALTIVEC_COMP, + .mmu_features = MMU_FTRS_PPC970, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_ppc970MP, + .cpu_restore = __restore_cpu_ppc970, + .oprofile_cpu_type = "ppc64/970MP", + .oprofile_type = PPC_OPROFILE_POWER4, + .platform = "ppc970", + }, + { /* PPC970GX */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00450000, + .cpu_name = "PPC970GX", + .cpu_features = CPU_FTRS_PPC970, + .cpu_user_features = COMMON_USER_POWER4 | + PPC_FEATURE_HAS_ALTIVEC_COMP, + .mmu_features = MMU_FTRS_PPC970, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_ppc970, + .oprofile_cpu_type = "ppc64/970", + .oprofile_type 
= PPC_OPROFILE_POWER4, + .platform = "ppc970", + }, + { /* Power5 GR */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003a0000, + .cpu_name = "POWER5 (gr)", + .cpu_features = CPU_FTRS_POWER5, + .cpu_user_features = COMMON_USER_POWER5, + .mmu_features = MMU_FTRS_POWER5, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power5", + .oprofile_type = PPC_OPROFILE_POWER4, + /* SIHV / SIPR bits are implemented on POWER4+ (GQ) + * and above but only works on POWER5 and above + */ + .oprofile_mmcra_sihv = MMCRA_SIHV, + .oprofile_mmcra_sipr = MMCRA_SIPR, + .platform = "power5", + }, + { /* Power5++ */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x003b0300, + .cpu_name = "POWER5+ (gs)", + .cpu_features = CPU_FTRS_POWER5, + .cpu_user_features = COMMON_USER_POWER5_PLUS, + .mmu_features = MMU_FTRS_POWER5, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .oprofile_cpu_type = "ppc64/power5++", + .oprofile_type = PPC_OPROFILE_POWER4, + .oprofile_mmcra_sihv = MMCRA_SIHV, + .oprofile_mmcra_sipr = MMCRA_SIPR, + .platform = "power5+", + }, + { /* Power5 GS */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003b0000, + .cpu_name = "POWER5+ (gs)", + .cpu_features = CPU_FTRS_POWER5, + .cpu_user_features = COMMON_USER_POWER5_PLUS, + .mmu_features = MMU_FTRS_POWER5, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power5+", + .oprofile_type = PPC_OPROFILE_POWER4, + .oprofile_mmcra_sihv = MMCRA_SIHV, + .oprofile_mmcra_sipr = MMCRA_SIPR, + .platform = "power5+", + }, + { /* POWER6 in P5+ mode; 2.04-compliant processor */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000001, + .cpu_name = "POWER5+", + .cpu_features = CPU_FTRS_POWER5, + .cpu_user_features = COMMON_USER_POWER5_PLUS, + .mmu_features = MMU_FTRS_POWER5, + .icache_bsize = 128, + .dcache_bsize = 128, + .oprofile_cpu_type = "ppc64/ibm-compat-v1", + .oprofile_type = PPC_OPROFILE_POWER4, + .platform = "power5+", + }, + { /* Power6 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003e0000, + .cpu_name = "POWER6 (raw)", + .cpu_features = CPU_FTRS_POWER6, + .cpu_user_features = COMMON_USER_POWER6 | + PPC_FEATURE_POWER6_EXT, + .mmu_features = MMU_FTRS_POWER6, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power6", + .oprofile_type = PPC_OPROFILE_POWER4, + .oprofile_mmcra_sihv = POWER6_MMCRA_SIHV, + .oprofile_mmcra_sipr = POWER6_MMCRA_SIPR, + .oprofile_mmcra_clear = POWER6_MMCRA_THRM | + POWER6_MMCRA_OTHER, + .platform = "power6x", + }, + { /* 2.05-compliant processor, i.e. Power6 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000002, + .cpu_name = "POWER6 (architected)", + .cpu_features = CPU_FTRS_POWER6, + .cpu_user_features = COMMON_USER_POWER6, + .mmu_features = MMU_FTRS_POWER6, + .icache_bsize = 128, + .dcache_bsize = 128, + .oprofile_cpu_type = "ppc64/ibm-compat-v1", + .oprofile_type = PPC_OPROFILE_POWER4, + .platform = "power6", + }, + { /* 2.06-compliant processor, i.e. 
Power7 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000003, + .cpu_name = "POWER7 (architected)", + .cpu_features = CPU_FTRS_POWER7, + .cpu_user_features = COMMON_USER_POWER7, + .mmu_features = MMU_FTRS_POWER7, + .icache_bsize = 128, + .dcache_bsize = 128, + .oprofile_type = PPC_OPROFILE_POWER4, + .oprofile_cpu_type = "ppc64/ibm-compat-v1", + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, + .platform = "power7", + }, + { /* Power7 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003f0000, + .cpu_name = "POWER7 (raw)", + .cpu_features = CPU_FTRS_POWER7, + .cpu_user_features = COMMON_USER_POWER7, + .mmu_features = MMU_FTRS_POWER7, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power7", + .oprofile_type = PPC_OPROFILE_POWER4, + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, + .platform = "power7", + }, + { /* Power7+ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004A0000, + .cpu_name = "POWER7+ (raw)", + .cpu_features = CPU_FTRS_POWER7, + .cpu_user_features = COMMON_USER_POWER7, + .mmu_features = MMU_FTRS_POWER7, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power7", + .oprofile_type = PPC_OPROFILE_POWER4, + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, + .platform = "power7+", + }, + { /* Cell Broadband Engine */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00700000, + .cpu_name = "Cell Broadband Engine", + .cpu_features = CPU_FTRS_CELL, + .cpu_user_features = COMMON_USER_PPC64 | + PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_SMT, + .mmu_features = MMU_FTRS_CELL, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/cell-be", + .oprofile_type = PPC_OPROFILE_CELL, + .platform = "ppc-cell-be", + }, + { /* PA Semi PA6T */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00900000, + .cpu_name = "PA6T", + .cpu_features = CPU_FTRS_PA6T, + .cpu_user_features = COMMON_USER_PA6T, + .mmu_features = MMU_FTRS_PA6T, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 6, + .pmc_type = PPC_PMC_PA6T, + .cpu_setup = __setup_cpu_pa6t, + .cpu_restore = __restore_cpu_pa6t, + .oprofile_cpu_type = "ppc64/pa6t", + .oprofile_type = PPC_OPROFILE_PA6T, + .platform = "pa6t", + }, + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "POWER4 (compatible)", + .cpu_features = CPU_FTRS_COMPATIBLE, + .cpu_user_features = COMMON_USER_PPC64, + .mmu_features = MMU_FTRS_DEFAULT_HPTE_ARCH_V2, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .platform = "power4", + } +#endif /* CONFIG_PPC_BOOK3S_64 */ + +#ifdef CONFIG_PPC32 +#if CLASSIC_PPC + { /* 601 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00010000, + .cpu_name = "601", + .cpu_features = CPU_FTRS_PPC601, + .cpu_user_features = COMMON_USER | PPC_FEATURE_601_INSTR | + PPC_FEATURE_UNIFIED_CACHE | PPC_FEATURE_NO_TB, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_generic, + .platform = "ppc601", + }, + { /* 603 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00030000, + .cpu_name = "603", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + 
.platform = "ppc603", + }, + { /* 603e */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00060000, + .cpu_name = "603e", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* 603ev */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00070000, + .cpu_name = "603ev", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* 604 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00040000, + .cpu_name = "604", + .cpu_features = CPU_FTRS_604, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 2, + .cpu_setup = __setup_cpu_604, + .machine_check = machine_check_generic, + .platform = "ppc604", + }, + { /* 604e */ + .pvr_mask = 0xfffff000, + .pvr_value = 0x00090000, + .cpu_name = "604e", + .cpu_features = CPU_FTRS_604, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_604, + .machine_check = machine_check_generic, + .platform = "ppc604", + }, + { /* 604r */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00090000, + .cpu_name = "604r", + .cpu_features = CPU_FTRS_604, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_604, + .machine_check = machine_check_generic, + .platform = "ppc604", + }, + { /* 604ev */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x000a0000, + .cpu_name = "604ev", + .cpu_features = CPU_FTRS_604, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_604, + .machine_check = machine_check_generic, + .platform = "ppc604", + }, + { /* 740/750 (0x4202, don't support TAU ?) */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x00084202, + .cpu_name = "740/750", + .cpu_features = CPU_FTRS_740_NOTAU, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_750, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750CX (80100 and 8010x?) 
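+	 * (pvr_mask 0xfffffff0 ignores the low revision nibble, i.e.
+	 * this entry matches PVRs 0x00080100 through 0x0008010f)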
*/ + .pvr_mask = 0xfffffff0, + .pvr_value = 0x00080100, + .cpu_name = "750CX", + .cpu_features = CPU_FTRS_750, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_750cx, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750CX (82201 and 82202) */ + .pvr_mask = 0xfffffff0, + .pvr_value = 0x00082200, + .cpu_name = "750CX", + .cpu_features = CPU_FTRS_750, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750cx, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750CXe (82214) */ + .pvr_mask = 0xfffffff0, + .pvr_value = 0x00082210, + .cpu_name = "750CXe", + .cpu_features = CPU_FTRS_750, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750cx, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750CXe "Gekko" (83214) */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x00083214, + .cpu_name = "750CXe", + .cpu_features = CPU_FTRS_750, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750cx, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750CL (and "Broadway") */ + .pvr_mask = 0xfffff0e0, + .pvr_value = 0x00087000, + .cpu_name = "750CL", + .cpu_features = CPU_FTRS_750CL, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750, + .machine_check = machine_check_generic, + .platform = "ppc750", + .oprofile_cpu_type = "ppc/750", + .oprofile_type = PPC_OPROFILE_G4, + }, + { /* 745/755 */ + .pvr_mask = 0xfffff000, + .pvr_value = 0x00083000, + .cpu_name = "745/755", + .cpu_features = CPU_FTRS_750, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750FX rev 1.x */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x70000100, + .cpu_name = "750FX", + .cpu_features = CPU_FTRS_750FX1, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750, + .machine_check = machine_check_generic, + .platform = "ppc750", + .oprofile_cpu_type = "ppc/750", + .oprofile_type = PPC_OPROFILE_G4, + }, + { /* 750FX rev 2.0 must disable HID0[DPM] */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x70000200, + .cpu_name = "750FX", + .cpu_features = CPU_FTRS_750FX2, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750, + .machine_check = machine_check_generic, + .platform = "ppc750", + .oprofile_cpu_type = "ppc/750", 
+ .oprofile_type = PPC_OPROFILE_G4, + }, + { /* 750FX (All revs except 2.0) */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x70000000, + .cpu_name = "750FX", + .cpu_features = CPU_FTRS_750FX, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750fx, + .machine_check = machine_check_generic, + .platform = "ppc750", + .oprofile_cpu_type = "ppc/750", + .oprofile_type = PPC_OPROFILE_G4, + }, + { /* 750GX */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x70020000, + .cpu_name = "750GX", + .cpu_features = CPU_FTRS_750GX, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750fx, + .machine_check = machine_check_generic, + .platform = "ppc750", + .oprofile_cpu_type = "ppc/750", + .oprofile_type = PPC_OPROFILE_G4, + }, + { /* 740/750 (L2CR bit need fixup for 740) */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00080000, + .cpu_name = "740/750", + .cpu_features = CPU_FTRS_740, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 7400 rev 1.1 ? (no TAU) */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x000c1101, + .cpu_name = "7400 (1.1)", + .cpu_features = CPU_FTRS_7400_NOTAU, + .cpu_user_features = COMMON_USER | + PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_7400, + .machine_check = machine_check_generic, + .platform = "ppc7400", + }, + { /* 7400 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x000c0000, + .cpu_name = "7400", + .cpu_features = CPU_FTRS_7400, + .cpu_user_features = COMMON_USER | + PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_7400, + .machine_check = machine_check_generic, + .platform = "ppc7400", + }, + { /* 7410 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x800c0000, + .cpu_name = "7410", + .cpu_features = CPU_FTRS_7400, + .cpu_user_features = COMMON_USER | + PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_7410, + .machine_check = machine_check_generic, + .platform = "ppc7400", + }, + { /* 7450 2.0 - no doze/nap */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x80000200, + .cpu_name = "7450", + .cpu_features = CPU_FTRS_7450_20, + .cpu_user_features = COMMON_USER | + PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7450 2.1 */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x80000201, + .cpu_name = "7450", + .cpu_features = CPU_FTRS_7450_21, + 
.cpu_user_features = COMMON_USER | + PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7450 2.3 and newer */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80000000, + .cpu_name = "7450", + .cpu_features = CPU_FTRS_7450_23, + .cpu_user_features = COMMON_USER | + PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7455 rev 1.x */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x80010100, + .cpu_name = "7455", + .cpu_features = CPU_FTRS_7455_1, + .cpu_user_features = COMMON_USER | + PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7455 rev 2.0 */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x80010200, + .cpu_name = "7455", + .cpu_features = CPU_FTRS_7455_20, + .cpu_user_features = COMMON_USER | + PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7455 others */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80010000, + .cpu_name = "7455", + .cpu_features = CPU_FTRS_7455, + .cpu_user_features = COMMON_USER | + PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7447/7457 Rev 1.0 */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x80020100, + .cpu_name = "7447/7457", + .cpu_features = CPU_FTRS_7447_10, + .cpu_user_features = COMMON_USER | + PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7447/7457 Rev 1.1 */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x80020101, + .cpu_name = "7447/7457", + .cpu_features = CPU_FTRS_7447_10, + .cpu_user_features = COMMON_USER | + PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + 
.oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7447/7457 Rev 1.2 and later */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80020000, + .cpu_name = "7447/7457", + .cpu_features = CPU_FTRS_7447, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7447A */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80030000, + .cpu_name = "7447A", + .cpu_features = CPU_FTRS_7447A, + .cpu_user_features = COMMON_USER | + PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7448 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80040000, + .cpu_name = "7448", + .cpu_features = CPU_FTRS_7448, + .cpu_user_features = COMMON_USER | + PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 82xx (8240, 8245, 8260 are all 603e cores) */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00810000, + .cpu_name = "82xx", + .cpu_features = CPU_FTRS_82XX, + .cpu_user_features = COMMON_USER, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* All G2_LE (603e core, plus some) have the same pvr */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00820000, + .cpu_name = "G2_LE", + .cpu_features = CPU_FTRS_G2_LE, + .cpu_user_features = COMMON_USER, + .mmu_features = MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* e300c1 (a 603e core, plus some) on 83xx */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00830000, + .cpu_name = "e300c1", + .cpu_features = CPU_FTRS_E300, + .cpu_user_features = COMMON_USER, + .mmu_features = MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* e300c2 (an e300c1 core, plus some, minus FPU) on 83xx */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00840000, + .cpu_name = "e300c2", + .cpu_features = CPU_FTRS_E300C2, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .mmu_features = MMU_FTR_USE_HIGH_BATS | + MMU_FTR_NEED_DTLB_SW_LRU, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* e300c3 (e300c1, plus one IU, half cache size) on 83xx */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00850000, + .cpu_name = "e300c3", + .cpu_features = 
CPU_FTRS_E300, + .cpu_user_features = COMMON_USER, + .mmu_features = MMU_FTR_USE_HIGH_BATS | + MMU_FTR_NEED_DTLB_SW_LRU, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .num_pmcs = 4, + .oprofile_cpu_type = "ppc/e300", + .oprofile_type = PPC_OPROFILE_FSL_EMB, + .platform = "ppc603", + }, + { /* e300c4 (e300c1, plus one IU) */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00860000, + .cpu_name = "e300c4", + .cpu_features = CPU_FTRS_E300, + .cpu_user_features = COMMON_USER, + .mmu_features = MMU_FTR_USE_HIGH_BATS | + MMU_FTR_NEED_DTLB_SW_LRU, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .num_pmcs = 4, + .oprofile_cpu_type = "ppc/e300", + .oprofile_type = PPC_OPROFILE_FSL_EMB, + .platform = "ppc603", + }, + { /* default match, we assume split I/D cache & TB (non-601)... */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "(generic PPC)", + .cpu_features = CPU_FTRS_CLASSIC32, + .cpu_user_features = COMMON_USER, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, +#endif /* CLASSIC_PPC */ +#ifdef CONFIG_8xx + { /* 8xx */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00500000, + .cpu_name = "8xx", + /* CPU_FTR_MAYBE_CAN_DOZE is possible, + * if the 8xx code is there.... */ + .cpu_features = CPU_FTRS_8XX, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .mmu_features = MMU_FTR_TYPE_8xx, + .icache_bsize = 16, + .dcache_bsize = 16, + .platform = "ppc823", + }, +#endif /* CONFIG_8xx */ +#ifdef CONFIG_40x + { /* 403GC */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x00200200, + .cpu_name = "403GC", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 16, + .dcache_bsize = 16, + .machine_check = machine_check_4xx, + .platform = "ppc403", + }, + { /* 403GCX */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x00201400, + .cpu_name = "403GCX", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_NO_TB, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 16, + .dcache_bsize = 16, + .machine_check = machine_check_4xx, + .platform = "ppc403", + }, + { /* 403G ?? 
*/ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00200000, + .cpu_name = "403G ??", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 16, + .dcache_bsize = 16, + .machine_check = machine_check_4xx, + .platform = "ppc403", + }, + { /* 405GP */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x40110000, + .cpu_name = "405GP", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* STB 03xxx */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x40130000, + .cpu_name = "STB03xxx", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* STB 04xxx */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x41810000, + .cpu_name = "STB04xxx", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* NP405L */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x41610000, + .cpu_name = "NP405L", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* NP4GS3 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x40B10000, + .cpu_name = "NP4GS3", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* NP405H */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x41410000, + .cpu_name = "NP405H", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405GPr */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x50910000, + .cpu_name = "405GPr", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* STBx25xx */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x51510000, + .cpu_name = "STBx25xx", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405LP */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x41F10000, + .cpu_name = "405LP", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + 
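An aside before the table continues with the Xilinx and 405EX parts: these entries are consumed by identify_cpu(), defined at the end of this file, which returns the first entry whose masked PVR matches. That is why the narrow masks (0xffffffff, 0xffffff00) precede the wider family matches, and why the all-zero default entry closes each #ifdef block. The selection reduces to this sketch (the helper name is hypothetical):

/* Sketch of the table walk performed by identify_cpu() later in this file. */
static struct cpu_spec *pick_cpu_spec(struct cpu_spec *s, int nentries,
				      unsigned int pvr)
{
	int i;

	for (i = 0; i < nentries; i++, s++) {
		/* pvr_mask == 0 matches any PVR: the catch-all entry. */
		if ((pvr & s->pvr_mask) == s->pvr_value)
			return s;
	}
	return NULL;	/* the real identify_cpu() BUG()s here instead */
}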
{ /* Xilinx Virtex-II Pro */ + .pvr_mask = 0xfffff000, + .pvr_value = 0x20010000, + .cpu_name = "Virtex-II Pro", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* Xilinx Virtex-4 FX */ + .pvr_mask = 0xfffff000, + .pvr_value = 0x20011000, + .cpu_name = "Virtex-4 FX", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EP */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x51210000, + .cpu_name = "405EP", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EX Rev. A/B with Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910007, + .cpu_name = "405EX Rev. A/B", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EX Rev. C without Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x1291000d, + .cpu_name = "405EX Rev. C", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EX Rev. C with Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x1291000f, + .cpu_name = "405EX Rev. C", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EX Rev. D without Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910003, + .cpu_name = "405EX Rev. D", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EX Rev. D with Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910005, + .cpu_name = "405EX Rev. D", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EXr Rev. A/B without Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910001, + .cpu_name = "405EXr Rev. A/B", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EXr Rev. 
C without Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910009, + .cpu_name = "405EXr Rev. C", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EXr Rev. C with Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x1291000b, + .cpu_name = "405EXr Rev. C", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EXr Rev. D without Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910000, + .cpu_name = "405EXr Rev. D", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EXr Rev. D with Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910002, + .cpu_name = "405EXr Rev. D", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { + /* 405EZ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x41510000, + .cpu_name = "405EZ", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* APM8018X */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x7ff11432, + .cpu_name = "APM8018X", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "(generic 40x PPC)", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + } + +#endif /* CONFIG_40x */ +#ifdef CONFIG_44x + { + .pvr_mask = 0xf0000fff, + .pvr_value = 0x40000850, + .cpu_name = "440GR Rev. A", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440", + }, + { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x40000858, + .cpu_name = "440EP Rev. A", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440ep, + .machine_check = machine_check_4xx, + .platform = "ppc440", + }, + { + .pvr_mask = 0xf0000fff, + .pvr_value = 0x400008d3, + .cpu_name = "440GR Rev. 
B", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440", + }, + { /* Matches both physical and logical PVR for 440EP (logical pvr = pvr | 0x8) */ + .pvr_mask = 0xf0000ff7, + .pvr_value = 0x400008d4, + .cpu_name = "440EP Rev. C", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440ep, + .machine_check = machine_check_4xx, + .platform = "ppc440", + }, + { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x400008db, + .cpu_name = "440EP Rev. B", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440ep, + .machine_check = machine_check_4xx, + .platform = "ppc440", + }, + { /* 440GRX */ + .pvr_mask = 0xf0000ffb, + .pvr_value = 0x200008D0, + .cpu_name = "440GRX", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440grx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* Use logical PVR for 440EPx (logical pvr = pvr | 0x8) */ + .pvr_mask = 0xf0000ffb, + .pvr_value = 0x200008D8, + .cpu_name = "440EPX", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440epx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440GP Rev. B */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x40000440, + .cpu_name = "440GP Rev. B", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440gp", + }, + { /* 440GP Rev. C */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x40000481, + .cpu_name = "440GP Rev. C", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440gp", + }, + { /* 440GX Rev. A */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x50000850, + .cpu_name = "440GX Rev. A", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440gx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440GX Rev. B */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x50000851, + .cpu_name = "440GX Rev. B", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440gx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440GX Rev. C */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x50000892, + .cpu_name = "440GX Rev. 
C", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440gx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440GX Rev. F */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x50000894, + .cpu_name = "440GX Rev. F", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440gx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440SP Rev. A */ + .pvr_mask = 0xfff00fff, + .pvr_value = 0x53200891, + .cpu_name = "440SP Rev. A", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440", + }, + { /* 440SPe Rev. A */ + .pvr_mask = 0xfff00fff, + .pvr_value = 0x53400890, + .cpu_name = "440SPe Rev. A", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440spe, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440SPe Rev. B */ + .pvr_mask = 0xfff00fff, + .pvr_value = 0x53400891, + .cpu_name = "440SPe Rev. B", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440spe, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440 in Xilinx Virtex-5 FXT */ + .pvr_mask = 0xfffffff0, + .pvr_value = 0x7ff21910, + .cpu_name = "440 in Virtex-5 FXT", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440x5, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 460EX */ + .pvr_mask = 0xffff0006, + .pvr_value = 0x13020002, + .cpu_name = "460EX", + .cpu_features = CPU_FTRS_440x6, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460ex, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 460EX Rev B */ + .pvr_mask = 0xffff0007, + .pvr_value = 0x13020004, + .cpu_name = "460EX Rev. B", + .cpu_features = CPU_FTRS_440x6, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460ex, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 460GT */ + .pvr_mask = 0xffff0006, + .pvr_value = 0x13020000, + .cpu_name = "460GT", + .cpu_features = CPU_FTRS_440x6, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460gt, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 460GT Rev B */ + .pvr_mask = 0xffff0007, + .pvr_value = 0x13020005, + .cpu_name = "460GT Rev. 
B", + .cpu_features = CPU_FTRS_440x6, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460gt, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 460SX */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x13541800, + .cpu_name = "460SX", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460sx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 464 in APM821xx */ + .pvr_mask = 0xfffffff0, + .pvr_value = 0x12C41C80, + .cpu_name = "APM821XX", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_apm821xx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 476 DD2 core */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x11a52080, + .cpu_name = "476", + .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | + MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, + { /* 476fpe */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x7ff50000, + .cpu_name = "476fpe", + .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | + MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, + { /* 476 iss */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00050000, + .cpu_name = "476", + .cpu_features = CPU_FTRS_47X, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | + MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, + { /* 476 others */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x11a50000, + .cpu_name = "476", + .cpu_features = CPU_FTRS_47X, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | + MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "(generic 44x PPC)", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440", + } +#endif /* CONFIG_44x */ +#ifdef CONFIG_E200 + { /* e200z5 */ + .pvr_mask = 0xfff00000, + .pvr_value = 0x81000000, + .cpu_name = "e200z5", + /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ + .cpu_features = CPU_FTRS_E200, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_EFP_SINGLE | + PPC_FEATURE_UNIFIED_CACHE, + .mmu_features = MMU_FTR_TYPE_FSL_E, + .dcache_bsize = 32, + .machine_check = machine_check_e200, + .platform = "ppc5554", + }, + { /* e200z6 */ + .pvr_mask = 0xfff00000, + .pvr_value = 0x81100000, + .cpu_name = "e200z6", + /* xxx - galak: add 
CPU_FTR_MAYBE_CAN_DOZE */ + .cpu_features = CPU_FTRS_E200, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_SPE_COMP | + PPC_FEATURE_HAS_EFP_SINGLE_COMP | + PPC_FEATURE_UNIFIED_CACHE, + .mmu_features = MMU_FTR_TYPE_FSL_E, + .dcache_bsize = 32, + .machine_check = machine_check_e200, + .platform = "ppc5554", + }, + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "(generic E200 PPC)", + .cpu_features = CPU_FTRS_E200, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_EFP_SINGLE | + PPC_FEATURE_UNIFIED_CACHE, + .mmu_features = MMU_FTR_TYPE_FSL_E, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_e200, + .machine_check = machine_check_e200, + .platform = "ppc5554", + } +#endif /* CONFIG_E200 */ +#endif /* CONFIG_PPC32 */ +#ifdef CONFIG_E500 +#ifdef CONFIG_PPC32 + { /* e500 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80200000, + .cpu_name = "e500", + .cpu_features = CPU_FTRS_E500, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_SPE_COMP | + PPC_FEATURE_HAS_EFP_SINGLE_COMP, + .mmu_features = MMU_FTR_TYPE_FSL_E, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .oprofile_cpu_type = "ppc/e500", + .oprofile_type = PPC_OPROFILE_FSL_EMB, + .cpu_setup = __setup_cpu_e500v1, + .machine_check = machine_check_e500, + .platform = "ppc8540", + }, + { /* e500v2 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80210000, + .cpu_name = "e500v2", + .cpu_features = CPU_FTRS_E500_2, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_SPE_COMP | + PPC_FEATURE_HAS_EFP_SINGLE_COMP | + PPC_FEATURE_HAS_EFP_DOUBLE_COMP, + .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .oprofile_cpu_type = "ppc/e500", + .oprofile_type = PPC_OPROFILE_FSL_EMB, + .cpu_setup = __setup_cpu_e500v2, + .machine_check = machine_check_e500, + .platform = "ppc8548", + }, + { /* e500mc */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80230000, + .cpu_name = "e500mc", + .cpu_features = CPU_FTRS_E500MC, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | + MMU_FTR_USE_TLBILX, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 4, + .oprofile_cpu_type = "ppc/e500mc", + .oprofile_type = PPC_OPROFILE_FSL_EMB, + .cpu_setup = __setup_cpu_e500mc, + .machine_check = machine_check_e500mc, + .platform = "ppce500mc", + }, +#endif /* CONFIG_PPC32 */ + { /* e5500 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80240000, + .cpu_name = "e5500", + .cpu_features = CPU_FTRS_E5500, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | + MMU_FTR_USE_TLBILX, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 4, + .oprofile_cpu_type = "ppc/e500mc", + .oprofile_type = PPC_OPROFILE_FSL_EMB, + .cpu_setup = __setup_cpu_e5500, + .cpu_restore = __restore_cpu_e5500, + .machine_check = machine_check_e500mc, + .platform = "ppce5500", + }, + { /* e6500 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80400000, + .cpu_name = "e6500", + .cpu_features = CPU_FTRS_E6500, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | + MMU_FTR_USE_TLBILX, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 4, + .oprofile_cpu_type = "ppc/e6500", + .oprofile_type = PPC_OPROFILE_FSL_EMB, + .cpu_setup = __setup_cpu_e5500, + .cpu_restore = __restore_cpu_e5500, + .machine_check = machine_check_e500mc, + .platform = "ppce6500", 
+ }, +#ifdef CONFIG_PPC32 + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "(generic E500 PPC)", + .cpu_features = CPU_FTRS_E500, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_SPE_COMP | + PPC_FEATURE_HAS_EFP_SINGLE_COMP, + .mmu_features = MMU_FTR_TYPE_FSL_E, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_e500, + .platform = "powerpc", + } +#endif /* CONFIG_PPC32 */ +#endif /* CONFIG_E500 */ + +#ifdef CONFIG_PPC_A2 + { /* Standard A2 (>= DD2) + FPU core */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00480000, + .cpu_name = "A2 (>= DD2)", + .cpu_features = CPU_FTRS_A2, + .cpu_user_features = COMMON_USER_PPC64, + .mmu_features = MMU_FTRS_A2, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 0, + .cpu_setup = __setup_cpu_a2, + .cpu_restore = __restore_cpu_a2, + .machine_check = machine_check_generic, + .platform = "ppca2", + }, + { /* This is a default entry to get going, to be replaced by + * a real one at some stage + */ +#define CPU_FTRS_BASE_BOOK3E (CPU_FTR_USE_TB | \ + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_SMT | \ + CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "Book3E", + .cpu_features = CPU_FTRS_BASE_BOOK3E, + .cpu_user_features = COMMON_USER_PPC64, + .mmu_features = MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | + MMU_FTR_USE_TLBIVAX_BCAST | + MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 0, + .machine_check = machine_check_generic, + .platform = "power6", + }, +#endif /* CONFIG_PPC_A2 */ +}; + +static struct cpu_spec the_cpu_spec; + +static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, + struct cpu_spec *s) +{ + struct cpu_spec *t = &the_cpu_spec; + struct cpu_spec old; + + t = PTRRELOC(t); + old = *t; + + /* Copy everything, then do fixups */ + *t = *s; + + /* + * If we are overriding a previous value derived from the real + * PVR with a new value obtained using a logical PVR value, + * don't modify the performance monitor fields. + */ + if (old.num_pmcs && !s->num_pmcs) { + t->num_pmcs = old.num_pmcs; + t->pmc_type = old.pmc_type; + t->oprofile_type = old.oprofile_type; + t->oprofile_mmcra_sihv = old.oprofile_mmcra_sihv; + t->oprofile_mmcra_sipr = old.oprofile_mmcra_sipr; + t->oprofile_mmcra_clear = old.oprofile_mmcra_clear; + + /* + * If we have passed through this logic once before and + * have pulled the default case because the real PVR was + * not found inside cpu_specs[], then we are possibly + * running in compatibility mode. In that case, let the + * oprofiler know which set of compatibility counters to + * pull from by making sure the oprofile_cpu_type string + * is set to that of compatibility mode. If the + * oprofile_cpu_type already has a value, then we are + * possibly overriding a real PVR with a logical one, + * and, in that case, keep the current value for + * oprofile_cpu_type. + */ + if (old.oprofile_cpu_type != NULL) { + t->oprofile_cpu_type = old.oprofile_cpu_type; + t->oprofile_type = old.oprofile_type; + } + } + + *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec; + + /* + * Set the base platform string once; assumes + * we're called with real pvr first. + */ + if (*PTRRELOC(&powerpc_base_platform) == NULL) + *PTRRELOC(&powerpc_base_platform) = t->platform; + +#if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE) + /* ppc64 and booke expect identify_cpu to also call setup_cpu for + * that processor. I will consolidate that at a later time, for now, + * just use #ifdef. 
We also don't need to PTRRELOC the function + * pointer on ppc64 and booke as we are running at 0 in real mode + * on ppc64 and reloc_offset is always 0 on booke. + */ + if (t->cpu_setup) { + t->cpu_setup(offset, t); + } +#endif /* CONFIG_PPC64 || CONFIG_BOOKE */ + + return t; +} + +struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr) +{ + struct cpu_spec *s = cpu_specs; + int i; + + s = PTRRELOC(s); + + for (i = 0; i < ARRAY_SIZE(cpu_specs); i++,s++) { + if ((pvr & s->pvr_mask) == s->pvr_value) + return setup_cpu_spec(offset, s); + } + + BUG(); + + return NULL; +} diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c new file mode 100644 index 00000000..fdcd8f55 --- /dev/null +++ b/arch/powerpc/kernel/crash.c @@ -0,0 +1,368 @@ +/* + * Architecture specific (PPC64) functions for kexec based crash dumps. + * + * Copyright (C) 2005, IBM Corp. + * + * Created by: Haren Myneni + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + * + */ + +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/reboot.h> +#include <linux/kexec.h> +#include <linux/export.h> +#include <linux/crash_dump.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/types.h> + +#include <asm/processor.h> +#include <asm/machdep.h> +#include <asm/kexec.h> +#include <asm/kdump.h> +#include <asm/prom.h> +#include <asm/smp.h> +#include <asm/setjmp.h> +#include <asm/debug.h> + +/* + * The primary CPU waits a while for all secondary CPUs to enter. This is to + * avoid sending an IPI if the secondary CPUs are entering + * crash_kexec_secondary on their own (eg via a system reset). + * + * The secondary timeout has to be longer than the primary. Both timeouts are + * in milliseconds. + */ +#define PRIMARY_TIMEOUT 500 +#define SECONDARY_TIMEOUT 1000 + +#define IPI_TIMEOUT 10000 +#define REAL_MODE_TIMEOUT 10000 + +/* This keeps a track of which one is the crashing cpu. */ +int crashing_cpu = -1; +static int time_to_dump; + +#define CRASH_HANDLER_MAX 3 +/* NULL terminated list of shutdown handles */ +static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; +static DEFINE_SPINLOCK(crash_handlers_lock); + +static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; +static int crash_shutdown_cpu = -1; + +static int handle_fault(struct pt_regs *regs) +{ + if (crash_shutdown_cpu == smp_processor_id()) + longjmp(crash_shutdown_buf, 1); + return 0; +} + +#ifdef CONFIG_SMP + +static atomic_t cpus_in_crash; +void crash_ipi_callback(struct pt_regs *regs) +{ + static cpumask_t cpus_state_saved = CPU_MASK_NONE; + + int cpu = smp_processor_id(); + + if (!cpu_online(cpu)) + return; + + hard_irq_disable(); + if (!cpumask_test_cpu(cpu, &cpus_state_saved)) { + crash_save_cpu(regs, cpu); + cpumask_set_cpu(cpu, &cpus_state_saved); + } + + atomic_inc(&cpus_in_crash); + smp_mb__after_atomic_inc(); + + /* + * Starting the kdump boot. + * This barrier is needed to make sure that all CPUs are stopped. 
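The rendezvous being built here is: each secondary saves its registers exactly once (the cpus_state_saved mask guards against re-entry via system reset), checks in on an atomic counter, then spins until the crashing CPU flips time_to_dump. Stripped to its essentials, the secondary side of the protocol looks like this sketch (names hypothetical):

/* Secondary side of a crash rendezvous: check in once, then wait for 'go'. */
static atomic_t checked_in = ATOMIC_INIT(0);
static volatile int go;			/* set by the primary CPU */

static void secondary_rendezvous(void)
{
	atomic_inc(&checked_in);
	smp_mb__after_atomic_inc();	/* make the count visible before spinning */
	while (!go)
		cpu_relax();
}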
+ */ + while (!time_to_dump) + cpu_relax(); + + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(1, 1); + +#ifdef CONFIG_PPC64 + kexec_smp_wait(); +#else + for (;;); /* FIXME */ +#endif + + /* NOTREACHED */ +} + +static void crash_kexec_prepare_cpus(int cpu) +{ + unsigned int msecs; + unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ + int tries = 0; + int (*old_handler)(struct pt_regs *regs); + + printk(KERN_EMERG "Sending IPI to other CPUs\n"); + + crash_send_ipi(crash_ipi_callback); + smp_wmb(); + +again: + /* + * FIXME: Until we will have the way to stop other CPUs reliably, + * the crash CPU will send an IPI and wait for other CPUs to + * respond. + */ + msecs = IPI_TIMEOUT; + while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0)) + mdelay(1); + + /* Would it be better to replace the trap vector here? */ + + if (atomic_read(&cpus_in_crash) >= ncpus) { + printk(KERN_EMERG "IPI complete\n"); + return; + } + + printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", + ncpus - atomic_read(&cpus_in_crash)); + + /* + * If we have a panic timeout set then we can't wait indefinitely + * for someone to activate system reset. We also give up on the + * second time through if system reset fail to work. + */ + if ((panic_timeout > 0) || (tries > 0)) + return; + + /* + * A system reset will cause all CPUs to take an 0x100 exception. + * The primary CPU returns here via setjmp, and the secondary + * CPUs reexecute the crash_kexec_secondary path. + */ + old_handler = __debugger; + __debugger = handle_fault; + crash_shutdown_cpu = smp_processor_id(); + + if (setjmp(crash_shutdown_buf) == 0) { + printk(KERN_EMERG "Activate system reset (dumprestart) " + "to stop other cpu(s)\n"); + + /* + * A system reset will force all CPUs to execute the + * crash code again. We need to reset cpus_in_crash so we + * wait for everyone to do this. + */ + atomic_set(&cpus_in_crash, 0); + smp_mb(); + + while (atomic_read(&cpus_in_crash) < ncpus) + cpu_relax(); + } + + crash_shutdown_cpu = -1; + __debugger = old_handler; + + tries++; + goto again; +} + +/* + * This function will be called by secondary cpus. + */ +void crash_kexec_secondary(struct pt_regs *regs) +{ + unsigned long flags; + int msecs = SECONDARY_TIMEOUT; + + local_irq_save(flags); + + /* Wait for the primary crash CPU to signal its progress */ + while (crashing_cpu < 0) { + if (--msecs < 0) { + /* No response, kdump image may not have been loaded */ + local_irq_restore(flags); + return; + } + + mdelay(1); + } + + crash_ipi_callback(regs); +} + +#else /* ! CONFIG_SMP */ + +static void crash_kexec_prepare_cpus(int cpu) +{ + /* + * move the secondaries to us so that we can copy + * the new kernel 0-0x100 safely + * + * do this if kexec in setup.c ? 
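Note how the SMP variant above never waits unboundedly on wedged CPUs: it polls the check-in count in 1 ms steps (IPI_TIMEOUT), and only when CPUs are still missing does it escalate to the setjmp-guarded system-reset window. The bounded poll reduces to this sketch (hypothetical helper):

/* Wait up to timeout_ms for 'want' CPUs to check in. */
static bool wait_for_crash_cpus(atomic_t *entered, int want, int timeout_ms)
{
	while (atomic_read(entered) < want && --timeout_ms > 0)
		mdelay(1);			/* 1 ms per retry */
	return atomic_read(entered) >= want;	/* true if all arrived */
}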
+ */ +#ifdef CONFIG_PPC64 + smp_release_cpus(); +#else + /* FIXME */ +#endif +} + +void crash_kexec_secondary(struct pt_regs *regs) +{ +} +#endif /* CONFIG_SMP */ + +/* wait for all the CPUs to hit real mode but timeout if they don't come in */ +#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64) +static void crash_kexec_wait_realmode(int cpu) +{ + unsigned int msecs; + int i; + + msecs = REAL_MODE_TIMEOUT; + for (i=0; i < nr_cpu_ids && msecs > 0; i++) { + if (i == cpu) + continue; + + while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { + barrier(); + if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0)) + break; + msecs--; + mdelay(1); + } + } + mb(); +} +#else +static inline void crash_kexec_wait_realmode(int cpu) {} +#endif /* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */ + +/* + * Register a function to be called on shutdown. Only use this if you + * can't reset your device in the second kernel. + */ +int crash_shutdown_register(crash_shutdown_t handler) +{ + unsigned int i, rc; + + spin_lock(&crash_handlers_lock); + for (i = 0 ; i < CRASH_HANDLER_MAX; i++) + if (!crash_shutdown_handles[i]) { + /* Insert handle at first empty entry */ + crash_shutdown_handles[i] = handler; + rc = 0; + break; + } + + if (i == CRASH_HANDLER_MAX) { + printk(KERN_ERR "Crash shutdown handles full, " + "not registered.\n"); + rc = 1; + } + + spin_unlock(&crash_handlers_lock); + return rc; +} +EXPORT_SYMBOL(crash_shutdown_register); + +int crash_shutdown_unregister(crash_shutdown_t handler) +{ + unsigned int i, rc; + + spin_lock(&crash_handlers_lock); + for (i = 0 ; i < CRASH_HANDLER_MAX; i++) + if (crash_shutdown_handles[i] == handler) + break; + + if (i == CRASH_HANDLER_MAX) { + printk(KERN_ERR "Crash shutdown handle not found\n"); + rc = 1; + } else { + /* Shift handles down */ + for (; crash_shutdown_handles[i]; i++) + crash_shutdown_handles[i] = + crash_shutdown_handles[i+1]; + rc = 0; + } + + spin_unlock(&crash_handlers_lock); + return rc; +} +EXPORT_SYMBOL(crash_shutdown_unregister); + +void default_machine_crash_shutdown(struct pt_regs *regs) +{ + unsigned int i; + int (*old_handler)(struct pt_regs *regs); + + /* + * This function is only called after the system + * has panicked or is otherwise in a critical state. + * The minimum amount of code to allow a kexec'd kernel + * to run successfully needs to happen here. + * + * In practice this means stopping other cpus in + * an SMP system. + * The kernel is broken so disable interrupts. + */ + hard_irq_disable(); + + /* + * Make a note of crashing cpu. Will be used in machine_kexec + * such that another IPI will not be sent. + */ + crashing_cpu = smp_processor_id(); + + /* + * If we came in via system reset, wait a while for the secondary + * CPUs to enter. + */ + if (TRAP(regs) == 0x100) + mdelay(PRIMARY_TIMEOUT); + + crash_kexec_prepare_cpus(crashing_cpu); + + crash_save_cpu(regs, crashing_cpu); + + time_to_dump = 1; + + crash_kexec_wait_realmode(crashing_cpu); + + machine_kexec_mask_interrupts(); + + /* + * Call registered shutdown routines safely. Swap out + * __debugger_fault_handler, and replace on exit. + */ + old_handler = __debugger_fault_handler; + __debugger_fault_handler = handle_fault; + crash_shutdown_cpu = smp_processor_id(); + for (i = 0; crash_shutdown_handles[i]; i++) { + if (setjmp(crash_shutdown_buf) == 0) { + /* + * Insert syncs and delay to ensure + * instructions in the dangerous region don't + * leak away from this protected region. 
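For context, the handlers invoked below come from crash_shutdown_register() above, which a driver should use only when its device cannot be reset from the kdump kernel. A hypothetical registration would look like this (the mydev_* names are illustrative, not a real API):

/* Quiesce a DMA engine so it cannot scribble over the kdump kernel. */
static void mydev_crash_shutdown(void)
{
	/* Runs after a panic; keep it minimal. A fault here is caught by
	 * the setjmp/longjmp guard in default_machine_crash_shutdown(). */
	mydev_stop_dma();		/* assumed driver-local helper */
}

static int __init mydev_init(void)
{
	return crash_shutdown_register(mydev_crash_shutdown);
}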
+ */ + asm volatile("sync; isync"); + /* dangerous region */ + crash_shutdown_handles[i](); + asm volatile("sync; isync"); + } + } + crash_shutdown_cpu = -1; + __debugger_fault_handler = old_handler; + + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(1, 0); +} diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c new file mode 100644 index 00000000..b3ba5163 --- /dev/null +++ b/arch/powerpc/kernel/crash_dump.c @@ -0,0 +1,159 @@ +/* + * Routines for doing kexec-based kdump. + * + * Copyright (C) 2005, IBM Corp. + * + * Created by: Michael Ellerman + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#undef DEBUG + +#include <linux/crash_dump.h> +#include <linux/bootmem.h> +#include <linux/memblock.h> +#include <asm/code-patching.h> +#include <asm/kdump.h> +#include <asm/prom.h> +#include <asm/firmware.h> +#include <asm/uaccess.h> +#include <asm/rtas.h> + +#ifdef DEBUG +#include <asm/udbg.h> +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +#ifndef CONFIG_NONSTATIC_KERNEL +void __init reserve_kdump_trampoline(void) +{ + memblock_reserve(0, KDUMP_RESERVE_LIMIT); +} + +static void __init create_trampoline(unsigned long addr) +{ + unsigned int *p = (unsigned int *)addr; + + /* The maximum range of a single instruction branch, is the current + * instruction's address + (32 MB - 4) bytes. For the trampoline we + * need to branch to current address + 32 MB. So we insert a nop at + * the trampoline address, then the next instruction (+ 4 bytes) + * does a branch to (32 MB - 4). The net effect is that when we + * branch to "addr" we jump to ("addr" + 32 MB). Although it requires + * two instructions it doesn't require any registers. + */ + patch_instruction(p, PPC_INST_NOP); + patch_branch(++p, addr + PHYSICAL_START, 0); +} + +void __init setup_kdump_trampoline(void) +{ + unsigned long i; + + DBG(" -> setup_kdump_trampoline()\n"); + + for (i = KDUMP_TRAMPOLINE_START; i < KDUMP_TRAMPOLINE_END; i += 8) { + create_trampoline(i); + } + +#ifdef CONFIG_PPC_PSERIES + create_trampoline(__pa(system_reset_fwnmi) - PHYSICAL_START); + create_trampoline(__pa(machine_check_fwnmi) - PHYSICAL_START); +#endif /* CONFIG_PPC_PSERIES */ + + DBG(" <- setup_kdump_trampoline()\n"); +} +#endif /* CONFIG_NONSTATIC_KERNEL */ + +static int __init parse_savemaxmem(char *p) +{ + if (p) + saved_max_pfn = (memparse(p, &p) >> PAGE_SHIFT) - 1; + + return 1; +} +__setup("savemaxmem=", parse_savemaxmem); + + +static size_t copy_oldmem_vaddr(void *vaddr, char *buf, size_t csize, + unsigned long offset, int userbuf) +{ + if (userbuf) { + if (copy_to_user((char __user *)buf, (vaddr + offset), csize)) + return -EFAULT; + } else + memcpy(buf, (vaddr + offset), csize); + + return csize; +} + +/** + * copy_oldmem_page - copy one page from "oldmem" + * @pfn: page frame number to be copied + * @buf: target memory address for the copy; this can be in kernel address + * space or user address space (see @userbuf) + * @csize: number of bytes to copy + * @offset: offset in bytes into the page (based on pfn) to begin the copy + * @userbuf: if set, @buf is in user address space, use copy_to_user(), + * otherwise @buf is in kernel address space, use memcpy(). + * + * Copy a page from "oldmem". For this page, there is no pte mapped + * in the current kernel. We stitch up a pte, similar to kmap_atomic. 
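A hypothetical caller (for instance a /proc/vmcore read path) would clamp the request to a single page and invoke it like this sketch:

/* Read 'count' bytes of old-kernel memory at physical address 'paddr'
 * into a user buffer; userbuf=1 selects the copy_to_user() path. */
static ssize_t read_oldmem_example(char __user *ubuf, size_t count, u64 paddr)
{
	unsigned long pfn = paddr >> PAGE_SHIFT;
	unsigned long offset = paddr & (PAGE_SIZE - 1);

	if (count > PAGE_SIZE - offset)
		count = PAGE_SIZE - offset;	/* stay within one page */
	return copy_oldmem_page(pfn, (char *)ubuf, count, offset, 1);
}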
+ */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, int userbuf) +{ + void *vaddr; + + if (!csize) + return 0; + + csize = min_t(size_t, csize, PAGE_SIZE); + + if ((min_low_pfn < pfn) && (pfn < max_pfn)) { + vaddr = __va(pfn << PAGE_SHIFT); + csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf); + } else { + vaddr = __ioremap(pfn << PAGE_SHIFT, PAGE_SIZE, 0); + csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf); + iounmap(vaddr); + } + + return csize; +} + +#ifdef CONFIG_PPC_RTAS +/* + * The crashkernel region will almost always overlap the RTAS region, so + * we have to be careful when shrinking the crashkernel region. + */ +void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) +{ + unsigned long addr; + const u32 *basep, *sizep; + unsigned int rtas_start = 0, rtas_end = 0; + + basep = of_get_property(rtas.dev, "linux,rtas-base", NULL); + sizep = of_get_property(rtas.dev, "rtas-size", NULL); + + if (basep && sizep) { + rtas_start = *basep; + rtas_end = *basep + *sizep; + } + + for (addr = begin; addr < end; addr += PAGE_SIZE) { + /* Does this page overlap with the RTAS region? */ + if (addr <= rtas_end && ((addr + PAGE_SIZE) > rtas_start)) + continue; + + ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT)); + init_page_count(pfn_to_page(addr >> PAGE_SHIFT)); + free_page((unsigned long)__va(addr)); + totalram_pages++; + } +} +#endif diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c new file mode 100644 index 00000000..5b25c806 --- /dev/null +++ b/arch/powerpc/kernel/dbell.c @@ -0,0 +1,53 @@ +/* + * Author: Kumar Gala <galak@kernel.crashing.org> + * + * Copyright 2009 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/threads.h> +#include <linux/hardirq.h> + +#include <asm/dbell.h> +#include <asm/irq_regs.h> + +#ifdef CONFIG_SMP +void doorbell_setup_this_cpu(void) +{ + unsigned long tag = mfspr(SPRN_PIR) & 0x3fff; + + smp_muxed_ipi_set_data(smp_processor_id(), tag); +} + +void doorbell_cause_ipi(int cpu, unsigned long data) +{ + ppc_msgsnd(PPC_DBELL, 0, data); +} + +void doorbell_exception(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + irq_enter(); + + may_hard_irq_enable(); + + smp_ipi_demux(); + + irq_exit(); + set_irq_regs(old_regs); +} +#else /* CONFIG_SMP */ +void doorbell_exception(struct pt_regs *regs) +{ + printk(KERN_WARNING "Received doorbell on non-smp system\n"); +} +#endif /* CONFIG_SMP */ + diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c new file mode 100644 index 00000000..bcfdcd22 --- /dev/null +++ b/arch/powerpc/kernel/dma-iommu.c @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation + * + * Provide default implementations of the DMA mapping callbacks for + * busses using the iommu infrastructure + */ + +#include <linux/export.h> +#include <asm/iommu.h> + +/* + * Generic iommu implementation + */ + +/* Allocates a contiguous real buffer and creates mappings over it. + * Returns the virtual address of the buffer and sets dma_handle + * to the dma address (mapping) of the first page. 
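+ *
+ * (Editor's illustration, not part of the original patch.) Drivers do
+ * not call these ops directly; once dma_iommu_ops is installed on a
+ * device, a generic DMA API call such as
+ *
+ *	dma_addr_t bus;
+ *	void *cpu = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &bus,
+ *				       GFP_KERNEL);
+ *
+ * is routed here through get_dma_ops() ("pdev" standing for any
+ * device whose archdata carries an iommu table).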
+ */
+static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
+				      dma_addr_t *dma_handle, gfp_t flag,
+				      struct dma_attrs *attrs)
+{
+	return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
+				    dma_handle, dev->coherent_dma_mask, flag,
+				    dev_to_node(dev));
+}
+
+static void dma_iommu_free_coherent(struct device *dev, size_t size,
+				    void *vaddr, dma_addr_t dma_handle,
+				    struct dma_attrs *attrs)
+{
+	iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
+}
+
+/* Creates TCEs for a user provided buffer.  The user buffer must be
+ * contiguous real kernel storage (not vmalloc).  The address passed here
+ * comprises a page address and offset into that page.  The dma_addr_t
+ * returned will point to the same byte within the page as was passed in.
+ */
+static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page,
+				     unsigned long offset, size_t size,
+				     enum dma_data_direction direction,
+				     struct dma_attrs *attrs)
+{
+	return iommu_map_page(dev, get_iommu_table_base(dev), page, offset,
+			      size, device_to_mask(dev), direction, attrs);
+}
+
+
+static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
+				 size_t size, enum dma_data_direction direction,
+				 struct dma_attrs *attrs)
+{
+	iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction,
+			 attrs);
+}
+
+
+static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
+			    int nelems, enum dma_data_direction direction,
+			    struct dma_attrs *attrs)
+{
+	return iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
+			    device_to_mask(dev), direction, attrs);
+}
+
+static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
+			       int nelems, enum dma_data_direction direction,
+			       struct dma_attrs *attrs)
+{
+	iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, direction,
+		       attrs);
+}
+
+/* We support DMA to/from any memory page via the iommu */
+static int dma_iommu_dma_supported(struct device *dev, u64 mask)
+{
+	struct iommu_table *tbl = get_iommu_table_base(dev);
+
+	if (!tbl) {
+		dev_info(dev, "Warning: IOMMU dma not supported: mask 0x%08llx"
+			", table unavailable\n", mask);
+		return 0;
+	}
+
+	if ((tbl->it_offset + tbl->it_size) > (mask >> IOMMU_PAGE_SHIFT)) {
+		dev_info(dev, "Warning: IOMMU window too big for device mask\n");
+		dev_info(dev, "mask: 0x%08llx, table end: 0x%08lx\n",
+			mask, (tbl->it_offset + tbl->it_size) <<
+			IOMMU_PAGE_SHIFT);
+		return 0;
+	} else
+		return 1;
+}
+
+static u64 dma_iommu_get_required_mask(struct device *dev)
+{
+	struct iommu_table *tbl = get_iommu_table_base(dev);
+	u64 mask;
+	if (!tbl)
+		return 0;
+
+	mask = 1ULL << (fls_long(tbl->it_offset + tbl->it_size) - 1);
+	mask += mask - 1;
+
+	return mask;
+}
+
+struct dma_map_ops dma_iommu_ops = {
+	.alloc = dma_iommu_alloc_coherent,
+	.free = dma_iommu_free_coherent,
+	.map_sg = dma_iommu_map_sg,
+	.unmap_sg = dma_iommu_unmap_sg,
+	.dma_supported = dma_iommu_dma_supported,
+	.map_page = dma_iommu_map_page,
+	.unmap_page = dma_iommu_unmap_page,
+	.get_required_mask = dma_iommu_get_required_mask,
+};
+EXPORT_SYMBOL(dma_iommu_ops);
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
new file mode 100644
index 00000000..4ab88daf
--- /dev/null
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -0,0 +1,106 @@
+/*
+ * Contains routines needed to support swiotlb for ppc.
+ *
+ * Copyright (C) 2009-2010 Freescale Semiconductor, Inc.
+ * Author: Becky Bruce
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/pfn.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pci.h>
+
+#include <asm/machdep.h>
+#include <asm/swiotlb.h>
+#include <asm/dma.h>
+#include <asm/abs_addr.h>
+
+unsigned int ppc_swiotlb_enable;
+
+static u64 swiotlb_powerpc_get_required(struct device *dev)
+{
+	u64 end, mask, max_direct_dma_addr = dev->archdata.max_direct_dma_addr;
+
+	end = memblock_end_of_DRAM();
+	if (max_direct_dma_addr && end > max_direct_dma_addr)
+		end = max_direct_dma_addr;
+	end += get_dma_offset(dev);
+
+	mask = 1ULL << (fls64(end) - 1);
+	mask += mask - 1;
+
+	return mask;
+}
+
+/*
+ * At the moment, all platforms that use this code only require
+ * swiotlb to be used if we're operating on HIGHMEM.  Since
+ * we don't ever call anything other than map_sg, unmap_sg,
+ * map_page, and unmap_page on highmem, use normal dma_ops
+ * for everything else.
+ */
+struct dma_map_ops swiotlb_dma_ops = {
+	.alloc = dma_direct_alloc_coherent,
+	.free = dma_direct_free_coherent,
+	.map_sg = swiotlb_map_sg_attrs,
+	.unmap_sg = swiotlb_unmap_sg_attrs,
+	.dma_supported = swiotlb_dma_supported,
+	.map_page = swiotlb_map_page,
+	.unmap_page = swiotlb_unmap_page,
+	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+	.sync_single_for_device = swiotlb_sync_single_for_device,
+	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+	.sync_sg_for_device = swiotlb_sync_sg_for_device,
+	.mapping_error = swiotlb_dma_mapping_error,
+	.get_required_mask = swiotlb_powerpc_get_required,
+};
+
+void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev)
+{
+	struct pci_controller *hose;
+	struct dev_archdata *sd;
+
+	hose = pci_bus_to_host(pdev->bus);
+	sd = &pdev->dev.archdata;
+	sd->max_direct_dma_addr =
+		hose->dma_window_base_cur + hose->dma_window_size;
+}
+
+static int ppc_swiotlb_bus_notify(struct notifier_block *nb,
+				  unsigned long action, void *data)
+{
+	struct device *dev = data;
+	struct dev_archdata *sd;
+
+	/* We are only interested in device addition */
+	if (action != BUS_NOTIFY_ADD_DEVICE)
+		return 0;
+
+	sd = &dev->archdata;
+	sd->max_direct_dma_addr = 0;
+
+	/* May need to bounce if the device can't address all of DRAM */
+	if ((dma_get_mask(dev) + 1) < memblock_end_of_DRAM())
+		set_dma_ops(dev, &swiotlb_dma_ops);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ppc_swiotlb_plat_bus_notifier = {
+	.notifier_call = ppc_swiotlb_bus_notify,
+	.priority = 0,
+};
+
+int __init swiotlb_setup_bus_notifier(void)
+{
+	bus_register_notifier(&platform_bus_type,
+			      &ppc_swiotlb_plat_bus_notifier);
+	return 0;
+}
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
new file mode 100644
index 00000000..b1ec983d
--- /dev/null
+++ b/arch/powerpc/kernel/dma.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * Provide default implementations of the DMA mapping callbacks for
+ * directly mapped busses.
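+ *
+ * (Editor's illustration, not part of the original patch.) "Directly
+ * mapped" means bus address = physical address + get_dma_offset(dev).
+ * The required-mask arithmetic used further down rounds the end of
+ * RAM up to a full power-of-two window; e.g. with 2GB of RAM and a
+ * zero offset:
+ *
+ *	end  = 0x80000000;
+ *	mask = 1ULL << (fls64(end) - 1);	// fls64(end) = 32
+ *	mask += mask - 1;			// -> 0xffffffff
+ *
+ * i.e. such a device must be able to address a full 32 bits.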
+ */ + +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/dma-debug.h> +#include <linux/gfp.h> +#include <linux/memblock.h> +#include <linux/export.h> +#include <asm/bug.h> +#include <asm/abs_addr.h> +#include <asm/machdep.h> + +/* + * Generic direct DMA implementation + * + * This implementation supports a per-device offset that can be applied if + * the address at which memory is visible to devices is not 0. Platform code + * can set archdata.dma_data to an unsigned long holding the offset. By + * default the offset is PCI_DRAM_OFFSET. + */ + + +void *dma_direct_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + void *ret; +#ifdef CONFIG_NOT_COHERENT_CACHE + ret = __dma_alloc_coherent(dev, size, dma_handle, flag); + if (ret == NULL) + return NULL; + *dma_handle += get_dma_offset(dev); + return ret; +#else + struct page *page; + int node = dev_to_node(dev); + + /* ignore region specifiers */ + flag &= ~(__GFP_HIGHMEM); + + page = alloc_pages_node(node, flag, get_order(size)); + if (page == NULL) + return NULL; + ret = page_address(page); + memset(ret, 0, size); + *dma_handle = virt_to_abs(ret) + get_dma_offset(dev); + + return ret; +#endif +} + +void dma_direct_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ +#ifdef CONFIG_NOT_COHERENT_CACHE + __dma_free_coherent(size, vaddr); +#else + free_pages((unsigned long)vaddr, get_order(size)); +#endif +} + +static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) { + sg->dma_address = sg_phys(sg) + get_dma_offset(dev); + sg->dma_length = sg->length; + __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); + } + + return nents; +} + +static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ +} + +static int dma_direct_dma_supported(struct device *dev, u64 mask) +{ +#ifdef CONFIG_PPC64 + /* Could be improved so platforms can set the limit in case + * they have limited DMA windows + */ + return mask >= get_dma_offset(dev) + (memblock_end_of_DRAM() - 1); +#else + return 1; +#endif +} + +static u64 dma_direct_get_required_mask(struct device *dev) +{ + u64 end, mask; + + end = memblock_end_of_DRAM() + get_dma_offset(dev); + + mask = 1ULL << (fls64(end) - 1); + mask += mask - 1; + + return mask; +} + +static inline dma_addr_t dma_direct_map_page(struct device *dev, + struct page *page, + unsigned long offset, + size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + BUG_ON(dir == DMA_NONE); + __dma_sync_page(page, offset, size, dir); + return page_to_phys(page) + offset + get_dma_offset(dev); +} + +static inline void dma_direct_unmap_page(struct device *dev, + dma_addr_t dma_address, + size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ +} + +#ifdef CONFIG_NOT_COHERENT_CACHE +static inline void dma_direct_sync_sg(struct device *dev, + struct scatterlist *sgl, int nents, + enum dma_data_direction direction) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); +} + +static inline void dma_direct_sync_single(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction) 
+{ + __dma_sync(bus_to_virt(dma_handle), size, direction); +} +#endif + +struct dma_map_ops dma_direct_ops = { + .alloc = dma_direct_alloc_coherent, + .free = dma_direct_free_coherent, + .map_sg = dma_direct_map_sg, + .unmap_sg = dma_direct_unmap_sg, + .dma_supported = dma_direct_dma_supported, + .map_page = dma_direct_map_page, + .unmap_page = dma_direct_unmap_page, + .get_required_mask = dma_direct_get_required_mask, +#ifdef CONFIG_NOT_COHERENT_CACHE + .sync_single_for_cpu = dma_direct_sync_single, + .sync_single_for_device = dma_direct_sync_single, + .sync_sg_for_cpu = dma_direct_sync_sg, + .sync_sg_for_device = dma_direct_sync_sg, +#endif +}; +EXPORT_SYMBOL(dma_direct_ops); + +#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) + +int dma_set_mask(struct device *dev, u64 dma_mask) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + if (ppc_md.dma_set_mask) + return ppc_md.dma_set_mask(dev, dma_mask); + if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL)) + return dma_ops->set_dma_mask(dev, dma_mask); + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + *dev->dma_mask = dma_mask; + return 0; +} +EXPORT_SYMBOL(dma_set_mask); + +u64 dma_get_required_mask(struct device *dev) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + if (ppc_md.dma_get_required_mask) + return ppc_md.dma_get_required_mask(dev); + + if (unlikely(dma_ops == NULL)) + return 0; + + if (dma_ops->get_required_mask) + return dma_ops->get_required_mask(dev); + + return DMA_BIT_MASK(8 * sizeof(dma_addr_t)); +} +EXPORT_SYMBOL_GPL(dma_get_required_mask); + +static int __init dma_init(void) +{ + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + + return 0; +} +fs_initcall(dma_init); + +int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t handle, size_t size) +{ + unsigned long pfn; + +#ifdef CONFIG_NOT_COHERENT_CACHE + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr); +#else + pfn = page_to_pfn(virt_to_page(cpu_addr)); +#endif + return remap_pfn_range(vma, vma->vm_start, + pfn + vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} +EXPORT_SYMBOL_GPL(dma_mmap_coherent); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S new file mode 100644 index 00000000..ba3aeb4b --- /dev/null +++ b/arch/powerpc/kernel/entry_32.S @@ -0,0 +1,1412 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Rewritten by Cort Dougan (cort@fsmlabs.com) for PReP + * Copyright (C) 1996 Cort Dougan <cort@fsmlabs.com> + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). + * + * This file contains the system call entry code, context switch + * code, and exception/interrupt return code for PowerPC. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include <linux/errno.h> +#include <linux/sys.h> +#include <linux/threads.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> +#include <asm/ftrace.h> +#include <asm/ptrace.h> + +#undef SHOW_SYSCALLS +#undef SHOW_SYSCALLS_TASK + +/* + * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE. + */ +#if MSR_KERNEL >= 0x10000 +#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l +#else +#define LOAD_MSR_KERNEL(r, x) li r,(x) +#endif + +#ifdef CONFIG_BOOKE + .globl mcheck_transfer_to_handler +mcheck_transfer_to_handler: + mfspr r0,SPRN_DSRR0 + stw r0,_DSRR0(r11) + mfspr r0,SPRN_DSRR1 + stw r0,_DSRR1(r11) + /* fall through */ + + .globl debug_transfer_to_handler +debug_transfer_to_handler: + mfspr r0,SPRN_CSRR0 + stw r0,_CSRR0(r11) + mfspr r0,SPRN_CSRR1 + stw r0,_CSRR1(r11) + /* fall through */ + + .globl crit_transfer_to_handler +crit_transfer_to_handler: +#ifdef CONFIG_PPC_BOOK3E_MMU + mfspr r0,SPRN_MAS0 + stw r0,MAS0(r11) + mfspr r0,SPRN_MAS1 + stw r0,MAS1(r11) + mfspr r0,SPRN_MAS2 + stw r0,MAS2(r11) + mfspr r0,SPRN_MAS3 + stw r0,MAS3(r11) + mfspr r0,SPRN_MAS6 + stw r0,MAS6(r11) +#ifdef CONFIG_PHYS_64BIT + mfspr r0,SPRN_MAS7 + stw r0,MAS7(r11) +#endif /* CONFIG_PHYS_64BIT */ +#endif /* CONFIG_PPC_BOOK3E_MMU */ +#ifdef CONFIG_44x + mfspr r0,SPRN_MMUCR + stw r0,MMUCR(r11) +#endif + mfspr r0,SPRN_SRR0 + stw r0,_SRR0(r11) + mfspr r0,SPRN_SRR1 + stw r0,_SRR1(r11) + + mfspr r8,SPRN_SPRG_THREAD + lwz r0,KSP_LIMIT(r8) + stw r0,SAVED_KSP_LIMIT(r11) + rlwimi r0,r1,0,0,(31-THREAD_SHIFT) + stw r0,KSP_LIMIT(r8) + /* fall through */ +#endif + +#ifdef CONFIG_40x + .globl crit_transfer_to_handler +crit_transfer_to_handler: + lwz r0,crit_r10@l(0) + stw r0,GPR10(r11) + lwz r0,crit_r11@l(0) + stw r0,GPR11(r11) + mfspr r0,SPRN_SRR0 + stw r0,crit_srr0@l(0) + mfspr r0,SPRN_SRR1 + stw r0,crit_srr1@l(0) + + mfspr r8,SPRN_SPRG_THREAD + lwz r0,KSP_LIMIT(r8) + stw r0,saved_ksp_limit@l(0) + rlwimi r0,r1,0,0,(31-THREAD_SHIFT) + stw r0,KSP_LIMIT(r8) + /* fall through */ +#endif + +/* + * This code finishes saving the registers to the exception frame + * and jumps to the appropriate handler for the exception, turning + * on address translation. + * Note that we rely on the caller having set cr0.eq iff the exception + * occurred in kernel mode (i.e. MSR:PR = 0). + */ + .globl transfer_to_handler_full +transfer_to_handler_full: + SAVE_NVGPRS(r11) + /* fall through */ + + .globl transfer_to_handler +transfer_to_handler: + stw r2,GPR2(r11) + stw r12,_NIP(r11) + stw r9,_MSR(r11) + andi. r2,r9,MSR_PR + mfctr r12 + mfspr r2,SPRN_XER + stw r12,_CTR(r11) + stw r2,_XER(r11) + mfspr r12,SPRN_SPRG_THREAD + addi r2,r12,-THREAD + tovirt(r2,r2) /* set r2 to current */ + beq 2f /* if from user, fix up THREAD.regs */ + addi r11,r1,STACK_FRAME_OVERHEAD + stw r11,PT_REGS(r12) +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) + /* Check to see if the dbcr0 register is set up to debug. Use the + internal debug mode bit to do this. */ + lwz r12,THREAD_DBCR0(r12) + andis. 
r12,r12,DBCR0_IDM@h
+	beq+	3f
+	/* From user and task is ptraced - load up global dbcr0 */
+	li	r12,-1			/* clear all pending debug events */
+	mtspr	SPRN_DBSR,r12
+	lis	r11,global_dbcr0@ha
+	tophys(r11,r11)
+	addi	r11,r11,global_dbcr0@l
+#ifdef CONFIG_SMP
+	rlwinm	r9,r1,0,0,(31-THREAD_SHIFT)
+	lwz	r9,TI_CPU(r9)
+	slwi	r9,r9,3
+	add	r11,r11,r9
+#endif
+	lwz	r12,0(r11)
+	mtspr	SPRN_DBCR0,r12
+	lwz	r12,4(r11)
+	addi	r12,r12,-1
+	stw	r12,4(r11)
+#endif
+	b	3f
+
+2:	/* if from kernel, check interrupted DOZE/NAP mode and
+	 * check for stack overflow
+	 */
+	lwz	r9,KSP_LIMIT(r12)
+	cmplw	r1,r9			/* if r1 <= ksp_limit */
+	ble-	stack_ovf		/* then the kernel stack overflowed */
+5:
+#if defined(CONFIG_6xx) || defined(CONFIG_E500)
+	rlwinm	r9,r1,0,0,31-THREAD_SHIFT
+	tophys(r9,r9)			/* check local flags */
+	lwz	r12,TI_LOCAL_FLAGS(r9)
+	mtcrf	0x01,r12
+	bt-	31-TLF_NAPPING,4f
+	bt-	31-TLF_SLEEPING,7f
+#endif /* CONFIG_6xx || CONFIG_E500 */
+	.globl transfer_to_handler_cont
+transfer_to_handler_cont:
+3:
+	mflr	r9
+	lwz	r11,0(r9)		/* virtual address of handler */
+	lwz	r9,4(r9)		/* where to go when done */
+#ifdef CONFIG_TRACE_IRQFLAGS
+	lis	r12,reenable_mmu@h
+	ori	r12,r12,reenable_mmu@l
+	mtspr	SPRN_SRR0,r12
+	mtspr	SPRN_SRR1,r10
+	SYNC
+	RFI
+reenable_mmu:				/* re-enable mmu so we can */
+	mfmsr	r10
+	lwz	r12,_MSR(r1)
+	xor	r10,r10,r12
+	andi.	r10,r10,MSR_EE		/* Did EE change? */
+	beq	1f
+
+	/*
+	 * The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1.
+	 * If from user mode there is only one stack frame on the stack, and
+	 * accessing CALLER_ADDR1 will cause an oops, so we need to create a
+	 * dummy stack frame to make trace_hardirqs_off happy.
+	 *
+	 * This is handy because we also need to save a bunch of GPRs:
+	 * r3 can be different from GPR3(r1) at this point, r9 and r11
+	 * contain the old MSR and handler address respectively,
+	 * r4 & r5 can contain page fault arguments that need to be passed
+	 * along as well. r12, CCR, CTR, XER etc... are left clobbered as
+	 * they aren't useful past this point (aren't syscall arguments),
+	 * the rest is restored from the exception frame.
+	 */
+	stwu	r1,-32(r1)
+	stw	r9,8(r1)
+	stw	r11,12(r1)
+	stw	r3,16(r1)
+	stw	r4,20(r1)
+	stw	r5,24(r1)
+	andi.	r12,r12,MSR_PR
+	b	11f
+	bl	trace_hardirqs_off
+	b	12f
+11:
+	bl	trace_hardirqs_off
+12:
+	lwz	r5,24(r1)
+	lwz	r4,20(r1)
+	lwz	r3,16(r1)
+	lwz	r11,12(r1)
+	lwz	r9,8(r1)
+	addi	r1,r1,32
+	lwz	r0,GPR0(r1)
+	lwz	r6,GPR6(r1)
+	lwz	r7,GPR7(r1)
+	lwz	r8,GPR8(r1)
+1:	mtctr	r11
+	mtlr	r9
+	bctr				/* jump to handler */
+#else /* CONFIG_TRACE_IRQFLAGS */
+	mtspr	SPRN_SRR0,r11
+	mtspr	SPRN_SRR1,r10
+	mtlr	r9
+	SYNC
+	RFI				/* jump to handler, enable MMU */
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+#if defined (CONFIG_6xx) || defined(CONFIG_E500)
+4:	rlwinm	r12,r12,0,~_TLF_NAPPING
+	stw	r12,TI_LOCAL_FLAGS(r9)
+	b	power_save_ppc32_restore
+
+7:	rlwinm	r12,r12,0,~_TLF_SLEEPING
+	stw	r12,TI_LOCAL_FLAGS(r9)
+	lwz	r9,_MSR(r11)		/* if sleeping, clear MSR.EE */
+	rlwinm	r9,r9,0,~MSR_EE
+	lwz	r12,_LINK(r11)		/* and return to address in LR */
+	b	fast_exception_return
+#endif
+
+/*
+ * On kernel stack overflow, load up an initial stack pointer
+ * and call StackOverflow(regs), which should not return.
+ */
+stack_ovf:
+	/* sometimes we use a statically-allocated stack, which is OK.
*/ + lis r12,_end@h + ori r12,r12,_end@l + cmplw r1,r12 + ble 5b /* r1 <= &_end is OK */ + SAVE_NVGPRS(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + lis r1,init_thread_union@ha + addi r1,r1,init_thread_union@l + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + lis r9,StackOverflow@ha + addi r9,r9,StackOverflow@l + LOAD_MSR_KERNEL(r10,MSR_KERNEL) + FIX_SRR1(r10,r12) + mtspr SPRN_SRR0,r9 + mtspr SPRN_SRR1,r10 + SYNC + RFI + +/* + * Handle a system call. + */ + .stabs "arch/powerpc/kernel/",N_SO,0,0,0f + .stabs "entry_32.S",N_SO,0,0,0f +0: + +_GLOBAL(DoSyscall) + stw r3,ORIG_GPR3(r1) + li r12,0 + stw r12,RESULT(r1) + lwz r11,_CCR(r1) /* Clear SO bit in CR */ + rlwinm r11,r11,0,4,2 + stw r11,_CCR(r1) +#ifdef SHOW_SYSCALLS + bl do_show_syscall +#endif /* SHOW_SYSCALLS */ +#ifdef CONFIG_TRACE_IRQFLAGS + /* Return from syscalls can (and generally will) hard enable + * interrupts. You aren't supposed to call a syscall with + * interrupts disabled in the first place. However, to ensure + * that we get it right vs. lockdep if it happens, we force + * that hard enable here with appropriate tracing if we see + * that we have been called with interrupts off + */ + mfmsr r11 + andi. r12,r11,MSR_EE + bne+ 1f + /* We came in with interrupts disabled, we enable them now */ + bl trace_hardirqs_on + mfmsr r11 + lwz r0,GPR0(r1) + lwz r3,GPR3(r1) + lwz r4,GPR4(r1) + ori r11,r11,MSR_EE + lwz r5,GPR5(r1) + lwz r6,GPR6(r1) + lwz r7,GPR7(r1) + lwz r8,GPR8(r1) + mtmsr r11 +1: +#endif /* CONFIG_TRACE_IRQFLAGS */ + rlwinm r10,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ + lwz r11,TI_FLAGS(r10) + andi. r11,r11,_TIF_SYSCALL_T_OR_A + bne- syscall_dotrace +syscall_dotrace_cont: + cmplwi 0,r0,NR_syscalls + lis r10,sys_call_table@h + ori r10,r10,sys_call_table@l + slwi r0,r0,2 + bge- 66f + lwzx r10,r10,r0 /* Fetch system call handler [ptr] */ + mtlr r10 + addi r9,r1,STACK_FRAME_OVERHEAD + PPC440EP_ERR42 + blrl /* Call handler */ + .globl ret_from_syscall +ret_from_syscall: +#ifdef SHOW_SYSCALLS + bl do_show_syscall_exit +#endif + mr r6,r3 + rlwinm r12,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ + /* disable interrupts so current_thread_info()->flags can't change */ + LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */ + /* Note: We don't bother telling lockdep about it */ + SYNC + MTMSRD(r10) + lwz r9,TI_FLAGS(r12) + li r8,-_LAST_ERRNO + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) + bne- syscall_exit_work + cmplw 0,r3,r8 + blt+ syscall_exit_cont + lwz r11,_CCR(r1) /* Load CR */ + neg r3,r3 + oris r11,r11,0x1000 /* Set SO bit in CR */ + stw r11,_CCR(r1) +syscall_exit_cont: + lwz r8,_MSR(r1) +#ifdef CONFIG_TRACE_IRQFLAGS + /* If we are going to return from the syscall with interrupts + * off, we trace that here. It shouldn't happen though but we + * want to catch the bugger if it does right ? + */ + andi. r10,r8,MSR_EE + bne+ 1f + stw r3,GPR3(r1) + bl trace_hardirqs_off + lwz r3,GPR3(r1) +1: +#endif /* CONFIG_TRACE_IRQFLAGS */ +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) + /* If the process has its own DBCR0 value, load it up. The internal + debug mode bit tells us that dbcr0 should be loaded. */ + lwz r0,THREAD+THREAD_DBCR0(r2) + andis. 
r10,r0,DBCR0_IDM@h + bnel- load_dbcr0 +#endif +#ifdef CONFIG_44x +BEGIN_MMU_FTR_SECTION + lis r4,icache_44x_need_flush@ha + lwz r5,icache_44x_need_flush@l(r4) + cmplwi cr0,r5,0 + bne- 2f +1: +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x) +#endif /* CONFIG_44x */ +BEGIN_FTR_SECTION + lwarx r7,0,r1 +END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) + stwcx. r0,0,r1 /* to clear the reservation */ + lwz r4,_LINK(r1) + lwz r5,_CCR(r1) + mtlr r4 + mtcr r5 + lwz r7,_NIP(r1) + FIX_SRR1(r8, r0) + lwz r2,GPR2(r1) + lwz r1,GPR1(r1) + mtspr SPRN_SRR0,r7 + mtspr SPRN_SRR1,r8 + SYNC + RFI +#ifdef CONFIG_44x +2: li r7,0 + iccci r0,r0 + stw r7,icache_44x_need_flush@l(r4) + b 1b +#endif /* CONFIG_44x */ + +66: li r3,-ENOSYS + b ret_from_syscall + + .globl ret_from_fork +ret_from_fork: + REST_NVGPRS(r1) + bl schedule_tail + li r3,0 + b ret_from_syscall + +/* Traced system call support */ +syscall_dotrace: + SAVE_NVGPRS(r1) + li r0,0xc00 + stw r0,_TRAP(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl do_syscall_trace_enter + /* + * Restore argument registers possibly just changed. + * We use the return value of do_syscall_trace_enter + * for call number to look up in the table (r0). + */ + mr r0,r3 + lwz r3,GPR3(r1) + lwz r4,GPR4(r1) + lwz r5,GPR5(r1) + lwz r6,GPR6(r1) + lwz r7,GPR7(r1) + lwz r8,GPR8(r1) + REST_NVGPRS(r1) + b syscall_dotrace_cont + +syscall_exit_work: + andi. r0,r9,_TIF_RESTOREALL + beq+ 0f + REST_NVGPRS(r1) + b 2f +0: cmplw 0,r3,r8 + blt+ 1f + andi. r0,r9,_TIF_NOERROR + bne- 1f + lwz r11,_CCR(r1) /* Load CR */ + neg r3,r3 + oris r11,r11,0x1000 /* Set SO bit in CR */ + stw r11,_CCR(r1) + +1: stw r6,RESULT(r1) /* Save result */ + stw r3,GPR3(r1) /* Update return value */ +2: andi. r0,r9,(_TIF_PERSYSCALL_MASK) + beq 4f + + /* Clear per-syscall TIF flags if any are set. */ + + li r11,_TIF_PERSYSCALL_MASK + addi r12,r12,TI_FLAGS +3: lwarx r8,0,r12 + andc r8,r8,r11 +#ifdef CONFIG_IBM405_ERR77 + dcbt 0,r12 +#endif + stwcx. r8,0,r12 + bne- 3b + subi r12,r12,TI_FLAGS + +4: /* Anything which requires enabling interrupts? */ + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) + beq ret_from_except + + /* Re-enable interrupts. There is no need to trace that with + * lockdep as we are supposed to have IRQs on at this point + */ + ori r10,r10,MSR_EE + SYNC + MTMSRD(r10) + + /* Save NVGPRS if they're not saved already */ + lwz r4,_TRAP(r1) + andi. 
r4,r4,1 + beq 5f + SAVE_NVGPRS(r1) + li r4,0xc00 + stw r4,_TRAP(r1) +5: + addi r3,r1,STACK_FRAME_OVERHEAD + bl do_syscall_trace_leave + b ret_from_except_full + +#ifdef SHOW_SYSCALLS +do_show_syscall: +#ifdef SHOW_SYSCALLS_TASK + lis r11,show_syscalls_task@ha + lwz r11,show_syscalls_task@l(r11) + cmp 0,r2,r11 + bnelr +#endif + stw r31,GPR31(r1) + mflr r31 + lis r3,7f@ha + addi r3,r3,7f@l + lwz r4,GPR0(r1) + lwz r5,GPR3(r1) + lwz r6,GPR4(r1) + lwz r7,GPR5(r1) + lwz r8,GPR6(r1) + lwz r9,GPR7(r1) + bl printk + lis r3,77f@ha + addi r3,r3,77f@l + lwz r4,GPR8(r1) + mr r5,r2 + bl printk + lwz r0,GPR0(r1) + lwz r3,GPR3(r1) + lwz r4,GPR4(r1) + lwz r5,GPR5(r1) + lwz r6,GPR6(r1) + lwz r7,GPR7(r1) + lwz r8,GPR8(r1) + mtlr r31 + lwz r31,GPR31(r1) + blr + +do_show_syscall_exit: +#ifdef SHOW_SYSCALLS_TASK + lis r11,show_syscalls_task@ha + lwz r11,show_syscalls_task@l(r11) + cmp 0,r2,r11 + bnelr +#endif + stw r31,GPR31(r1) + mflr r31 + stw r3,RESULT(r1) /* Save result */ + mr r4,r3 + lis r3,79f@ha + addi r3,r3,79f@l + bl printk + lwz r3,RESULT(r1) + mtlr r31 + lwz r31,GPR31(r1) + blr + +7: .string "syscall %d(%x, %x, %x, %x, %x, " +77: .string "%x), current=%p\n" +79: .string " -> %x\n" + .align 2,0 + +#ifdef SHOW_SYSCALLS_TASK + .data + .globl show_syscalls_task +show_syscalls_task: + .long -1 + .text +#endif +#endif /* SHOW_SYSCALLS */ + +/* + * The fork/clone functions need to copy the full register set into + * the child process. Therefore we need to save all the nonvolatile + * registers (r13 - r31) before calling the C code. + */ + .globl ppc_fork +ppc_fork: + SAVE_NVGPRS(r1) + lwz r0,_TRAP(r1) + rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ + stw r0,_TRAP(r1) /* register set saved */ + b sys_fork + + .globl ppc_vfork +ppc_vfork: + SAVE_NVGPRS(r1) + lwz r0,_TRAP(r1) + rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ + stw r0,_TRAP(r1) /* register set saved */ + b sys_vfork + + .globl ppc_clone +ppc_clone: + SAVE_NVGPRS(r1) + lwz r0,_TRAP(r1) + rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ + stw r0,_TRAP(r1) /* register set saved */ + b sys_clone + + .globl ppc_swapcontext +ppc_swapcontext: + SAVE_NVGPRS(r1) + lwz r0,_TRAP(r1) + rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ + stw r0,_TRAP(r1) /* register set saved */ + b sys_swapcontext + +/* + * Top-level page fault handling. + * This is in assembler because if do_page_fault tells us that + * it is a bad kernel page fault, we want to save the non-volatile + * registers before calling bad_page_fault. + */ + .globl handle_page_fault +handle_page_fault: + stw r4,_DAR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl do_page_fault + cmpwi r3,0 + beq+ ret_from_except + SAVE_NVGPRS(r1) + lwz r0,_TRAP(r1) + clrrwi r0,r0,1 + stw r0,_TRAP(r1) + mr r5,r3 + addi r3,r1,STACK_FRAME_OVERHEAD + lwz r4,_DAR(r1) + bl bad_page_fault + b ret_from_except_full + +/* + * This routine switches between two different tasks. The process + * state of one is saved on its kernel stack. Then the state + * of the other is restored from its kernel stack. The memory + * management hardware is updated to the second process's state. + * Finally, we can return to the second process. + * On entry, r3 points to the THREAD for the current task, r4 + * points to the THREAD for the new task. + * + * This routine is always called with interrupts disabled. + * + * Note: there are two ways to get to the "going out" portion + * of this code; either by coming in via the entry (_switch) + * or via "fork" which must set up an environment equivalent + * to the "_switch" path. 
If you change this , you'll have to + * change the fork code also. + * + * The code which creates the new task context is in 'copy_thread' + * in arch/ppc/kernel/process.c + */ +_GLOBAL(_switch) + stwu r1,-INT_FRAME_SIZE(r1) + mflr r0 + stw r0,INT_FRAME_SIZE+4(r1) + /* r3-r12 are caller saved -- Cort */ + SAVE_NVGPRS(r1) + stw r0,_NIP(r1) /* Return to switch caller */ + mfmsr r11 + li r0,MSR_FP /* Disable floating-point */ +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + oris r0,r0,MSR_VEC@h /* Disable altivec */ + mfspr r12,SPRN_VRSAVE /* save vrsave register value */ + stw r12,THREAD+THREAD_VRSAVE(r2) +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_SPE +BEGIN_FTR_SECTION + oris r0,r0,MSR_SPE@h /* Disable SPE */ + mfspr r12,SPRN_SPEFSCR /* save spefscr register value */ + stw r12,THREAD+THREAD_SPEFSCR(r2) +END_FTR_SECTION_IFSET(CPU_FTR_SPE) +#endif /* CONFIG_SPE */ + and. r0,r0,r11 /* FP or altivec or SPE enabled? */ + beq+ 1f + andc r11,r11,r0 + MTMSRD(r11) + isync +1: stw r11,_MSR(r1) + mfcr r10 + stw r10,_CCR(r1) + stw r1,KSP(r3) /* Set old stack pointer */ + +#ifdef CONFIG_SMP + /* We need a sync somewhere here to make sure that if the + * previous task gets rescheduled on another CPU, it sees all + * stores it has performed on this one. + */ + sync +#endif /* CONFIG_SMP */ + + tophys(r0,r4) + CLR_TOP32(r0) + mtspr SPRN_SPRG_THREAD,r0 /* Update current THREAD phys addr */ + lwz r1,KSP(r4) /* Load new stack pointer */ + + /* save the old current 'last' for return value */ + mr r3,r2 + addi r2,r4,-THREAD /* Update current */ + +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + lwz r0,THREAD+THREAD_VRSAVE(r2) + mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */ +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_SPE +BEGIN_FTR_SECTION + lwz r0,THREAD+THREAD_SPEFSCR(r2) + mtspr SPRN_SPEFSCR,r0 /* restore SPEFSCR reg */ +END_FTR_SECTION_IFSET(CPU_FTR_SPE) +#endif /* CONFIG_SPE */ + + lwz r0,_CCR(r1) + mtcrf 0xFF,r0 + /* r3-r12 are destroyed -- Cort */ + REST_NVGPRS(r1) + + lwz r4,_NIP(r1) /* Return to _switch caller in new task */ + mtlr r4 + addi r1,r1,INT_FRAME_SIZE + blr + + .globl fast_exception_return +fast_exception_return: +#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) + andi. 
r10,r9,MSR_RI /* check for recoverable interrupt */ + beq 1f /* if not, we've got problems */ +#endif + +2: REST_4GPRS(3, r11) + lwz r10,_CCR(r11) + REST_GPR(1, r11) + mtcr r10 + lwz r10,_LINK(r11) + mtlr r10 + REST_GPR(10, r11) + mtspr SPRN_SRR1,r9 + mtspr SPRN_SRR0,r12 + REST_GPR(9, r11) + REST_GPR(12, r11) + lwz r11,GPR11(r11) + SYNC + RFI + +#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) +/* check if the exception happened in a restartable section */ +1: lis r3,exc_exit_restart_end@ha + addi r3,r3,exc_exit_restart_end@l + cmplw r12,r3 + bge 3f + lis r4,exc_exit_restart@ha + addi r4,r4,exc_exit_restart@l + cmplw r12,r4 + blt 3f + lis r3,fee_restarts@ha + tophys(r3,r3) + lwz r5,fee_restarts@l(r3) + addi r5,r5,1 + stw r5,fee_restarts@l(r3) + mr r12,r4 /* restart at exc_exit_restart */ + b 2b + + .section .bss + .align 2 +fee_restarts: + .space 4 + .previous + +/* aargh, a nonrecoverable interrupt, panic */ +/* aargh, we don't know which trap this is */ +/* but the 601 doesn't implement the RI bit, so assume it's OK */ +3: +BEGIN_FTR_SECTION + b 2b +END_FTR_SECTION_IFSET(CPU_FTR_601) + li r10,-1 + stw r10,_TRAP(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + lis r10,MSR_KERNEL@h + ori r10,r10,MSR_KERNEL@l + bl transfer_to_handler_full + .long nonrecoverable_exception + .long ret_from_except +#endif + + .globl ret_from_except_full +ret_from_except_full: + REST_NVGPRS(r1) + /* fall through */ + + .globl ret_from_except +ret_from_except: + /* Hard-disable interrupts so that current_thread_info()->flags + * can't change between when we test it and when we return + * from the interrupt. */ + /* Note: We don't bother telling lockdep about it */ + LOAD_MSR_KERNEL(r10,MSR_KERNEL) + SYNC /* Some chip revs have problems here... */ + MTMSRD(r10) /* disable interrupts */ + + lwz r3,_MSR(r1) /* Returning to user mode? */ + andi. r0,r3,MSR_PR + beq resume_kernel + +user_exc_return: /* r10 contains MSR_KERNEL here */ + /* Check current_thread_info()->flags */ + rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + lwz r9,TI_FLAGS(r9) + andi. r0,r9,_TIF_USER_WORK_MASK + bne do_work + +restore_user: +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) + /* Check whether this process has its own DBCR0 value. The internal + debug mode bit tells us that dbcr0 should be loaded. */ + lwz r0,THREAD+THREAD_DBCR0(r2) + andis. r10,r0,DBCR0_IDM@h + bnel- load_dbcr0 +#endif + +#ifdef CONFIG_PREEMPT + b restore + +/* N.B. the only way to get here is from the beq following ret_from_except. */ +resume_kernel: + /* check current_thread_info->preempt_count */ + rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + lwz r0,TI_PREEMPT(r9) + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ + bne restore + lwz r0,TI_FLAGS(r9) + andi. r0,r0,_TIF_NEED_RESCHED + beq+ restore + andi. r0,r3,MSR_EE /* interrupts off? */ + beq restore /* don't schedule if so */ +#ifdef CONFIG_TRACE_IRQFLAGS + /* Lockdep thinks irqs are enabled, we need to call + * preempt_schedule_irq with IRQs off, so we inform lockdep + * now that we -did- turn them off already + */ + bl trace_hardirqs_off +#endif +1: bl preempt_schedule_irq + rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + lwz r3,TI_FLAGS(r9) + andi. 
r0,r3,_TIF_NEED_RESCHED
+	bne-	1b
+#ifdef CONFIG_TRACE_IRQFLAGS
+	/* And now, to properly rebalance the above, we tell lockdep they
+	 * are being turned back on, which will happen when we return
+	 */
+	bl	trace_hardirqs_on
+#endif
+#else
+resume_kernel:
+#endif /* CONFIG_PREEMPT */
+
+	/* interrupts are hard-disabled at this point */
+restore:
+#ifdef CONFIG_44x
+BEGIN_MMU_FTR_SECTION
+	b	1f
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
+	lis	r4,icache_44x_need_flush@ha
+	lwz	r5,icache_44x_need_flush@l(r4)
+	cmplwi	cr0,r5,0
+	beq+	1f
+	li	r6,0
+	iccci	r0,r0
+	stw	r6,icache_44x_need_flush@l(r4)
+1:
+#endif  /* CONFIG_44x */
+
+	lwz	r9,_MSR(r1)
+#ifdef CONFIG_TRACE_IRQFLAGS
+	/* Lockdep doesn't know about the fact that IRQs are temporarily turned
+	 * off in this assembly code while peeking at TI_FLAGS() and such. However
+	 * we need to inform it if the exception turned interrupts off, and we
+	 * are about to turn them back on.
+	 *
+	 * The problem here sadly is that we don't know whether the exception was
+	 * one that turned interrupts off or not. So we always tell lockdep about
+	 * turning them on here when we go back to wherever we came from with EE
+	 * on, even if that may mean some redundant calls being tracked. Maybe later
+	 * we could encode what the exception did somewhere or test the exception
+	 * type in the pt_regs but that sounds overkill
+	 */
+	andi.	r10,r9,MSR_EE
+	beq	1f
+	/*
+	 * Since the ftrace irqsoff latency trace checks CALLER_ADDR1,
+	 * which is the stack frame here, we need to force a stack frame
+	 * in case we came from user space.
+	 */
+	stwu	r1,-32(r1)
+	mflr	r0
+	stw	r0,4(r1)
+	stwu	r1,-32(r1)
+	bl	trace_hardirqs_on
+	lwz	r1,0(r1)
+	lwz	r1,0(r1)
+	lwz	r9,_MSR(r1)
+1:
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+	lwz	r0,GPR0(r1)
+	lwz	r2,GPR2(r1)
+	REST_4GPRS(3, r1)
+	REST_2GPRS(7, r1)
+
+	lwz	r10,_XER(r1)
+	lwz	r11,_CTR(r1)
+	mtspr	SPRN_XER,r10
+	mtctr	r11
+
+	PPC405_ERR77(0,r1)
+BEGIN_FTR_SECTION
+	lwarx	r11,0,r1
+END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
+	stwcx.	r0,0,r1			/* to clear the reservation */
+
+#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+	andi.	r10,r9,MSR_RI		/* check if this exception occurred */
+	beql	nonrecoverable		/* at a bad place (MSR:RI = 0) */
+
+	lwz	r10,_CCR(r1)
+	lwz	r11,_LINK(r1)
+	mtcrf	0xFF,r10
+	mtlr	r11
+
+	/*
+	 * Once we put values in SRR0 and SRR1, we are in a state
+	 * where exceptions are not recoverable, since taking an
+	 * exception will trash SRR0 and SRR1. Therefore we clear the
+	 * MSR:RI bit to indicate this. If we do take an exception,
+	 * we can't return to the point of the exception but we
+	 * can restart the exception exit path at the label
+	 * exc_exit_restart below.  -- paulus
+	 */
+	LOAD_MSR_KERNEL(r10,MSR_KERNEL & ~MSR_RI)
+	SYNC
+	MTMSRD(r10)		/* clear the RI bit */
+	.globl exc_exit_restart
+exc_exit_restart:
+	lwz	r12,_NIP(r1)
+	FIX_SRR1(r9,r10)
+	mtspr	SPRN_SRR0,r12
+	mtspr	SPRN_SRR1,r9
+	REST_4GPRS(9, r1)
+	lwz	r1,GPR1(r1)
+	.globl exc_exit_restart_end
+exc_exit_restart_end:
+	SYNC
+	RFI
+
+#else /* !(CONFIG_4xx || CONFIG_BOOKE) */
+	/*
+	 * This is a bit different on 4xx/Book-E because it doesn't have
+	 * the RI bit in the MSR.
+	 * The TLB miss handler checks if we have interrupted
+	 * the exception exit path and restarts it if so
+	 * (well maybe one day it will... :).
+ */ + lwz r11,_LINK(r1) + mtlr r11 + lwz r10,_CCR(r1) + mtcrf 0xff,r10 + REST_2GPRS(9, r1) + .globl exc_exit_restart +exc_exit_restart: + lwz r11,_NIP(r1) + lwz r12,_MSR(r1) +exc_exit_start: + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 + REST_2GPRS(11, r1) + lwz r1,GPR1(r1) + .globl exc_exit_restart_end +exc_exit_restart_end: + PPC405_ERR77_SYNC + rfi + b . /* prevent prefetch past rfi */ + +/* + * Returning from a critical interrupt in user mode doesn't need + * to be any different from a normal exception. For a critical + * interrupt in the kernel, we just return (without checking for + * preemption) since the interrupt may have happened at some crucial + * place (e.g. inside the TLB miss handler), and because we will be + * running with r1 pointing into critical_stack, not the current + * process's kernel stack (and therefore current_thread_info() will + * give the wrong answer). + * We have to restore various SPRs that may have been in use at the + * time of the critical interrupt. + * + */ +#ifdef CONFIG_40x +#define PPC_40x_TURN_OFF_MSR_DR \ + /* avoid any possible TLB misses here by turning off MSR.DR, we \ + * assume the instructions here are mapped by a pinned TLB entry */ \ + li r10,MSR_IR; \ + mtmsr r10; \ + isync; \ + tophys(r1, r1); +#else +#define PPC_40x_TURN_OFF_MSR_DR +#endif + +#define RET_FROM_EXC_LEVEL(exc_lvl_srr0, exc_lvl_srr1, exc_lvl_rfi) \ + REST_NVGPRS(r1); \ + lwz r3,_MSR(r1); \ + andi. r3,r3,MSR_PR; \ + LOAD_MSR_KERNEL(r10,MSR_KERNEL); \ + bne user_exc_return; \ + lwz r0,GPR0(r1); \ + lwz r2,GPR2(r1); \ + REST_4GPRS(3, r1); \ + REST_2GPRS(7, r1); \ + lwz r10,_XER(r1); \ + lwz r11,_CTR(r1); \ + mtspr SPRN_XER,r10; \ + mtctr r11; \ + PPC405_ERR77(0,r1); \ + stwcx. r0,0,r1; /* to clear the reservation */ \ + lwz r11,_LINK(r1); \ + mtlr r11; \ + lwz r10,_CCR(r1); \ + mtcrf 0xff,r10; \ + PPC_40x_TURN_OFF_MSR_DR; \ + lwz r9,_DEAR(r1); \ + lwz r10,_ESR(r1); \ + mtspr SPRN_DEAR,r9; \ + mtspr SPRN_ESR,r10; \ + lwz r11,_NIP(r1); \ + lwz r12,_MSR(r1); \ + mtspr exc_lvl_srr0,r11; \ + mtspr exc_lvl_srr1,r12; \ + lwz r9,GPR9(r1); \ + lwz r12,GPR12(r1); \ + lwz r10,GPR10(r1); \ + lwz r11,GPR11(r1); \ + lwz r1,GPR1(r1); \ + PPC405_ERR77_SYNC; \ + exc_lvl_rfi; \ + b .; /* prevent prefetch past exc_lvl_rfi */ + +#define RESTORE_xSRR(exc_lvl_srr0, exc_lvl_srr1) \ + lwz r9,_##exc_lvl_srr0(r1); \ + lwz r10,_##exc_lvl_srr1(r1); \ + mtspr SPRN_##exc_lvl_srr0,r9; \ + mtspr SPRN_##exc_lvl_srr1,r10; + +#if defined(CONFIG_PPC_BOOK3E_MMU) +#ifdef CONFIG_PHYS_64BIT +#define RESTORE_MAS7 \ + lwz r11,MAS7(r1); \ + mtspr SPRN_MAS7,r11; +#else +#define RESTORE_MAS7 +#endif /* CONFIG_PHYS_64BIT */ +#define RESTORE_MMU_REGS \ + lwz r9,MAS0(r1); \ + lwz r10,MAS1(r1); \ + lwz r11,MAS2(r1); \ + mtspr SPRN_MAS0,r9; \ + lwz r9,MAS3(r1); \ + mtspr SPRN_MAS1,r10; \ + lwz r10,MAS6(r1); \ + mtspr SPRN_MAS2,r11; \ + mtspr SPRN_MAS3,r9; \ + mtspr SPRN_MAS6,r10; \ + RESTORE_MAS7; +#elif defined(CONFIG_44x) +#define RESTORE_MMU_REGS \ + lwz r9,MMUCR(r1); \ + mtspr SPRN_MMUCR,r9; +#else +#define RESTORE_MMU_REGS +#endif + +#ifdef CONFIG_40x + .globl ret_from_crit_exc +ret_from_crit_exc: + mfspr r9,SPRN_SPRG_THREAD + lis r10,saved_ksp_limit@ha; + lwz r10,saved_ksp_limit@l(r10); + tovirt(r9,r9); + stw r10,KSP_LIMIT(r9) + lis r9,crit_srr0@ha; + lwz r9,crit_srr0@l(r9); + lis r10,crit_srr1@ha; + lwz r10,crit_srr1@l(r10); + mtspr SPRN_SRR0,r9; + mtspr SPRN_SRR1,r10; + RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI) +#endif /* CONFIG_40x */ + +#ifdef CONFIG_BOOKE + .globl ret_from_crit_exc +ret_from_crit_exc: + 
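+	/*
+	 * (Editor's illustration, not part of the original patch.)
+	 * The RESTORE_xSRR() uses below rely on token pasting;
+	 * RESTORE_xSRR(CSRR0,CSRR1), for instance, expands to:
+	 *
+	 *	lwz	r9,_CSRR0(r1);
+	 *	lwz	r10,_CSRR1(r1);
+	 *	mtspr	SPRN_CSRR0,r9;
+	 *	mtspr	SPRN_CSRR1,r10;
+	 *
+	 * so each exception level puts back its own saved SRR pair.
+	 */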
mfspr r9,SPRN_SPRG_THREAD + lwz r10,SAVED_KSP_LIMIT(r1) + stw r10,KSP_LIMIT(r9) + RESTORE_xSRR(SRR0,SRR1); + RESTORE_MMU_REGS; + RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI) + + .globl ret_from_debug_exc +ret_from_debug_exc: + mfspr r9,SPRN_SPRG_THREAD + lwz r10,SAVED_KSP_LIMIT(r1) + stw r10,KSP_LIMIT(r9) + lwz r9,THREAD_INFO-THREAD(r9) + rlwinm r10,r1,0,0,(31-THREAD_SHIFT) + lwz r10,TI_PREEMPT(r10) + stw r10,TI_PREEMPT(r9) + RESTORE_xSRR(SRR0,SRR1); + RESTORE_xSRR(CSRR0,CSRR1); + RESTORE_MMU_REGS; + RET_FROM_EXC_LEVEL(SPRN_DSRR0, SPRN_DSRR1, PPC_RFDI) + + .globl ret_from_mcheck_exc +ret_from_mcheck_exc: + mfspr r9,SPRN_SPRG_THREAD + lwz r10,SAVED_KSP_LIMIT(r1) + stw r10,KSP_LIMIT(r9) + RESTORE_xSRR(SRR0,SRR1); + RESTORE_xSRR(CSRR0,CSRR1); + RESTORE_xSRR(DSRR0,DSRR1); + RESTORE_MMU_REGS; + RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI) +#endif /* CONFIG_BOOKE */ + +/* + * Load the DBCR0 value for a task that is being ptraced, + * having first saved away the global DBCR0. Note that r0 + * has the dbcr0 value to set upon entry to this. + */ +load_dbcr0: + mfmsr r10 /* first disable debug exceptions */ + rlwinm r10,r10,0,~MSR_DE + mtmsr r10 + isync + mfspr r10,SPRN_DBCR0 + lis r11,global_dbcr0@ha + addi r11,r11,global_dbcr0@l +#ifdef CONFIG_SMP + rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + lwz r9,TI_CPU(r9) + slwi r9,r9,3 + add r11,r11,r9 +#endif + stw r10,0(r11) + mtspr SPRN_DBCR0,r0 + lwz r10,4(r11) + addi r10,r10,1 + stw r10,4(r11) + li r11,-1 + mtspr SPRN_DBSR,r11 /* clear all pending debug events */ + blr + + .section .bss + .align 4 +global_dbcr0: + .space 8*NR_CPUS + .previous +#endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ + +do_work: /* r10 contains MSR_KERNEL here */ + andi. r0,r9,_TIF_NEED_RESCHED + beq do_user_signal + +do_resched: /* r10 contains MSR_KERNEL here */ + /* Note: We don't need to inform lockdep that we are enabling + * interrupts here. As far as it knows, they are already enabled + */ + ori r10,r10,MSR_EE + SYNC + MTMSRD(r10) /* hard-enable interrupts */ + bl schedule +recheck: + /* Note: And we don't tell it we are disabling them again + * neither. Those disable/enable cycles used to peek at + * TI_FLAGS aren't advertised. + */ + LOAD_MSR_KERNEL(r10,MSR_KERNEL) + SYNC + MTMSRD(r10) /* disable interrupts */ + rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + lwz r9,TI_FLAGS(r9) + andi. r0,r9,_TIF_NEED_RESCHED + bne- do_resched + andi. r0,r9,_TIF_USER_WORK_MASK + beq restore_user +do_user_signal: /* r10 contains MSR_KERNEL here */ + ori r10,r10,MSR_EE + SYNC + MTMSRD(r10) /* hard-enable interrupts */ + /* save r13-r31 in the exception frame, if not already done */ + lwz r3,_TRAP(r1) + andi. r0,r3,1 + beq 2f + SAVE_NVGPRS(r1) + rlwinm r3,r3,0,0,30 + stw r3,_TRAP(r1) +2: addi r3,r1,STACK_FRAME_OVERHEAD + mr r4,r9 + bl do_notify_resume + REST_NVGPRS(r1) + b recheck + +/* + * We come here when we are at the end of handling an exception + * that occurred at a place where taking an exception will lose + * state information, such as the contents of SRR0 and SRR1. 
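+ *
+ * (Editor's illustration, not part of the original patch.) The check
+ * below amounts to, in rough C terms:
+ *
+ *	if (nip >= exc_exit_restart && nip < exc_exit_restart_end) {
+ *		ee_restarts++;
+ *		nip = exc_exit_restart;		// replay the exit sequence
+ *	} else {
+ *		nonrecoverable_exception(regs);	// no way back
+ *	}
+ *
+ * where "nip" is the interrupted address carried in r12.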
+ */ +nonrecoverable: + lis r10,exc_exit_restart_end@ha + addi r10,r10,exc_exit_restart_end@l + cmplw r12,r10 + bge 3f + lis r11,exc_exit_restart@ha + addi r11,r11,exc_exit_restart@l + cmplw r12,r11 + blt 3f + lis r10,ee_restarts@ha + lwz r12,ee_restarts@l(r10) + addi r12,r12,1 + stw r12,ee_restarts@l(r10) + mr r12,r11 /* restart at exc_exit_restart */ + blr +3: /* OK, we can't recover, kill this process */ + /* but the 601 doesn't implement the RI bit, so assume it's OK */ +BEGIN_FTR_SECTION + blr +END_FTR_SECTION_IFSET(CPU_FTR_601) + lwz r3,_TRAP(r1) + andi. r0,r3,1 + beq 4f + SAVE_NVGPRS(r1) + rlwinm r3,r3,0,0,30 + stw r3,_TRAP(r1) +4: addi r3,r1,STACK_FRAME_OVERHEAD + bl nonrecoverable_exception + /* shouldn't return */ + b 4b + + .section .bss + .align 2 +ee_restarts: + .space 4 + .previous + +/* + * PROM code for specific machines follows. Put it + * here so it's easy to add arch-specific sections later. + * -- Cort + */ +#ifdef CONFIG_PPC_RTAS +/* + * On CHRP, the Run-Time Abstraction Services (RTAS) have to be + * called with the MMU off. + */ +_GLOBAL(enter_rtas) + stwu r1,-INT_FRAME_SIZE(r1) + mflr r0 + stw r0,INT_FRAME_SIZE+4(r1) + LOAD_REG_ADDR(r4, rtas) + lis r6,1f@ha /* physical return address for rtas */ + addi r6,r6,1f@l + tophys(r6,r6) + tophys(r7,r1) + lwz r8,RTASENTRY(r4) + lwz r4,RTASBASE(r4) + mfmsr r9 + stw r9,8(r1) + LOAD_MSR_KERNEL(r0,MSR_KERNEL) + SYNC /* disable interrupts so SRR0/1 */ + MTMSRD(r0) /* don't get trashed */ + li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR) + mtlr r6 + mtspr SPRN_SPRG_RTAS,r7 + mtspr SPRN_SRR0,r8 + mtspr SPRN_SRR1,r9 + RFI +1: tophys(r9,r1) + lwz r8,INT_FRAME_SIZE+4(r9) /* get return address */ + lwz r9,8(r9) /* original msr value */ + FIX_SRR1(r9,r0) + addi r1,r1,INT_FRAME_SIZE + li r0,0 + mtspr SPRN_SPRG_RTAS,r0 + mtspr SPRN_SRR0,r8 + mtspr SPRN_SRR1,r9 + RFI /* return to caller */ + + .globl machine_check_in_rtas +machine_check_in_rtas: + twi 31,0,0 + /* XXX load up BATs and panic */ + +#endif /* CONFIG_PPC_RTAS */ + +#ifdef CONFIG_FUNCTION_TRACER +#ifdef CONFIG_DYNAMIC_FTRACE +_GLOBAL(mcount) +_GLOBAL(_mcount) + /* + * It is required that _mcount on PPC32 must preserve the + * link register. But we have r0 to play with. We use r0 + * to push the return address back to the caller of mcount + * into the ctr register, restore the link register and + * then jump back using the ctr register. 
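+ *
+ * (Editor's illustration, not part of the original patch.) A function
+ * compiled with -pg begins with a call sequence of the form:
+ *
+ *	mflr	r0
+ *	stw	r0,4(r1)	// caller's return address, saved
+ *	bl	_mcount
+ *
+ * so the stub below parks the mcount return address in ctr, reloads
+ * the original link register from 4(r1), and branches back via ctr.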
+ */
+	mflr	r0
+	mtctr	r0
+	lwz	r0, 4(r1)
+	mtlr	r0
+	bctr
+
+_GLOBAL(ftrace_caller)
+	MCOUNT_SAVE_FRAME
+	/* r3 ends up with link register */
+	subi	r3, r3, MCOUNT_INSN_SIZE
+.globl ftrace_call
+ftrace_call:
+	bl	ftrace_stub
+	nop
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+	b	ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+#endif
+	MCOUNT_RESTORE_FRAME
+	/* old link register ends up in ctr reg */
+	bctr
+#else
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+
+	MCOUNT_SAVE_FRAME
+
+	subi	r3, r3, MCOUNT_INSN_SIZE
+	LOAD_REG_ADDR(r5, ftrace_trace_function)
+	lwz	r5,0(r5)
+
+	mtctr	r5
+	bctrl
+	nop
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	b	ftrace_graph_caller
+#endif
+	MCOUNT_RESTORE_FRAME
+	bctr
+#endif
+
+_GLOBAL(ftrace_stub)
+	blr
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(ftrace_graph_caller)
+	/* load r4 with local address */
+	lwz	r4, 44(r1)
+	subi	r4, r4, MCOUNT_INSN_SIZE
+
+	/* get the parent address */
+	addi	r3, r1, 52
+
+	bl	prepare_ftrace_return
+	nop
+
+	MCOUNT_RESTORE_FRAME
+	/* old link register ends up in ctr reg */
+	bctr
+
+_GLOBAL(return_to_handler)
+	/* need to save return values */
+	stwu	r1, -32(r1)
+	stw	r3, 20(r1)
+	stw	r4, 16(r1)
+	stw	r31, 12(r1)
+	mr	r31, r1
+
+	bl	ftrace_return_to_handler
+	nop
+
+	/* return value has real return address */
+	mtlr	r3
+
+	lwz	r3, 20(r1)
+	lwz	r4, 16(r1)
+	lwz	r31,12(r1)
+	lwz	r1, 0(r1)
+
+	/* Jump back to real return address */
+	blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
new file mode 100644
index 00000000..ef2074c3
--- /dev/null
+++ b/arch/powerpc/kernel/entry_64.S
@@ -0,0 +1,1152 @@
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *  Adapted for Power Macintosh by Paul Mackerras.
+ *  Low-level exception handlers and MMU support
+ *  rewritten by Paul Mackerras.
+ *    Copyright (C) 1996 Paul Mackerras.
+ *  MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+ *
+ *  This file contains the system call entry code, context switch
+ *  code, and exception/interrupt return code for PowerPC.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <asm/unistd.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/bug.h>
+#include <asm/ptrace.h>
+#include <asm/irqflags.h>
+#include <asm/ftrace.h>
+#include <asm/hw_irq.h>
+
+/*
+ * System calls.
+ */
+	.section	".toc","aw"
+.SYS_CALL_TABLE:
+	.tc .sys_call_table[TC],.sys_call_table
+
+/* This value is used to mark exception frames on the stack. */
+exception_marker:
+	.tc	ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
+
+	.section	".text"
+	.align 7
+
+#undef SHOW_SYSCALLS
+
+	.globl system_call_common
+system_call_common:
+	andi.
r10,r12,MSR_PR + mr r10,r1 + addi r1,r1,-INT_FRAME_SIZE + beq- 1f + ld r1,PACAKSAVE(r13) +1: std r10,0(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + std r0,GPR0(r1) + std r10,GPR1(r1) + ACCOUNT_CPU_USER_ENTRY(r10, r11) + /* + * This "crclr so" clears CR0.SO, which is the error indication on + * return from this system call. There must be no cmp instruction + * between it and the "mfcr r9" below, otherwise if XER.SO is set, + * CR0.SO will get set, causing all system calls to appear to fail. + */ + crclr so + std r2,GPR2(r1) + std r3,GPR3(r1) + std r4,GPR4(r1) + std r5,GPR5(r1) + std r6,GPR6(r1) + std r7,GPR7(r1) + std r8,GPR8(r1) + li r11,0 + std r11,GPR9(r1) + std r11,GPR10(r1) + std r11,GPR11(r1) + std r11,GPR12(r1) + std r9,GPR13(r1) + mfcr r9 + mflr r10 + li r11,0xc01 + std r9,_CCR(r1) + std r10,_LINK(r1) + std r11,_TRAP(r1) + mfxer r9 + mfctr r10 + std r9,_XER(r1) + std r10,_CTR(r1) + std r3,ORIG_GPR3(r1) + ld r2,PACATOC(r13) + addi r9,r1,STACK_FRAME_OVERHEAD + ld r11,exception_marker@toc(r2) + std r11,-16(r9) /* "regshere" marker */ +#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR) +BEGIN_FW_FTR_SECTION + beq 33f + /* if from user, see if there are any DTL entries to process */ + ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */ + ld r11,PACA_DTL_RIDX(r13) /* get log read index */ + ld r10,LPPACA_DTLIDX(r10) /* get log write index */ + cmpd cr1,r11,r10 + beq+ cr1,33f + bl .accumulate_stolen_time + REST_GPR(0,r1) + REST_4GPRS(3,r1) + REST_2GPRS(7,r1) + addi r9,r1,STACK_FRAME_OVERHEAD +33: +END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) +#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */ + + /* + * A syscall should always be called with interrupts enabled + * so we just unconditionally hard-enable here. When some kind + * of irq tracing is used, we additionally check that condition + * is correct + */ +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG) + lbz r10,PACASOFTIRQEN(r13) + xori r10,r10,1 +1: tdnei r10,0 + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING +#endif + +#ifdef CONFIG_PPC_BOOK3E + wrteei 1 +#else + ld r11,PACAKMSR(r13) + ori r11,r11,MSR_EE + mtmsrd r11,1 +#endif /* CONFIG_PPC_BOOK3E */ + + /* We do need to set SOFTE in the stack frame or the return + * from interrupt will be painful + */ + li r10,1 + std r10,SOFTE(r1) + +#ifdef SHOW_SYSCALLS + bl .do_show_syscall + REST_GPR(0,r1) + REST_4GPRS(3,r1) + REST_2GPRS(7,r1) + addi r9,r1,STACK_FRAME_OVERHEAD +#endif + clrrdi r11,r1,THREAD_SHIFT + ld r10,TI_FLAGS(r11) + andi. r11,r10,_TIF_SYSCALL_T_OR_A + bne- syscall_dotrace +syscall_dotrace_cont: + cmpldi 0,r0,NR_syscalls + bge- syscall_enosys + +system_call: /* label this so stack traces look sane */ +/* + * Need to vector to 32 Bit or default sys_call_table here, + * based on caller's run-mode / personality. + */ + ld r11,.SYS_CALL_TABLE@toc(2) + andi. r10,r10,_TIF_32BIT + beq 15f + addi r11,r11,8 /* use 32-bit syscall entries */ + clrldi r3,r3,32 + clrldi r4,r4,32 + clrldi r5,r5,32 + clrldi r6,r6,32 + clrldi r7,r7,32 + clrldi r8,r8,32 +15: + slwi r0,r0,4 + ldx r10,r11,r0 /* Fetch system call handler [ptr] */ + mtctr r10 + bctrl /* Call handler */ + +syscall_exit: + std r3,RESULT(r1) +#ifdef SHOW_SYSCALLS + bl .do_show_syscall_exit + ld r3,RESULT(r1) +#endif + clrrdi r12,r1,THREAD_SHIFT + + ld r8,_MSR(r1) +#ifdef CONFIG_PPC_BOOK3S + /* No MSR:RI on BookE */ + andi. 
r10,r8,MSR_RI + beq- unrecov_restore +#endif + /* + * Disable interrupts so current_thread_info()->flags can't change, + * and so that we don't get interrupted after loading SRR0/1. + */ +#ifdef CONFIG_PPC_BOOK3E + wrteei 0 +#else + ld r10,PACAKMSR(r13) + mtmsrd r10,1 +#endif /* CONFIG_PPC_BOOK3E */ + + ld r9,TI_FLAGS(r12) + li r11,-_LAST_ERRNO + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) + bne- syscall_exit_work + cmpld r3,r11 + ld r5,_CCR(r1) + bge- syscall_error +syscall_error_cont: + ld r7,_NIP(r1) +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + andi. r6,r8,MSR_PR + ld r4,_LINK(r1) + /* + * Clear RI before restoring r13. If we are returning to + * userspace and we take an exception after restoring r13, + * we end up corrupting the userspace r13 value. + */ +#ifdef CONFIG_PPC_BOOK3S + /* No MSR:RI on BookE */ + li r12,MSR_RI + andc r11,r10,r12 + mtmsrd r11,1 /* clear MSR.RI */ +#endif /* CONFIG_PPC_BOOK3S */ + + beq- 1f + ACCOUNT_CPU_USER_EXIT(r11, r12) + ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ +1: ld r2,GPR2(r1) + ld r1,GPR1(r1) + mtlr r4 + mtcr r5 + mtspr SPRN_SRR0,r7 + mtspr SPRN_SRR1,r8 + RFI + b . /* prevent speculative execution */ + +syscall_error: + oris r5,r5,0x1000 /* Set SO bit in CR */ + neg r3,r3 + std r5,_CCR(r1) + b syscall_error_cont + +/* Traced system call support */ +syscall_dotrace: + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_syscall_trace_enter + /* + * Restore argument registers possibly just changed. + * We use the return value of do_syscall_trace_enter + * for the call number to look up in the table (r0). + */ + mr r0,r3 + ld r3,GPR3(r1) + ld r4,GPR4(r1) + ld r5,GPR5(r1) + ld r6,GPR6(r1) + ld r7,GPR7(r1) + ld r8,GPR8(r1) + addi r9,r1,STACK_FRAME_OVERHEAD + clrrdi r10,r1,THREAD_SHIFT + ld r10,TI_FLAGS(r10) + b syscall_dotrace_cont + +syscall_enosys: + li r3,-ENOSYS + b syscall_exit + +syscall_exit_work: + /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr. + If TIF_NOERROR is set, just save r3 as it is. */ + + andi. r0,r9,_TIF_RESTOREALL + beq+ 0f + REST_NVGPRS(r1) + b 2f +0: cmpld r3,r11 /* r10 is -LAST_ERRNO */ + blt+ 1f + andi. r0,r9,_TIF_NOERROR + bne- 1f + ld r5,_CCR(r1) + neg r3,r3 + oris r5,r5,0x1000 /* Set SO bit in CR */ + std r5,_CCR(r1) +1: std r3,GPR3(r1) +2: andi. r0,r9,(_TIF_PERSYSCALL_MASK) + beq 4f + + /* Clear per-syscall TIF flags if any are set. */ + + li r11,_TIF_PERSYSCALL_MASK + addi r12,r12,TI_FLAGS +3: ldarx r10,0,r12 + andc r10,r10,r11 + stdcx. r10,0,r12 + bne- 3b + subi r12,r12,TI_FLAGS + +4: /* Anything else left to do? */ + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) + beq .ret_from_except_lite + + /* Re-enable interrupts */ +#ifdef CONFIG_PPC_BOOK3E + wrteei 1 +#else + ld r10,PACAKMSR(r13) + ori r10,r10,MSR_EE + mtmsrd r10,1 +#endif /* CONFIG_PPC_BOOK3E */ + + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_syscall_trace_leave + b .ret_from_except + +/* Save non-volatile GPRs, if not already saved. */ +_GLOBAL(save_nvgprs) + ld r11,_TRAP(r1) + andi. r0,r11,1 + beqlr- + SAVE_NVGPRS(r1) + clrrdi r0,r11,1 + std r0,_TRAP(r1) + blr + + +/* + * The sigsuspend and rt_sigsuspend system calls can call do_signal + * and thus put the process into the stopped state where we might + * want to examine its user state with ptrace. Therefore we need + * to save all the nonvolatile registers (r14 - r31) before calling + * the C code. 
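+ *
+ * In C terms the requirement is just that the saved pt_regs frame be
+ * complete when ptrace looks at it; e.g. a debugger reading a
+ * non-volatile GPR does, roughly (a sketch, not the exact code):
+ *
+ *	struct pt_regs *regs = task_pt_regs(child);
+ *	long r14 = regs->gpr[14];	<- stale unless save_nvgprs ran
+ *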
Similarly, fork, vfork and clone need the full + * register state on the stack so that it can be copied to the child. + */ + +_GLOBAL(ppc_fork) + bl .save_nvgprs + bl .sys_fork + b syscall_exit + +_GLOBAL(ppc_vfork) + bl .save_nvgprs + bl .sys_vfork + b syscall_exit + +_GLOBAL(ppc_clone) + bl .save_nvgprs + bl .sys_clone + b syscall_exit + +_GLOBAL(ppc32_swapcontext) + bl .save_nvgprs + bl .compat_sys_swapcontext + b syscall_exit + +_GLOBAL(ppc64_swapcontext) + bl .save_nvgprs + bl .sys_swapcontext + b syscall_exit + +_GLOBAL(ret_from_fork) + bl .schedule_tail + REST_NVGPRS(r1) + li r3,0 + b syscall_exit + +/* + * This routine switches between two different tasks. The process + * state of one is saved on its kernel stack. Then the state + * of the other is restored from its kernel stack. The memory + * management hardware is updated to the second process's state. + * Finally, we can return to the second process, via ret_from_except. + * On entry, r3 points to the THREAD for the current task, r4 + * points to the THREAD for the new task. + * + * Note: there are two ways to get to the "going out" portion + * of this code; either by coming in via the entry (_switch) + * or via "fork" which must set up an environment equivalent + * to the "_switch" path. If you change this you'll have to change + * the fork code also. + * + * The code which creates the new task context is in 'copy_thread' + * in arch/powerpc/kernel/process.c + */ + .align 7 +_GLOBAL(_switch) + mflr r0 + std r0,16(r1) + stdu r1,-SWITCH_FRAME_SIZE(r1) + /* r3-r13 are caller saved -- Cort */ + SAVE_8GPRS(14, r1) + SAVE_10GPRS(22, r1) + mflr r20 /* Return to switch caller */ + mfmsr r22 + li r0, MSR_FP +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + oris r0,r0,MSR_VSX@h /* Disable VSX */ +END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif /* CONFIG_VSX */ +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + oris r0,r0,MSR_VEC@h /* Disable altivec */ + mfspr r24,SPRN_VRSAVE /* save vrsave register value */ + std r24,THREAD_VRSAVE(r3) +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_PPC64 +BEGIN_FTR_SECTION + mfspr r25,SPRN_DSCR + std r25,THREAD_DSCR(r3) +END_FTR_SECTION_IFSET(CPU_FTR_DSCR) +#endif + and. r0,r0,r22 + beq+ 1f + andc r22,r22,r0 + MTMSRD(r22) + isync +1: std r20,_NIP(r1) + mfcr r23 + std r23,_CCR(r1) + std r1,KSP(r3) /* Set old stack pointer */ + +#ifdef CONFIG_SMP + /* We need a sync somewhere here to make sure that if the + * previous task gets rescheduled on another CPU, it sees all + * stores it has performed on this one. + */ + sync +#endif /* CONFIG_SMP */ + + /* + * If we optimise away the clear of the reservation in system + * calls because we know the CPU tracks the address of the + * reservation, then we need to clear it here to cover the + * case that the kernel context switch path has no larx + * instructions. + */ +BEGIN_FTR_SECTION + ldarx r6,0,r1 +END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS) + + addi r6,r4,-THREAD /* Convert THREAD to 'current' */ + std r6,PACACURRENT(r13) /* Set new 'current' */ + + ld r8,KSP(r4) /* new stack pointer */ +#ifdef CONFIG_PPC_BOOK3S +BEGIN_FTR_SECTION + BEGIN_FTR_SECTION_NESTED(95) + clrrdi r6,r8,28 /* get its ESID */ + clrrdi r9,r1,28 /* get current sp ESID */ + FTR_SECTION_ELSE_NESTED(95) + clrrdi r6,r8,40 /* get its 1T ESID */ + clrrdi r9,r1,40 /* get current sp 1T ESID */ + ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_1T_SEGMENT, 95) +FTR_SECTION_ELSE + b 2f +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_SLB) + clrldi. r0,r6,2 /* is new ESID c00000000? 
*/ + cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */ + cror eq,4*cr1+eq,eq + beq 2f /* if yes, don't slbie it */ + + /* Bolt in the new stack SLB entry */ + ld r7,KSP_VSID(r4) /* Get new stack's VSID */ + oris r0,r6,(SLB_ESID_V)@h + ori r0,r0,(SLB_NUM_BOLTED-1)@l +BEGIN_FTR_SECTION + li r9,MMU_SEGSIZE_1T /* insert B field */ + oris r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h + rldimi r7,r9,SLB_VSID_SSIZE_SHIFT,0 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) + + /* Update the last bolted SLB. No write barriers are needed + * here, provided we only update the current CPU's SLB shadow + * buffer. + */ + ld r9,PACA_SLBSHADOWPTR(r13) + li r12,0 + std r12,SLBSHADOW_STACKESID(r9) /* Clear ESID */ + std r7,SLBSHADOW_STACKVSID(r9) /* Save VSID */ + std r0,SLBSHADOW_STACKESID(r9) /* Save ESID */ + + /* No need to check for MMU_FTR_NO_SLBIE_B here, since when + * we have 1TB segments, the only CPUs known to have the errata + * only support less than 1TB of system memory and we'll never + * actually hit this code path. + */ + + slbie r6 + slbie r6 /* Workaround POWER5 < DD2.1 issue */ + slbmte r7,r0 + isync +2: +#endif /* !CONFIG_PPC_BOOK3S */ + + clrrdi r7,r8,THREAD_SHIFT /* base of new stack */ + /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE + because we don't need to leave the 288-byte ABI gap at the + top of the kernel stack. */ + addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE + + mr r1,r8 /* start using new stack pointer */ + std r7,PACAKSAVE(r13) + + ld r6,_CCR(r1) + mtcrf 0xFF,r6 + +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + ld r0,THREAD_VRSAVE(r4) + mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */ +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_PPC64 +BEGIN_FTR_SECTION + ld r0,THREAD_DSCR(r4) + cmpd r0,r25 + beq 1f + mtspr SPRN_DSCR,r0 +1: +END_FTR_SECTION_IFSET(CPU_FTR_DSCR) +#endif + + /* r3-r13 are destroyed -- Cort */ + REST_8GPRS(14, r1) + REST_10GPRS(22, r1) + + /* convert old thread to its task_struct for return value */ + addi r3,r3,-THREAD + ld r7,_NIP(r1) /* Return to _switch caller in new task */ + mtlr r7 + addi r1,r1,SWITCH_FRAME_SIZE + blr + + .align 7 +_GLOBAL(ret_from_except) + ld r11,_TRAP(r1) + andi. r0,r11,1 + bne .ret_from_except_lite + REST_NVGPRS(r1) + +_GLOBAL(ret_from_except_lite) + /* + * Disable interrupts so that current_thread_info()->flags + * can't change between when we test it and when we return + * from the interrupt. + */ +#ifdef CONFIG_PPC_BOOK3E + wrteei 0 +#else + ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */ + mtmsrd r10,1 /* Update machine state */ +#endif /* CONFIG_PPC_BOOK3E */ + +#ifdef CONFIG_PREEMPT + clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */ + li r0,_TIF_NEED_RESCHED /* bits to check */ + ld r3,_MSR(r1) + ld r4,TI_FLAGS(r9) + /* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */ + rlwimi r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING + and. r0,r4,r0 /* check NEED_RESCHED and maybe SIGPENDING */ + bne do_work + +#else /* !CONFIG_PREEMPT */ + ld r3,_MSR(r1) /* Returning to user mode? */ + andi. r3,r3,MSR_PR + beq restore /* if not, just restore regs and return */ + + /* Check current_thread_info()->flags */ + clrrdi r9,r1,THREAD_SHIFT + ld r4,TI_FLAGS(r9) + andi. r0,r4,_TIF_USER_WORK_MASK + bne do_work +#endif /* !CONFIG_PREEMPT */ + + .globl fast_exc_return_irq +fast_exc_return_irq: +restore: + /* + * This is the main kernel exit path. 
First we check if we
+ * are about to re-enable interrupts
+ */
+ ld r5,SOFTE(r1)
+ lbz r6,PACASOFTIRQEN(r13)
+ cmpwi cr0,r5,0
+ beq restore_irq_off
+
+ /* We are enabling; if we were already enabled, just return */
+ cmpwi cr0,r6,1
+ beq cr0,do_restore
+
+ /*
+ * We are about to soft-enable interrupts (we are hard disabled
+ * at this point). We check if there's anything that needs to
+ * be replayed first.
+ */
+ lbz r0,PACAIRQHAPPENED(r13)
+ cmpwi cr0,r0,0
+ bne- restore_check_irq_replay
+
+ /*
+ * Get here when nothing happened while soft-disabled, just
+ * soft-enable and move on. We will hard-enable as a side
+ * effect of rfi
+ */
+restore_no_replay:
+ TRACE_ENABLE_INTS
+ li r0,1
+ stb r0,PACASOFTIRQEN(r13);
+
+ /*
+ * Final return path. BookE is handled in a different file
+ */
+do_restore:
+#ifdef CONFIG_PPC_BOOK3E
+ b .exception_return_book3e
+#else
+ /*
+ * Clear the reservation. If we know the CPU tracks the address of
+ * the reservation then we can potentially save some cycles and use
+ * a larx. On POWER6 and POWER7 this is significantly faster.
+ */
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+FTR_SECTION_ELSE
+ ldarx r4,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+ /*
+ * Some code paths such as load_up_fpu or altivec return directly
+ * here. They run entirely hard disabled and do not alter the
+ * interrupt state. They also don't use lwarx/stwcx. and thus
+ * are known not to leave dangling reservations.
+ */
+ .globl fast_exception_return
+fast_exception_return:
+ ld r3,_MSR(r1)
+ ld r4,_CTR(r1)
+ ld r0,_LINK(r1)
+ mtctr r4
+ mtlr r0
+ ld r4,_XER(r1)
+ mtspr SPRN_XER,r4
+
+ REST_8GPRS(5, r1)
+
+ andi. r0,r3,MSR_RI
+ beq- unrecov_restore
+
+ /*
+ * Clear RI before restoring r13. If we are returning to
+ * userspace and we take an exception after restoring r13,
+ * we end up corrupting the userspace r13 value.
+ */
+ ld r4,PACAKMSR(r13) /* Get kernel MSR without EE */
+ andc r4,r4,r0 /* r0 contains MSR_RI here */
+ mtmsrd r4,1
+
+ /*
+ * r13 is our per cpu area; only restore it if we are returning to
+ * userspace, since the value stored in the stack frame may belong
+ * to another CPU.
+ */
+ andi. r0,r3,MSR_PR
+ beq 1f
+ ACCOUNT_CPU_USER_EXIT(r2, r4)
+ REST_GPR(13, r1)
+1:
+ mtspr SPRN_SRR1,r3
+
+ ld r2,_CCR(r1)
+ mtcrf 0xFF,r2
+ ld r2,_NIP(r1)
+ mtspr SPRN_SRR0,r2
+
+ ld r0,GPR0(r1)
+ ld r2,GPR2(r1)
+ ld r3,GPR3(r1)
+ ld r4,GPR4(r1)
+ ld r1,GPR1(r1)
+
+ rfid
+ b . /* prevent speculative execution */
+
+#endif /* CONFIG_PPC_BOOK3E */
+
+ /*
+ * We are returning to a context with interrupts soft disabled.
+ *
+ * However, we may also be about to hard-enable, so we need to
+ * make sure that in this case, we also clear PACA_IRQ_HARD_DIS
+ * or that bit can get out of sync and bad things will happen
+ */
+restore_irq_off:
+ ld r3,_MSR(r1)
+ lbz r7,PACAIRQHAPPENED(r13)
+ andi. r0,r3,MSR_EE
+ beq 1f
+ rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS
+ stb r7,PACAIRQHAPPENED(r13)
+1: li r0,0
+ stb r0,PACASOFTIRQEN(r13);
+ TRACE_DISABLE_INTS
+ b do_restore
+
+ /*
+ * Something did happen, check if a re-emit is needed
+ * (this also clears paca->irq_happened)
+ */
+restore_check_irq_replay:
+ /* XXX: We could implement a fast path here where we check
+ * for irq_happened being just 0x01, in which case we can
+ * clear it and return. That means that we would potentially
+ * miss a decrementer having wrapped all the way around.
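+ *
+ * In C the proposed fast path would look roughly like this (a
+ * hypothetical sketch only; PACA_IRQ_HARD_DIS is the 0x01 bit):
+ *
+ *	if (local_paca->irq_happened == PACA_IRQ_HARD_DIS) {
+ *		local_paca->irq_happened = 0;
+ *		return 0;	<- nothing to replay
+ *	}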
+ * + * Still, this might be useful for things like hash_page + */ + bl .__check_irq_replay + cmpwi cr0,r3,0 + beq restore_no_replay + + /* + * We need to re-emit an interrupt. We do so by re-using our + * existing exception frame. We first change the trap value, + * but we need to ensure we preserve the low nibble of it + */ + ld r4,_TRAP(r1) + clrldi r4,r4,60 + or r4,r4,r3 + std r4,_TRAP(r1) + + /* + * Then find the right handler and call it. Interrupts are + * still soft-disabled and we keep them that way. + */ + cmpwi cr0,r3,0x500 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl .do_IRQ + b .ret_from_except +1: cmpwi cr0,r3,0x900 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl .timer_interrupt + b .ret_from_except +#ifdef CONFIG_PPC_BOOK3E +1: cmpwi cr0,r3,0x280 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl .doorbell_exception + b .ret_from_except +#endif /* CONFIG_PPC_BOOK3E */ +1: b .ret_from_except /* What else to do here ? */ + + + +3: +do_work: +#ifdef CONFIG_PREEMPT + andi. r0,r3,MSR_PR /* Returning to user mode? */ + bne user_work + /* Check that preempt_count() == 0 and interrupts are enabled */ + lwz r8,TI_PREEMPT(r9) + cmpwi cr1,r8,0 + ld r0,SOFTE(r1) + cmpdi r0,0 + crandc eq,cr1*4+eq,eq + bne restore + + /* + * Here we are preempting the current task. We want to make + * sure we are soft-disabled first + */ + SOFT_DISABLE_INTS(r3,r4) +1: bl .preempt_schedule_irq + + /* Re-test flags and eventually loop */ + clrrdi r9,r1,THREAD_SHIFT + ld r4,TI_FLAGS(r9) + andi. r0,r4,_TIF_NEED_RESCHED + bne 1b + b restore + +user_work: +#endif /* CONFIG_PREEMPT */ + + andi. r0,r4,_TIF_NEED_RESCHED + beq 1f + bl .restore_interrupts + bl .schedule + b .ret_from_except_lite + +1: bl .save_nvgprs + bl .restore_interrupts + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_notify_resume + b .ret_from_except + +unrecov_restore: + addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b unrecov_restore + +#ifdef CONFIG_PPC_RTAS +/* + * On CHRP, the Run-Time Abstraction Services (RTAS) have to be + * called with the MMU off. + * + * In addition, we need to be in 32b mode, at least for now. + * + * Note: r3 is an input parameter to rtas, so don't trash it... + */ +_GLOBAL(enter_rtas) + mflr r0 + std r0,16(r1) + stdu r1,-RTAS_FRAME_SIZE(r1) /* Save SP and create stack space. */ + + /* Because RTAS is running in 32b mode, it clobbers the high order half + * of all registers that it saves. We therefore save those registers + * RTAS might touch to the stack. (r0, r3-r13 are caller saved) + */ + SAVE_GPR(2, r1) /* Save the TOC */ + SAVE_GPR(13, r1) /* Save paca */ + SAVE_8GPRS(14, r1) /* Save the non-volatiles */ + SAVE_10GPRS(22, r1) /* ditto */ + + mfcr r4 + std r4,_CCR(r1) + mfctr r5 + std r5,_CTR(r1) + mfspr r6,SPRN_XER + std r6,_XER(r1) + mfdar r7 + std r7,_DAR(r1) + mfdsisr r8 + std r8,_DSISR(r1) + + /* Temporary workaround to clear CR until RTAS can be modified to + * ignore all bits. + */ + li r0,0 + mtcr r0 + +#ifdef CONFIG_BUG + /* There is no way it is acceptable to get here with interrupts enabled, + * check it with the asm equivalent of WARN_ON + */ + lbz r0,PACASOFTIRQEN(r13) +1: tdnei r0,0 + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING +#endif + + /* Hard-disable interrupts */ + mfmsr r6 + rldicl r7,r6,48,1 + rotldi r7,r7,16 + mtmsrd r7,1 + + /* Unfortunately, the stack pointer and the MSR are also clobbered, + * so they are saved in the PACA which allows us to restore + * our original state after RTAS returns. 
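+ *
+ * For context, the C caller side is roughly (a simplified sketch of
+ * rtas_call(), not the exact code):
+ *
+ *	lock_rtas();			<- takes rtas.lock with irqs off
+ *	rtas.args = *args;
+ *	enter_rtas(__pa(&rtas.args));
+ *	*args = rtas.args;
+ *	unlock_rtas();
+ *
+ * i.e. callers already run with interrupts soft-disabled, which is
+ * what the WARN_ON-style check above verifies.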
+ */ + std r1,PACAR1(r13) + std r6,PACASAVEDMSR(r13) + + /* Setup our real return addr */ + LOAD_REG_ADDR(r4,.rtas_return_loc) + clrldi r4,r4,2 /* convert to realmode address */ + mtlr r4 + + li r0,0 + ori r0,r0,MSR_EE|MSR_SE|MSR_BE|MSR_RI + andc r0,r6,r0 + + li r9,1 + rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG) + ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI + andc r6,r0,r9 + sync /* disable interrupts so SRR0/1 */ + mtmsrd r0 /* don't get trashed */ + + LOAD_REG_ADDR(r4, rtas) + ld r5,RTASENTRY(r4) /* get the rtas->entry value */ + ld r4,RTASBASE(r4) /* get the rtas->base value */ + + mtspr SPRN_SRR0,r5 + mtspr SPRN_SRR1,r6 + rfid + b . /* prevent speculative execution */ + +_STATIC(rtas_return_loc) + /* relocation is off at this point */ + GET_PACA(r4) + clrldi r4,r4,2 /* convert to realmode address */ + + bcl 20,31,$+4 +0: mflr r3 + ld r3,(1f-0b)(r3) /* get &.rtas_restore_regs */ + + mfmsr r6 + li r0,MSR_RI + andc r6,r6,r0 + sync + mtmsrd r6 + + ld r1,PACAR1(r4) /* Restore our SP */ + ld r4,PACASAVEDMSR(r4) /* Restore our MSR */ + + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + rfid + b . /* prevent speculative execution */ + + .align 3 +1: .llong .rtas_restore_regs + +_STATIC(rtas_restore_regs) + /* relocation is on at this point */ + REST_GPR(2, r1) /* Restore the TOC */ + REST_GPR(13, r1) /* Restore paca */ + REST_8GPRS(14, r1) /* Restore the non-volatiles */ + REST_10GPRS(22, r1) /* ditto */ + + GET_PACA(r13) + + ld r4,_CCR(r1) + mtcr r4 + ld r5,_CTR(r1) + mtctr r5 + ld r6,_XER(r1) + mtspr SPRN_XER,r6 + ld r7,_DAR(r1) + mtdar r7 + ld r8,_DSISR(r1) + mtdsisr r8 + + addi r1,r1,RTAS_FRAME_SIZE /* Unstack our frame */ + ld r0,16(r1) /* get return address */ + + mtlr r0 + blr /* return to caller */ + +#endif /* CONFIG_PPC_RTAS */ + +_GLOBAL(enter_prom) + mflr r0 + std r0,16(r1) + stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */ + + /* Because PROM is running in 32b mode, it clobbers the high order half + * of all registers that it saves. We therefore save those registers + * PROM might touch to the stack. (r0, r3-r13 are caller saved) + */ + SAVE_GPR(2, r1) + SAVE_GPR(13, r1) + SAVE_8GPRS(14, r1) + SAVE_10GPRS(22, r1) + mfcr r10 + mfmsr r11 + std r10,_CCR(r1) + std r11,_MSR(r1) + + /* Get the PROM entrypoint */ + mtlr r4 + + /* Switch MSR to 32 bits mode + */ +#ifdef CONFIG_PPC_BOOK3E + rlwinm r11,r11,0,1,31 + mtmsr r11 +#else /* CONFIG_PPC_BOOK3E */ + mfmsr r11 + li r12,1 + rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG) + andc r11,r11,r12 + li r12,1 + rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG) + andc r11,r11,r12 + mtmsrd r11 +#endif /* CONFIG_PPC_BOOK3E */ + isync + + /* Enter PROM here... 
*/ + blrl + + /* Just make sure that r1 top 32 bits didn't get + * corrupt by OF + */ + rldicl r1,r1,0,32 + + /* Restore the MSR (back to 64 bits) */ + ld r0,_MSR(r1) + MTMSRD(r0) + isync + + /* Restore other registers */ + REST_GPR(2, r1) + REST_GPR(13, r1) + REST_8GPRS(14, r1) + REST_10GPRS(22, r1) + ld r4,_CCR(r1) + mtcr r4 + + addi r1,r1,PROM_FRAME_SIZE + ld r0,16(r1) + mtlr r0 + blr + +#ifdef CONFIG_FUNCTION_TRACER +#ifdef CONFIG_DYNAMIC_FTRACE +_GLOBAL(mcount) +_GLOBAL(_mcount) + blr + +_GLOBAL(ftrace_caller) + /* Taken from output of objdump from lib64/glibc */ + mflr r3 + ld r11, 0(r1) + stdu r1, -112(r1) + std r3, 128(r1) + ld r4, 16(r11) + subi r3, r3, MCOUNT_INSN_SIZE +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +.globl ftrace_graph_call +ftrace_graph_call: + b ftrace_graph_stub +_GLOBAL(ftrace_graph_stub) +#endif + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 +_GLOBAL(ftrace_stub) + blr +#else +_GLOBAL(mcount) + blr + +_GLOBAL(_mcount) + /* Taken from output of objdump from lib64/glibc */ + mflr r3 + ld r11, 0(r1) + stdu r1, -112(r1) + std r3, 128(r1) + ld r4, 16(r11) + + subi r3, r3, MCOUNT_INSN_SIZE + LOAD_REG_ADDR(r5,ftrace_trace_function) + ld r5,0(r5) + ld r5,0(r5) + mtctr r5 + bctrl + nop + + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + b ftrace_graph_caller +#endif + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 +_GLOBAL(ftrace_stub) + blr + +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +_GLOBAL(ftrace_graph_caller) + /* load r4 with local address */ + ld r4, 128(r1) + subi r4, r4, MCOUNT_INSN_SIZE + + /* get the parent address */ + ld r11, 112(r1) + addi r3, r11, 16 + + bl .prepare_ftrace_return + nop + + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 + blr + +_GLOBAL(return_to_handler) + /* need to save return values */ + std r4, -24(r1) + std r3, -16(r1) + std r31, -8(r1) + mr r31, r1 + stdu r1, -112(r1) + + bl .ftrace_return_to_handler + nop + + /* return value has real return address */ + mtlr r3 + + ld r1, 0(r1) + ld r4, -24(r1) + ld r3, -16(r1) + ld r31, -8(r1) + + /* Jump back to real return address */ + blr + +_GLOBAL(mod_return_to_handler) + /* need to save return values */ + std r4, -32(r1) + std r3, -24(r1) + /* save TOC */ + std r2, -16(r1) + std r31, -8(r1) + mr r31, r1 + stdu r1, -112(r1) + + /* + * We are in a module using the module's TOC. + * Switch to our TOC to run inside the core kernel. + */ + ld r2, PACATOC(r13) + + bl .ftrace_return_to_handler + nop + + /* return value has real return address */ + mtlr r3 + + ld r1, 0(r1) + ld r4, -32(r1) + ld r3, -24(r1) + ld r2, -16(r1) + ld r31, -8(r1) + + /* Jump back to real return address */ + blr +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +#endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S new file mode 100644 index 00000000..7215cc24 --- /dev/null +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -0,0 +1,1326 @@ +/* + * Boot code and exception vectors for Book3E processors + * + * Copyright (C) 2007 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/threads.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/cputable.h> +#include <asm/setup.h> +#include <asm/thread_info.h> +#include <asm/reg_a2.h> +#include <asm/exception-64e.h> +#include <asm/bug.h> +#include <asm/irqflags.h> +#include <asm/ptrace.h> +#include <asm/ppc-opcode.h> +#include <asm/mmu.h> +#include <asm/hw_irq.h> + +/* XXX This will ultimately add space for a special exception save + * structure used to save things like SRR0/SRR1, SPRGs, MAS, etc... + * when taking special interrupts. For now we don't support that, + * special interrupts from within a non-standard level will probably + * blow you up + */ +#define SPECIAL_EXC_FRAME_SIZE INT_FRAME_SIZE + +/* Exception prolog code for all exceptions */ +#define EXCEPTION_PROLOG(n, type, addition) \ + mtspr SPRN_SPRG_##type##_SCRATCH,r13; /* get spare registers */ \ + mfspr r13,SPRN_SPRG_PACA; /* get PACA */ \ + std r10,PACA_EX##type+EX_R10(r13); \ + std r11,PACA_EX##type+EX_R11(r13); \ + mfcr r10; /* save CR */ \ + addition; /* additional code for that exc. */ \ + std r1,PACA_EX##type+EX_R1(r13); /* save old r1 in the PACA */ \ + stw r10,PACA_EX##type+EX_CR(r13); /* save old CR in the PACA */ \ + mfspr r11,SPRN_##type##_SRR1;/* what are we coming from */ \ + type##_SET_KSTACK; /* get special stack if necessary */\ + andi. r10,r11,MSR_PR; /* save stack pointer */ \ + beq 1f; /* branch around if supervisor */ \ + ld r1,PACAKSAVE(r13); /* get kernel stack coming from usr */\ +1: cmpdi cr1,r1,0; /* check if SP makes sense */ \ + bge- cr1,exc_##n##_bad_stack;/* bad stack (TODO: out of line) */ \ + mfspr r10,SPRN_##type##_SRR0; /* read SRR0 before touching stack */ + +/* Exception type-specific macros */ +#define GEN_SET_KSTACK \ + subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ +#define SPRN_GEN_SRR0 SPRN_SRR0 +#define SPRN_GEN_SRR1 SPRN_SRR1 + +#define CRIT_SET_KSTACK \ + ld r1,PACA_CRIT_STACK(r13); \ + subi r1,r1,SPECIAL_EXC_FRAME_SIZE; +#define SPRN_CRIT_SRR0 SPRN_CSRR0 +#define SPRN_CRIT_SRR1 SPRN_CSRR1 + +#define DBG_SET_KSTACK \ + ld r1,PACA_DBG_STACK(r13); \ + subi r1,r1,SPECIAL_EXC_FRAME_SIZE; +#define SPRN_DBG_SRR0 SPRN_DSRR0 +#define SPRN_DBG_SRR1 SPRN_DSRR1 + +#define MC_SET_KSTACK \ + ld r1,PACA_MC_STACK(r13); \ + subi r1,r1,SPECIAL_EXC_FRAME_SIZE; +#define SPRN_MC_SRR0 SPRN_MCSRR0 +#define SPRN_MC_SRR1 SPRN_MCSRR1 + +#define NORMAL_EXCEPTION_PROLOG(n, addition) \ + EXCEPTION_PROLOG(n, GEN, addition##_GEN(n)) + +#define CRIT_EXCEPTION_PROLOG(n, addition) \ + EXCEPTION_PROLOG(n, CRIT, addition##_CRIT(n)) + +#define DBG_EXCEPTION_PROLOG(n, addition) \ + EXCEPTION_PROLOG(n, DBG, addition##_DBG(n)) + +#define MC_EXCEPTION_PROLOG(n, addition) \ + EXCEPTION_PROLOG(n, MC, addition##_MC(n)) + + +/* Variants of the "addition" argument for the prolog + */ +#define PROLOG_ADDITION_NONE_GEN(n) +#define PROLOG_ADDITION_NONE_CRIT(n) +#define PROLOG_ADDITION_NONE_DBG(n) +#define PROLOG_ADDITION_NONE_MC(n) + +#define PROLOG_ADDITION_MASKABLE_GEN(n) \ + lbz r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? 
*/ \ + cmpwi cr0,r11,0; /* yes -> go out of line */ \ + beq masked_interrupt_book3e_##n + +#define PROLOG_ADDITION_2REGS_GEN(n) \ + std r14,PACA_EXGEN+EX_R14(r13); \ + std r15,PACA_EXGEN+EX_R15(r13) + +#define PROLOG_ADDITION_1REG_GEN(n) \ + std r14,PACA_EXGEN+EX_R14(r13); + +#define PROLOG_ADDITION_2REGS_CRIT(n) \ + std r14,PACA_EXCRIT+EX_R14(r13); \ + std r15,PACA_EXCRIT+EX_R15(r13) + +#define PROLOG_ADDITION_2REGS_DBG(n) \ + std r14,PACA_EXDBG+EX_R14(r13); \ + std r15,PACA_EXDBG+EX_R15(r13) + +#define PROLOG_ADDITION_2REGS_MC(n) \ + std r14,PACA_EXMC+EX_R14(r13); \ + std r15,PACA_EXMC+EX_R15(r13) + + +/* Core exception code for all exceptions except TLB misses. + * XXX: Needs to make SPRN_SPRG_GEN depend on exception type + */ +#define EXCEPTION_COMMON(n, excf, ints) \ +exc_##n##_common: \ + std r0,GPR0(r1); /* save r0 in stackframe */ \ + std r2,GPR2(r1); /* save r2 in stackframe */ \ + SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ + SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ + std r9,GPR9(r1); /* save r9 in stackframe */ \ + std r10,_NIP(r1); /* save SRR0 to stackframe */ \ + std r11,_MSR(r1); /* save SRR1 to stackframe */ \ + ACCOUNT_CPU_USER_ENTRY(r10,r11);/* accounting (uses cr0+eq) */ \ + ld r3,excf+EX_R10(r13); /* get back r10 */ \ + ld r4,excf+EX_R11(r13); /* get back r11 */ \ + mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 */ \ + std r12,GPR12(r1); /* save r12 in stackframe */ \ + ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \ + mflr r6; /* save LR in stackframe */ \ + mfctr r7; /* save CTR in stackframe */ \ + mfspr r8,SPRN_XER; /* save XER in stackframe */ \ + ld r9,excf+EX_R1(r13); /* load orig r1 back from PACA */ \ + lwz r10,excf+EX_CR(r13); /* load orig CR back from PACA */ \ + lbz r11,PACASOFTIRQEN(r13); /* get current IRQ softe */ \ + ld r12,exception_marker@toc(r2); \ + li r0,0; \ + std r3,GPR10(r1); /* save r10 to stackframe */ \ + std r4,GPR11(r1); /* save r11 to stackframe */ \ + std r5,GPR13(r1); /* save it to stackframe */ \ + std r6,_LINK(r1); \ + std r7,_CTR(r1); \ + std r8,_XER(r1); \ + li r3,(n)+1; /* indicate partial regs in trap */ \ + std r9,0(r1); /* store stack frame back link */ \ + std r10,_CCR(r1); /* store orig CR in stackframe */ \ + std r9,GPR1(r1); /* store stack frame back link */ \ + std r11,SOFTE(r1); /* and save it to stackframe */ \ + std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \ + std r3,_TRAP(r1); /* set trap number */ \ + std r0,RESULT(r1); /* clear regs->result */ \ + ints; + +/* Variants for the "ints" argument. This one does nothing when we want + * to keep interrupts in their original state + */ +#define INTS_KEEP + +/* This second version is meant for exceptions that don't immediately + * hard-enable. We set a bit in paca->irq_happened to ensure that + * a subsequent call to arch_local_irq_restore() will properly + * hard-enable and avoid the fast-path + */ +#define INTS_DISABLE SOFT_DISABLE_INTS(r3,r4) + +/* This is called by exceptions that used INTS_KEEP (that did not touch + * irq indicators in the PACA). 
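+ * For instance, the hypercall handler further down pairs
+ *
+ *	EXCEPTION_COMMON(0x310, PACA_EXGEN, INTS_KEEP)
+ *	...
+ *	INTS_RESTORE_HARD
+ *
+ * so the interrupted context's MSR:EE setting is carried through the
+ * exception unchanged.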
+ * INTS_RESTORE_HARD restores MSR:EE to its previous value.
+ *
+ * XXX In the long run, we may want to open-code it in order to separate the
+ * load from the wrtee, thus limiting the latency caused by the dependency
+ * but at this point, I'll favor code clarity until we have a near to final
+ * implementation
+ */
+#define INTS_RESTORE_HARD \
+ ld r11,_MSR(r1); \
+ wrtee r11;
+
+/* XXX FIXME: Restore r14/r15 when necessary */
+#define BAD_STACK_TRAMPOLINE(n) \
+exc_##n##_bad_stack: \
+ li r1,(n); /* get exception number */ \
+ sth r1,PACA_TRAP_SAVE(r13); /* store trap */ \
+ b bad_stack_book3e; /* bad stack error */
+
+/* WARNING: If you change the layout of this stub, make sure you check
+ * the debug exception handler which handles single stepping
+ * into exceptions from userspace, and the MM code in
+ * arch/powerpc/mm/tlb_nohash.c which patches the branch here
+ * and would need to be updated if that branch is moved
+ */
+#define EXCEPTION_STUB(loc, label) \
+ . = interrupt_base_book3e + loc; \
+ nop; /* To make debug interrupts happy */ \
+ b exc_##label##_book3e;
+
+#define ACK_NONE(r)
+#define ACK_DEC(r) \
+ lis r,TSR_DIS@h; \
+ mtspr SPRN_TSR,r
+#define ACK_FIT(r) \
+ lis r,TSR_FIS@h; \
+ mtspr SPRN_TSR,r
+
+/* Used by asynchronous interrupts that may happen in the idle loop.
+ *
+ * This checks if the thread was in the idle loop, and if so, returns
+ * to the caller rather than to the interrupted PC. This is to avoid
+ * a race if interrupts happen before the wait instruction.
+ */
+#define CHECK_NAPPING() \
+ clrrdi r11,r1,THREAD_SHIFT; \
+ ld r10,TI_LOCAL_FLAGS(r11); \
+ andi. r9,r10,_TLF_NAPPING; \
+ beq+ 1f; \
+ ld r8,_LINK(r1); \
+ rlwinm r7,r10,0,~_TLF_NAPPING; \
+ std r8,_NIP(r1); \
+ std r7,TI_LOCAL_FLAGS(r11); \
+1:
+
+
+#define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack) \
+ START_EXCEPTION(label); \
+ NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE) \
+ EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE) \
+ ack(r8); \
+ CHECK_NAPPING(); \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ bl hdlr; \
+ b .ret_from_except_lite;
+
+/* This value is used to mark exception frames on the stack. */
+ .section ".toc","aw"
+exception_marker:
+ .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
+
+
+/*
+ * And here we have the exception vectors!
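+ *
+ * Each EXCEPTION_STUB(loc, label) entry expands, per the macro above,
+ * to a fixed-offset slot. For example the 0x1c0 data TLB miss slot
+ * assembles to:
+ *
+ *	. = interrupt_base_book3e + 0x1c0
+ *	nop
+ *	b	exc_data_tlb_miss_book3e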
+ */ + + .text + .balign 0x1000 + .globl interrupt_base_book3e +interrupt_base_book3e: /* fake trap */ + EXCEPTION_STUB(0x000, machine_check) /* 0x0200 */ + EXCEPTION_STUB(0x020, critical_input) /* 0x0580 */ + EXCEPTION_STUB(0x040, debug_crit) /* 0x0d00 */ + EXCEPTION_STUB(0x060, data_storage) /* 0x0300 */ + EXCEPTION_STUB(0x080, instruction_storage) /* 0x0400 */ + EXCEPTION_STUB(0x0a0, external_input) /* 0x0500 */ + EXCEPTION_STUB(0x0c0, alignment) /* 0x0600 */ + EXCEPTION_STUB(0x0e0, program) /* 0x0700 */ + EXCEPTION_STUB(0x100, fp_unavailable) /* 0x0800 */ + EXCEPTION_STUB(0x120, system_call) /* 0x0c00 */ + EXCEPTION_STUB(0x140, ap_unavailable) /* 0x0f20 */ + EXCEPTION_STUB(0x160, decrementer) /* 0x0900 */ + EXCEPTION_STUB(0x180, fixed_interval) /* 0x0980 */ + EXCEPTION_STUB(0x1a0, watchdog) /* 0x09f0 */ + EXCEPTION_STUB(0x1c0, data_tlb_miss) + EXCEPTION_STUB(0x1e0, instruction_tlb_miss) + EXCEPTION_STUB(0x260, perfmon) + EXCEPTION_STUB(0x280, doorbell) + EXCEPTION_STUB(0x2a0, doorbell_crit) + EXCEPTION_STUB(0x2c0, guest_doorbell) + EXCEPTION_STUB(0x2e0, guest_doorbell_crit) + EXCEPTION_STUB(0x300, hypercall) + EXCEPTION_STUB(0x320, ehpriv) + + .globl interrupt_end_book3e +interrupt_end_book3e: + +/* Critical Input Interrupt */ + START_EXCEPTION(critical_input); + CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE) +// EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE) +// bl special_reg_save_crit +// CHECK_NAPPING(); +// addi r3,r1,STACK_FRAME_OVERHEAD +// bl .critical_exception +// b ret_from_crit_except + b . + +/* Machine Check Interrupt */ + START_EXCEPTION(machine_check); + CRIT_EXCEPTION_PROLOG(0x200, PROLOG_ADDITION_NONE) +// EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE) +// bl special_reg_save_mc +// addi r3,r1,STACK_FRAME_OVERHEAD +// CHECK_NAPPING(); +// bl .machine_check_exception +// b ret_from_mc_except + b . + +/* Data Storage Interrupt */ + START_EXCEPTION(data_storage) + NORMAL_EXCEPTION_PROLOG(0x300, PROLOG_ADDITION_2REGS) + mfspr r14,SPRN_DEAR + mfspr r15,SPRN_ESR + EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_DISABLE) + b storage_fault_common + +/* Instruction Storage Interrupt */ + START_EXCEPTION(instruction_storage); + NORMAL_EXCEPTION_PROLOG(0x400, PROLOG_ADDITION_2REGS) + li r15,0 + mr r14,r10 + EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_DISABLE) + b storage_fault_common + +/* External Input Interrupt */ + MASKABLE_EXCEPTION(0x500, external_input, .do_IRQ, ACK_NONE) + +/* Alignment */ + START_EXCEPTION(alignment); + NORMAL_EXCEPTION_PROLOG(0x600, PROLOG_ADDITION_2REGS) + mfspr r14,SPRN_DEAR + mfspr r15,SPRN_ESR + EXCEPTION_COMMON(0x600, PACA_EXGEN, INTS_KEEP) + b alignment_more /* no room, go out of line */ + +/* Program Interrupt */ + START_EXCEPTION(program); + NORMAL_EXCEPTION_PROLOG(0x700, PROLOG_ADDITION_1REG) + mfspr r14,SPRN_ESR + EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE) + std r14,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + ld r14,PACA_EXGEN+EX_R14(r13) + bl .save_nvgprs + bl .program_check_exception + b .ret_from_except + +/* Floating Point Unavailable Interrupt */ + START_EXCEPTION(fp_unavailable); + NORMAL_EXCEPTION_PROLOG(0x800, PROLOG_ADDITION_NONE) + /* we can probably do a shorter exception entry for that one... */ + EXCEPTION_COMMON(0x800, PACA_EXGEN, INTS_KEEP) + ld r12,_MSR(r1) + andi. 
r0,r12,MSR_PR; + beq- 1f + bl .load_up_fpu + b fast_exception_return +1: INTS_DISABLE + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .kernel_fp_unavailable_exception + b .ret_from_except + +/* Decrementer Interrupt */ + MASKABLE_EXCEPTION(0x900, decrementer, .timer_interrupt, ACK_DEC) + +/* Fixed Interval Timer Interrupt */ + MASKABLE_EXCEPTION(0x980, fixed_interval, .unknown_exception, ACK_FIT) + +/* Watchdog Timer Interrupt */ + START_EXCEPTION(watchdog); + CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE) +// EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE) +// bl special_reg_save_crit +// CHECK_NAPPING(); +// addi r3,r1,STACK_FRAME_OVERHEAD +// bl .unknown_exception +// b ret_from_crit_except + b . + +/* System Call Interrupt */ + START_EXCEPTION(system_call) + mr r9,r13 /* keep a copy of userland r13 */ + mfspr r11,SPRN_SRR0 /* get return address */ + mfspr r12,SPRN_SRR1 /* get previous MSR */ + mfspr r13,SPRN_SPRG_PACA /* get our PACA */ + b system_call_common + +/* Auxiliary Processor Unavailable Interrupt */ + START_EXCEPTION(ap_unavailable); + NORMAL_EXCEPTION_PROLOG(0xf20, PROLOG_ADDITION_NONE) + EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_DISABLE) + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .unknown_exception + b .ret_from_except + +/* Debug exception as a critical interrupt*/ + START_EXCEPTION(debug_crit); + CRIT_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS) + + /* + * If there is a single step or branch-taken exception in an + * exception entry sequence, it was probably meant to apply to + * the code where the exception occurred (since exception entry + * doesn't turn off DE automatically). We simulate the effect + * of turning off DE on entry to an exception handler by turning + * off DE in the CSRR1 value and clearing the debug status. + */ + + mfspr r14,SPRN_DBSR /* check single-step/branch taken */ + andis. r15,r14,DBSR_IC@h + beq+ 1f + + LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) + LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e) + cmpld cr0,r10,r14 + cmpld cr1,r10,r15 + blt+ cr0,1f + bge+ cr1,1f + + /* here it looks like we got an inappropriate debug exception. */ + lis r14,DBSR_IC@h /* clear the IC event */ + rlwinm r11,r11,0,~MSR_DE /* clear DE in the CSRR1 value */ + mtspr SPRN_DBSR,r14 + mtspr SPRN_CSRR1,r11 + lwz r10,PACA_EXCRIT+EX_CR(r13) /* restore registers */ + ld r1,PACA_EXCRIT+EX_R1(r13) + ld r14,PACA_EXCRIT+EX_R14(r13) + ld r15,PACA_EXCRIT+EX_R15(r13) + mtcr r10 + ld r10,PACA_EXCRIT+EX_R10(r13) /* restore registers */ + ld r11,PACA_EXCRIT+EX_R11(r13) + mfspr r13,SPRN_SPRG_CRIT_SCRATCH + rfci + + /* Normal debug exception */ + /* XXX We only handle coming from userspace for now since we can't + * quite save properly an interrupted kernel state yet + */ +1: andi. r14,r11,MSR_PR; /* check for userspace again */ + beq kernel_dbg_exc; /* if from kernel mode */ + + /* Now we mash up things to make it look like we are coming on a + * normal exception + */ + mfspr r15,SPRN_SPRG_CRIT_SCRATCH + mtspr SPRN_SPRG_GEN_SCRATCH,r15 + mfspr r14,SPRN_DBSR + EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE) + std r14,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + mr r4,r14 + ld r14,PACA_EXCRIT+EX_R14(r13) + ld r15,PACA_EXCRIT+EX_R15(r13) + bl .save_nvgprs + bl .DebugException + b .ret_from_except + +kernel_dbg_exc: + b . 
/* NYI */ + +/* Debug exception as a debug interrupt*/ + START_EXCEPTION(debug_debug); + DBG_EXCEPTION_PROLOG(0xd08, PROLOG_ADDITION_2REGS) + + /* + * If there is a single step or branch-taken exception in an + * exception entry sequence, it was probably meant to apply to + * the code where the exception occurred (since exception entry + * doesn't turn off DE automatically). We simulate the effect + * of turning off DE on entry to an exception handler by turning + * off DE in the DSRR1 value and clearing the debug status. + */ + + mfspr r14,SPRN_DBSR /* check single-step/branch taken */ + andis. r15,r14,DBSR_IC@h + beq+ 1f + + LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) + LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e) + cmpld cr0,r10,r14 + cmpld cr1,r10,r15 + blt+ cr0,1f + bge+ cr1,1f + + /* here it looks like we got an inappropriate debug exception. */ + lis r14,DBSR_IC@h /* clear the IC event */ + rlwinm r11,r11,0,~MSR_DE /* clear DE in the DSRR1 value */ + mtspr SPRN_DBSR,r14 + mtspr SPRN_DSRR1,r11 + lwz r10,PACA_EXDBG+EX_CR(r13) /* restore registers */ + ld r1,PACA_EXDBG+EX_R1(r13) + ld r14,PACA_EXDBG+EX_R14(r13) + ld r15,PACA_EXDBG+EX_R15(r13) + mtcr r10 + ld r10,PACA_EXDBG+EX_R10(r13) /* restore registers */ + ld r11,PACA_EXDBG+EX_R11(r13) + mfspr r13,SPRN_SPRG_DBG_SCRATCH + rfdi + + /* Normal debug exception */ + /* XXX We only handle coming from userspace for now since we can't + * quite save properly an interrupted kernel state yet + */ +1: andi. r14,r11,MSR_PR; /* check for userspace again */ + beq kernel_dbg_exc; /* if from kernel mode */ + + /* Now we mash up things to make it look like we are coming on a + * normal exception + */ + mfspr r15,SPRN_SPRG_DBG_SCRATCH + mtspr SPRN_SPRG_GEN_SCRATCH,r15 + mfspr r14,SPRN_DBSR + EXCEPTION_COMMON(0xd08, PACA_EXDBG, INTS_DISABLE) + std r14,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + mr r4,r14 + ld r14,PACA_EXDBG+EX_R14(r13) + ld r15,PACA_EXDBG+EX_R15(r13) + bl .save_nvgprs + bl .DebugException + b .ret_from_except + + START_EXCEPTION(perfmon); + NORMAL_EXCEPTION_PROLOG(0x260, PROLOG_ADDITION_NONE) + EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .performance_monitor_exception + b .ret_from_except_lite + +/* Doorbell interrupt */ + MASKABLE_EXCEPTION(0x280, doorbell, .doorbell_exception, ACK_NONE) + +/* Doorbell critical Interrupt */ + START_EXCEPTION(doorbell_crit); + CRIT_EXCEPTION_PROLOG(0x2a0, PROLOG_ADDITION_NONE) +// EXCEPTION_COMMON(0x2a0, PACA_EXCRIT, INTS_DISABLE) +// bl special_reg_save_crit +// CHECK_NAPPING(); +// addi r3,r1,STACK_FRAME_OVERHEAD +// bl .doorbell_critical_exception +// b ret_from_crit_except + b . + +/* Guest Doorbell */ + MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, ACK_NONE) + +/* Guest Doorbell critical Interrupt */ + START_EXCEPTION(guest_doorbell_crit); + CRIT_EXCEPTION_PROLOG(0x2e0, PROLOG_ADDITION_NONE) +// EXCEPTION_COMMON(0x2e0, PACA_EXCRIT, INTS_DISABLE) +// bl special_reg_save_crit +// CHECK_NAPPING(); +// addi r3,r1,STACK_FRAME_OVERHEAD +// bl .guest_doorbell_critical_exception +// b ret_from_crit_except + b . 
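+
+/*
+ * For context: the replay consumer of the PACAIRQHAPPENED bits set by
+ * the masked_interrupt_book3e_* stubs further down is
+ * arch_local_irq_restore() in arch/powerpc/kernel/irq.c. A simplified
+ * C sketch (not the exact code):
+ *
+ *	void arch_local_irq_restore(unsigned long en)
+ *	{
+ *		set_soft_enabled(en);
+ *		if (!en || !local_paca->irq_happened)
+ *			return;
+ *		...
+ *		replay = __check_irq_replay();
+ *		if (replay)
+ *			__replay_interrupt(replay);
+ *	}
+ */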
+
+/* Hypervisor call */
+ START_EXCEPTION(hypercall);
+ NORMAL_EXCEPTION_PROLOG(0x310, PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON(0x310, PACA_EXGEN, INTS_KEEP)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .save_nvgprs
+ INTS_RESTORE_HARD
+ bl .unknown_exception
+ b .ret_from_except
+
+/* Embedded Hypervisor privileged */
+ START_EXCEPTION(ehpriv);
+ NORMAL_EXCEPTION_PROLOG(0x320, PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON(0x320, PACA_EXGEN, INTS_KEEP)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .save_nvgprs
+ INTS_RESTORE_HARD
+ bl .unknown_exception
+ b .ret_from_except
+
+/*
+ * An interrupt came in while soft-disabled; we mark paca->irq_happened
+ * accordingly and, if the interrupt is level sensitive, we hard disable
+ */
+
+masked_interrupt_book3e_0x500:
+ /* XXX When adding support for EPR, use PACA_IRQ_EE_EDGE */
+ li r11,PACA_IRQ_EE
+ b masked_interrupt_book3e_full_mask
+
+masked_interrupt_book3e_0x900:
+ ACK_DEC(r11);
+ li r11,PACA_IRQ_DEC
+ b masked_interrupt_book3e_no_mask
+masked_interrupt_book3e_0x980:
+ ACK_FIT(r11);
+ li r11,PACA_IRQ_DEC
+ b masked_interrupt_book3e_no_mask
+masked_interrupt_book3e_0x280:
+masked_interrupt_book3e_0x2c0:
+ li r11,PACA_IRQ_DBELL
+ b masked_interrupt_book3e_no_mask
+
+masked_interrupt_book3e_no_mask:
+ mtcr r10
+ lbz r10,PACAIRQHAPPENED(r13)
+ or r10,r10,r11
+ stb r10,PACAIRQHAPPENED(r13)
+ b 1f
+masked_interrupt_book3e_full_mask:
+ mtcr r10
+ lbz r10,PACAIRQHAPPENED(r13)
+ or r10,r10,r11
+ stb r10,PACAIRQHAPPENED(r13)
+ mfspr r10,SPRN_SRR1
+ rldicl r11,r10,48,1 /* clear MSR_EE */
+ rotldi r10,r11,16
+ mtspr SPRN_SRR1,r10
+1: ld r10,PACA_EXGEN+EX_R10(r13);
+ ld r11,PACA_EXGEN+EX_R11(r13);
+ mfspr r13,SPRN_SPRG_GEN_SCRATCH;
+ rfi
+ b .
+/*
+ * Called from arch_local_irq_restore() when an interrupt needs
+ * to be resent. r3 contains either 0x500, 0x900 or 0x280 to
+ * indicate the kind of interrupt. MSR:EE is already off.
+ * We generate a stackframe as if a real interrupt had happened.
+ *
+ * Note: While MSR:EE is off, we need to make sure that _MSR
+ * in the generated frame has EE set to 1 or the exception
+ * handler will not properly re-enable them.
+ */
+_GLOBAL(__replay_interrupt)
+ /* We are going to jump to the exception common code which
+ * will retrieve various register values from the PACA which
+ * we don't give a damn about.
+ */
+ mflr r10
+ mfmsr r11
+ mfcr r4
+ mtspr SPRN_SPRG_GEN_SCRATCH,r13;
+ std r1,PACA_EXGEN+EX_R1(r13);
+ stw r4,PACA_EXGEN+EX_CR(r13);
+ ori r11,r11,MSR_EE
+ subi r1,r1,INT_FRAME_SIZE;
+ cmpwi cr0,r3,0x500
+ beq exc_0x500_common
+ cmpwi cr0,r3,0x900
+ beq exc_0x900_common
+ cmpwi cr0,r3,0x280
+ beq exc_0x280_common
+ blr
+
+
+/*
+ * This is called from the 0x300 and 0x400 handlers after the prologs with
+ * r14 and r15 containing the fault address and error code, with the
+ * original values stashed away in the PACA
+ */
+storage_fault_common:
+ std r14,_DAR(r1)
+ std r15,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ mr r4,r14
+ mr r5,r15
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
+ bl .do_page_fault
+ cmpdi r3,0
+ bne- 1f
+ b .ret_from_except_lite
+1: bl .save_nvgprs
+ mr r5,r3
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ ld r4,_DAR(r1)
+ bl .bad_page_fault
+ b .ret_from_except
+
+/*
+ * The alignment exception doesn't fit entirely in the 0x100 bytes so it
+ * continues here.
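+ *
+ * The C handler eventually reached is, roughly (a simplified sketch
+ * of alignment_exception() in traps.c, not the exact code):
+ *
+ *	void alignment_exception(struct pt_regs *regs)
+ *	{
+ *		int fixed = fix_alignment(regs);
+ *
+ *		if (fixed == 1)
+ *			regs->nip += 4;	<- skip emulated instruction
+ *		else
+ *			...		<- deliver SIGBUS/SIGSEGV
+ *	}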
+ */ +alignment_more: + std r14,_DAR(r1) + std r15,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + ld r14,PACA_EXGEN+EX_R14(r13) + ld r15,PACA_EXGEN+EX_R15(r13) + bl .save_nvgprs + INTS_RESTORE_HARD + bl .alignment_exception + b .ret_from_except + +/* + * We branch here from entry_64.S for the last stage of the exception + * return code path. MSR:EE is expected to be off at that point + */ +_GLOBAL(exception_return_book3e) + b 1f + +/* This is the return from load_up_fpu fast path which could do with + * less GPR restores in fact, but for now we have a single return path + */ + .globl fast_exception_return +fast_exception_return: + wrteei 0 +1: mr r0,r13 + ld r10,_MSR(r1) + REST_4GPRS(2, r1) + andi. r6,r10,MSR_PR + REST_2GPRS(6, r1) + beq 1f + ACCOUNT_CPU_USER_EXIT(r10, r11) + ld r0,GPR13(r1) + +1: stdcx. r0,0,r1 /* to clear the reservation */ + + ld r8,_CCR(r1) + ld r9,_LINK(r1) + ld r10,_CTR(r1) + ld r11,_XER(r1) + mtcr r8 + mtlr r9 + mtctr r10 + mtxer r11 + REST_2GPRS(8, r1) + ld r10,GPR10(r1) + ld r11,GPR11(r1) + ld r12,GPR12(r1) + mtspr SPRN_SPRG_GEN_SCRATCH,r0 + + std r10,PACA_EXGEN+EX_R10(r13); + std r11,PACA_EXGEN+EX_R11(r13); + ld r10,_NIP(r1) + ld r11,_MSR(r1) + ld r0,GPR0(r1) + ld r1,GPR1(r1) + mtspr SPRN_SRR0,r10 + mtspr SPRN_SRR1,r11 + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + mfspr r13,SPRN_SPRG_GEN_SCRATCH + rfi + +/* + * Trampolines used when spotting a bad kernel stack pointer in + * the exception entry code. + * + * TODO: move some bits like SRR0 read to trampoline, pass PACA + * index around, etc... to handle crit & mcheck + */ +BAD_STACK_TRAMPOLINE(0x000) +BAD_STACK_TRAMPOLINE(0x100) +BAD_STACK_TRAMPOLINE(0x200) +BAD_STACK_TRAMPOLINE(0x260) +BAD_STACK_TRAMPOLINE(0x280) +BAD_STACK_TRAMPOLINE(0x2a0) +BAD_STACK_TRAMPOLINE(0x2c0) +BAD_STACK_TRAMPOLINE(0x2e0) +BAD_STACK_TRAMPOLINE(0x300) +BAD_STACK_TRAMPOLINE(0x310) +BAD_STACK_TRAMPOLINE(0x320) +BAD_STACK_TRAMPOLINE(0x400) +BAD_STACK_TRAMPOLINE(0x500) +BAD_STACK_TRAMPOLINE(0x600) +BAD_STACK_TRAMPOLINE(0x700) +BAD_STACK_TRAMPOLINE(0x800) +BAD_STACK_TRAMPOLINE(0x900) +BAD_STACK_TRAMPOLINE(0x980) +BAD_STACK_TRAMPOLINE(0x9f0) +BAD_STACK_TRAMPOLINE(0xa00) +BAD_STACK_TRAMPOLINE(0xb00) +BAD_STACK_TRAMPOLINE(0xc00) +BAD_STACK_TRAMPOLINE(0xd00) +BAD_STACK_TRAMPOLINE(0xd08) +BAD_STACK_TRAMPOLINE(0xe00) +BAD_STACK_TRAMPOLINE(0xf00) +BAD_STACK_TRAMPOLINE(0xf20) + + .globl bad_stack_book3e +bad_stack_book3e: + /* XXX: Needs to make SPRN_SPRG_GEN depend on exception type */ + mfspr r10,SPRN_SRR0; /* read SRR0 before touching stack */ + ld r1,PACAEMERGSP(r13) + subi r1,r1,64+INT_FRAME_SIZE + std r10,_NIP(r1) + std r11,_MSR(r1) + ld r10,PACA_EXGEN+EX_R1(r13) /* FIXME for crit & mcheck */ + lwz r11,PACA_EXGEN+EX_CR(r13) /* FIXME for crit & mcheck */ + std r10,GPR1(r1) + std r11,_CCR(r1) + mfspr r10,SPRN_DEAR + mfspr r11,SPRN_ESR + std r10,_DAR(r1) + std r11,_DSISR(r1) + std r0,GPR0(r1); /* save r0 in stackframe */ \ + std r2,GPR2(r1); /* save r2 in stackframe */ \ + SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ + SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ + std r9,GPR9(r1); /* save r9 in stackframe */ \ + ld r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */ \ + ld r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */ \ + mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \ + std r3,GPR10(r1); /* save r10 to stackframe */ \ + std r4,GPR11(r1); /* save r11 to stackframe */ \ + std r12,GPR12(r1); /* save r12 in stackframe */ \ + std r5,GPR13(r1); /* save it to stackframe */ \ + mflr r10 + mfctr r11 + 
mfxer r12
+ std r10,_LINK(r1)
+ std r11,_CTR(r1)
+ std r12,_XER(r1)
+ SAVE_10GPRS(14,r1)
+ SAVE_8GPRS(24,r1)
+ lhz r12,PACA_TRAP_SAVE(r13)
+ std r12,_TRAP(r1)
+ addi r11,r1,INT_FRAME_SIZE
+ std r11,0(r1)
+ li r12,0
+ std r12,0(r11)
+ ld r2,PACATOC(r13)
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .kernel_bad_stack
+ b 1b
+
+/*
+ * Set up the initial TLB for a core. The current implementation
+ * assumes that whatever we are running off will not conflict with
+ * the new mapping at PAGE_OFFSET.
+ */
+_GLOBAL(initial_tlb_book3e)
+
+ /* Look for the first TLB with IPROT set */
+ mfspr r4,SPRN_TLB0CFG
+ andi. r3,r4,TLBnCFG_IPROT
+ lis r3,MAS0_TLBSEL(0)@h
+ bne found_iprot
+
+ mfspr r4,SPRN_TLB1CFG
+ andi. r3,r4,TLBnCFG_IPROT
+ lis r3,MAS0_TLBSEL(1)@h
+ bne found_iprot
+
+ mfspr r4,SPRN_TLB2CFG
+ andi. r3,r4,TLBnCFG_IPROT
+ lis r3,MAS0_TLBSEL(2)@h
+ bne found_iprot
+
+ lis r3,MAS0_TLBSEL(3)@h
+ mfspr r4,SPRN_TLB3CFG
+ /* fall through */
+
+found_iprot:
+ andi. r5,r4,TLBnCFG_HES
+ bne have_hes
+
+ mflr r8 /* save LR */
+/* 1. Find the index of the entry we're executing in
+ *
+ * r3 = MAS0_TLBSEL (for the iprot array)
+ * r4 = SPRN_TLBnCFG
+ */
+ bl invstr /* Find our address */
+invstr: mflr r6 /* Make it accessible */
+ mfmsr r7
+ rlwinm r5,r7,27,31,31 /* extract MSR[IS] */
+ mfspr r7,SPRN_PID
+ slwi r7,r7,16
+ or r7,r7,r5
+ mtspr SPRN_MAS6,r7
+ tlbsx 0,r6 /* search MSR[IS], SPID=PID */
+
+ mfspr r3,SPRN_MAS0
+ rlwinm r5,r3,16,20,31 /* Extract MAS0(Entry) */
+
+ mfspr r7,SPRN_MAS1 /* Ensure IPROT is set */
+ oris r7,r7,MAS1_IPROT@h
+ mtspr SPRN_MAS1,r7
+ tlbwe
+
+/* 2. Invalidate all entries except the entry we're executing in
+ *
+ * r3 = MAS0 w/TLBSEL & ESEL for the entry we are running in
+ * r4 = SPRN_TLBnCFG
+ * r5 = ESEL of entry we are running in
+ */
+ andi. r4,r4,TLBnCFG_N_ENTRY /* Extract # entries */
+ li r6,0 /* Set Entry counter to 0 */
+1: mr r7,r3 /* Set MAS0(TLBSEL) */
+ rlwimi r7,r6,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r6) */
+ mtspr SPRN_MAS0,r7
+ tlbre
+ mfspr r7,SPRN_MAS1
+ rlwinm r7,r7,0,2,31 /* Clear MAS1 Valid and IPROT */
+ cmpw r5,r6
+ beq skpinv /* Don't update the current execution TLB */
+ mtspr SPRN_MAS1,r7
+ tlbwe
+ isync
+skpinv: addi r6,r6,1 /* Increment */
+ cmpw r6,r4 /* Are we done? */
+ bne 1b /* If not, repeat */
+
+ /* Invalidate all TLBs */
+ PPC_TLBILX_ALL(0,0)
+ sync
+ isync
+
+/* 3. Setup a temp mapping and jump to it
+ *
+ * r3 = MAS0 w/TLBSEL & ESEL for the entry we are running in
+ * r5 = ESEL of entry we are running in
+ */
+ andi. r7,r5,0x1 /* Pick an unused, non-zero entry */
+ addi r7,r7,0x1
+ mr r4,r3 /* Set MAS0(TLBSEL) = 1 */
+ mtspr SPRN_MAS0,r4
+ tlbre
+
+ rlwimi r4,r7,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r7) */
+ mtspr SPRN_MAS0,r4
+
+ mfspr r7,SPRN_MAS1
+ xori r6,r7,MAS1_TS /* Setup TMP mapping in the other Address space */
+ mtspr SPRN_MAS1,r6
+
+ tlbwe
+
+ mfmsr r6
+ xori r6,r6,MSR_IS
+ mtspr SPRN_SRR1,r6
+ bl 1f /* Find our address */
+1: mflr r6
+ addi r6,r6,(2f - 1b)
+ mtspr SPRN_SRR0,r6
+ rfi
+2:
+
+/* 4. Clear out PIDs & Search info
+ *
+ * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in
+ * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
+ * r5 = MAS3
+ */
+ li r6,0
+ mtspr SPRN_MAS6,r6
+ mtspr SPRN_PID,r6
+
+/* 5.
Invalidate mapping we started in + * + * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in + * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping + * r5 = MAS3 + */ + mtspr SPRN_MAS0,r3 + tlbre + mfspr r6,SPRN_MAS1 + rlwinm r6,r6,0,2,0 /* clear IPROT */ + mtspr SPRN_MAS1,r6 + tlbwe + + /* Invalidate TLB1 */ + PPC_TLBILX_ALL(0,0) + sync + isync + +/* The mapping only needs to be cache-coherent on SMP */ +#ifdef CONFIG_SMP +#define M_IF_SMP MAS2_M +#else +#define M_IF_SMP 0 +#endif + +/* 6. Setup KERNELBASE mapping in TLB[0] + * + * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in + * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping + * r5 = MAS3 + */ + rlwinm r3,r3,0,16,3 /* clear ESEL */ + mtspr SPRN_MAS0,r3 + lis r6,(MAS1_VALID|MAS1_IPROT)@h + ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l + mtspr SPRN_MAS1,r6 + + LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | M_IF_SMP) + mtspr SPRN_MAS2,r6 + + rlwinm r5,r5,0,0,25 + ori r5,r5,MAS3_SR | MAS3_SW | MAS3_SX + mtspr SPRN_MAS3,r5 + li r5,-1 + rlwinm r5,r5,0,0,25 + + tlbwe + +/* 7. Jump to KERNELBASE mapping + * + * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping + */ + /* Now we branch the new virtual address mapped by this entry */ + LOAD_REG_IMMEDIATE(r6,2f) + lis r7,MSR_KERNEL@h + ori r7,r7,MSR_KERNEL@l + mtspr SPRN_SRR0,r6 + mtspr SPRN_SRR1,r7 + rfi /* start execution out of TLB1[0] entry */ +2: + +/* 8. Clear out the temp mapping + * + * r4 = MAS0 w/TLBSEL & ESEL for the entry we are running in + */ + mtspr SPRN_MAS0,r4 + tlbre + mfspr r5,SPRN_MAS1 + rlwinm r5,r5,0,2,0 /* clear IPROT */ + mtspr SPRN_MAS1,r5 + tlbwe + + /* Invalidate TLB1 */ + PPC_TLBILX_ALL(0,0) + sync + isync + + /* We translate LR and return */ + tovirt(r8,r8) + mtlr r8 + blr + +have_hes: + /* Setup MAS 0,1,2,3 and 7 for tlbwe of a 1G entry that maps the + * kernel linear mapping. We also set MAS8 once for all here though + * that will have to be made dependent on whether we are running under + * a hypervisor I suppose. + */ + + /* BEWARE, MAGIC + * This code is called as an ordinary function on the boot CPU. But to + * avoid duplication, this code is also used in SCOM bringup of + * secondary CPUs. We read the code between the initial_tlb_code_start + * and initial_tlb_code_end labels one instruction at a time and RAM it + * into the new core via SCOM. That doesn't process branches, so there + * must be none between those two labels. It also means if this code + * ever takes any parameters, the SCOM code must also be updated to + * provide them. 
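+ *
+ * Schematically, the SCOM side does something like this (hypothetical
+ * pseudo-C; the real bringup code lives with the platform SCOM
+ * support, and the names here are illustrative only):
+ *
+ *	for (u32 *p = a2_tlbinit_code_start; p < a2_tlbinit_code_end; p++)
+ *		scom_ram_one_instruction(core, *p);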
+ */
+ .globl a2_tlbinit_code_start
+a2_tlbinit_code_start:
+
+ ori r11,r3,MAS0_WQ_ALLWAYS
+ oris r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */
+ mtspr SPRN_MAS0,r11
+ lis r3,(MAS1_VALID | MAS1_IPROT)@h
+ ori r3,r3,BOOK3E_PAGESZ_1GB << MAS1_TSIZE_SHIFT
+ mtspr SPRN_MAS1,r3
+ LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET | MAS2_M)
+ mtspr SPRN_MAS2,r3
+ li r3,MAS3_SR | MAS3_SW | MAS3_SX
+ mtspr SPRN_MAS7_MAS3,r3
+ li r3,0
+ mtspr SPRN_MAS8,r3
+
+ /* Write the TLB entry */
+ tlbwe
+
+ .globl a2_tlbinit_after_linear_map
+a2_tlbinit_after_linear_map:
+
+ /* Now we branch to the new virtual address mapped by this entry */
+ LOAD_REG_IMMEDIATE(r3,1f)
+ mtctr r3
+ bctr
+
+1: /* We are now running at PAGE_OFFSET, clean the TLB of everything
+ * else (including IPROTed things left by firmware)
+ * r4 = TLBnCFG
+ * r3 = current address (more or less)
+ */
+
+ li r5,0
+ mtspr SPRN_MAS6,r5
+ tlbsx 0,r3
+
+ rlwinm r9,r4,0,TLBnCFG_N_ENTRY
+ rlwinm r10,r4,8,0xff
+ addi r10,r10,-1 /* Get inner loop mask */
+
+ li r3,1
+
+ mfspr r5,SPRN_MAS1
+ rlwinm r5,r5,0,(~(MAS1_VALID|MAS1_IPROT))
+
+ mfspr r6,SPRN_MAS2
+ rldicr r6,r6,0,51 /* Extract EPN */
+
+ mfspr r7,SPRN_MAS0
+ rlwinm r7,r7,0,0xffff0fff /* Clear HES and WQ */
+
+ rlwinm r8,r7,16,0xfff /* Extract ESEL */
+
+2: add r4,r3,r8
+ and r4,r4,r10
+
+ rlwimi r7,r4,16,MAS0_ESEL_MASK
+
+ mtspr SPRN_MAS0,r7
+ mtspr SPRN_MAS1,r5
+ mtspr SPRN_MAS2,r6
+ tlbwe
+
+ addi r3,r3,1
+ and. r4,r3,r10
+
+ bne 3f
+ addis r6,r6,(1<<30)@h
+3:
+ cmpw r3,r9
+ blt 2b
+
+ .globl a2_tlbinit_after_iprot_flush
+a2_tlbinit_after_iprot_flush:
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
+ /* Now establish early debug mappings if applicable */
+ /* Restore the MAS0 we used for linear mapping load */
+ mtspr SPRN_MAS0,r11
+
+ lis r3,(MAS1_VALID | MAS1_IPROT)@h
+ ori r3,r3,(BOOK3E_PAGESZ_4K << MAS1_TSIZE_SHIFT)
+ mtspr SPRN_MAS1,r3
+ LOAD_REG_IMMEDIATE(r3, WSP_UART_VIRT | MAS2_I | MAS2_G)
+ mtspr SPRN_MAS2,r3
+ LOAD_REG_IMMEDIATE(r3, WSP_UART_PHYS | MAS3_SR | MAS3_SW)
+ mtspr SPRN_MAS7_MAS3,r3
+ /* re-use the MAS8 value from the linear mapping */
+ tlbwe
+#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
+
+ PPC_TLBILX(0,0,0)
+ sync
+ isync
+
+ .globl a2_tlbinit_code_end
+a2_tlbinit_code_end:
+
+ /* We translate LR and return */
+ mflr r3
+ tovirt(r3,r3)
+ mtlr r3
+ blr
+
+/*
+ * Main entry (boot CPU, thread 0)
+ *
+ * We enter here from head_64.S, possibly after the prom_init trampoline,
+ * with r3 and r4 already saved to r31 and r30 respectively, and in 64-bit
+ * mode. Anything else is as it was left by the bootloader.
+ *
+ * Initial requirements of this port:
+ *
+ * - Kernel loaded at 0 physical
+ * - A good lump of memory mapped 0:0 by UTLB entry 0
+ * - MSR:IS & MSR:DS set to 0
+ *
+ * Note that some of the above requirements will be relaxed in the future
+ * as the kernel becomes smarter at dealing with different initial conditions
+ * but for now you have to be careful
+ */
+_GLOBAL(start_initialization_book3e)
+ mflr r28
+
+ /* First, we need to set up some initial TLBs to map the kernel
+ * text, data and bss at PAGE_OFFSET. We don't have a real mode
+ * and always use AS 0, so we just set it up to match our link
+ * address and never use 0-based addresses.
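+ *
+ * Concretely, once the 1GB linear entry is in place, virtual and
+ * physical addresses relate as (a sketch; tophys()/tovirt() encode
+ * the same relation):
+ *
+ *	virt = phys + PAGE_OFFSET
+ *	phys = virt - PAGE_OFFSET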
+ */ + bl .initial_tlb_book3e + + /* Init global core bits */ + bl .init_core_book3e + + /* Init per-thread bits */ + bl .init_thread_book3e + + /* Return to common init code */ + tovirt(r28,r28) + mtlr r28 + blr + + +/* + * Secondary core/processor entry + * + * This is entered for thread 0 of a secondary core, all other threads + * are expected to be stopped. It's similar to start_initialization_book3e + * except that it's generally entered from the holding loop in head_64.S + * after CPUs have been gathered by Open Firmware. + * + * We assume we are in 32 bits mode running with whatever TLB entry was + * set for us by the firmware or POR engine. + */ +_GLOBAL(book3e_secondary_core_init_tlb_set) + li r4,1 + b .generic_secondary_smp_init + +_GLOBAL(book3e_secondary_core_init) + mflr r28 + + /* Do we need to setup initial TLB entry ? */ + cmplwi r4,0 + bne 2f + + /* Setup TLB for this core */ + bl .initial_tlb_book3e + + /* We can return from the above running at a different + * address, so recalculate r2 (TOC) + */ + bl .relative_toc + + /* Init global core bits */ +2: bl .init_core_book3e + + /* Init per-thread bits */ +3: bl .init_thread_book3e + + /* Return to common init code at proper virtual address. + * + * Due to various previous assumptions, we know we entered this + * function at either the final PAGE_OFFSET mapping or using a + * 1:1 mapping at 0, so we don't bother doing a complicated check + * here, we just ensure the return address has the right top bits. + * + * Note that if we ever want to be smarter about where we can be + * started from, we have to be careful that by the time we reach + * the code below we may already be running at a different location + * than the one we were called from since initial_tlb_book3e can + * have moved us already. + */ + cmpdi cr0,r28,0 + blt 1f + lis r3,PAGE_OFFSET@highest + sldi r3,r3,32 + or r28,r28,r3 +1: mtlr r28 + blr + +_GLOBAL(book3e_secondary_thread_init) + mflr r28 + b 3b + +_STATIC(init_core_book3e) + /* Establish the interrupt vector base */ + LOAD_REG_IMMEDIATE(r3, interrupt_base_book3e) + mtspr SPRN_IVPR,r3 + sync + blr + +_STATIC(init_thread_book3e) + lis r3,(SPRN_EPCR_ICM | SPRN_EPCR_GICM)@h + mtspr SPRN_EPCR,r3 + + /* Make sure interrupts are off */ + wrteei 0 + + /* disable all timers and clear out status */ + li r3,0 + mtspr SPRN_TCR,r3 + mfspr r3,SPRN_TSR + mtspr SPRN_TSR,r3 + + blr + +_GLOBAL(__setup_base_ivors) + SET_IVOR(0, 0x020) /* Critical Input */ + SET_IVOR(1, 0x000) /* Machine Check */ + SET_IVOR(2, 0x060) /* Data Storage */ + SET_IVOR(3, 0x080) /* Instruction Storage */ + SET_IVOR(4, 0x0a0) /* External Input */ + SET_IVOR(5, 0x0c0) /* Alignment */ + SET_IVOR(6, 0x0e0) /* Program */ + SET_IVOR(7, 0x100) /* FP Unavailable */ + SET_IVOR(8, 0x120) /* System Call */ + SET_IVOR(9, 0x140) /* Auxiliary Processor Unavailable */ + SET_IVOR(10, 0x160) /* Decrementer */ + SET_IVOR(11, 0x180) /* Fixed Interval Timer */ + SET_IVOR(12, 0x1a0) /* Watchdog Timer */ + SET_IVOR(13, 0x1c0) /* Data TLB Error */ + SET_IVOR(14, 0x1e0) /* Instruction TLB Error */ + SET_IVOR(15, 0x040) /* Debug */ + + sync + + blr + +_GLOBAL(setup_perfmon_ivor) + SET_IVOR(35, 0x260) /* Performance Monitor */ + blr + +_GLOBAL(setup_doorbell_ivors) + SET_IVOR(36, 0x280) /* Processor Doorbell */ + SET_IVOR(37, 0x2a0) /* Processor Doorbell Crit */ + + /* Check MMUCFG[LPIDSIZE] to determine if we have category E.HV */ + mfspr r10,SPRN_MMUCFG + rlwinm. 
r10,r10,0,MMUCFG_LPIDSIZE + beqlr + + SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */ + SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */ + blr + +_GLOBAL(setup_ehv_ivors) + /* + * We may be running as a guest and lack E.HV even on a chip + * that normally has it. + */ + mfspr r10,SPRN_MMUCFG + rlwinm. r10,r10,0,MMUCFG_LPIDSIZE + beqlr + + SET_IVOR(40, 0x300) /* Embedded Hypervisor System Call */ + SET_IVOR(41, 0x320) /* Embedded Hypervisor Privilege */ + blr diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S new file mode 100644 index 00000000..8f880bc7 --- /dev/null +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -0,0 +1,1066 @@ +/* + * This file contains the 64-bit "server" PowerPC variant + * of the low level exception handling including exception + * vectors, exception return, part of the slb and stab + * handling and other fixed offset specific things. + * + * This file is meant to be #included from head_64.S due to + * position dependent assembly. + * + * Most of this originates from head_64.S and thus has the same + * copyright history. + * + */ + +#include <asm/hw_irq.h> +#include <asm/exception-64s.h> +#include <asm/ptrace.h> + +/* + * We layout physical memory as follows: + * 0x0000 - 0x00ff : Secondary processor spin code + * 0x0100 - 0x2fff : pSeries Interrupt prologs + * 0x3000 - 0x5fff : interrupt support common interrupt prologs + * 0x6000 - 0x6fff : Initial (CPU0) segment table + * 0x7000 - 0x7fff : FWNMI data area + * 0x8000 - : Early init and support code + */ + +/* + * This is the start of the interrupt handlers for pSeries + * This code runs with relocation off. + * Code from here to __end_interrupts gets copied down to real + * address 0x100 when we are running a relocatable kernel. + * Therefore any relative branches in this section must only + * branch to labels in this section. + */ + . = 0x100 + .globl __start_interrupts +__start_interrupts: + + .globl system_reset_pSeries; +system_reset_pSeries: + HMT_MEDIUM; + SET_SCRATCH0(r13) +#ifdef CONFIG_PPC_P7_NAP +BEGIN_FTR_SECTION + /* Running native on arch 2.06 or later, check if we are + * waking up from nap. We only handle no state loss and + * supervisor state loss. We do -not- handle hypervisor + * state loss at this time. + */ + mfspr r13,SPRN_SRR1 + rlwinm. r13,r13,47-31,30,31 + beq 9f + + /* waking up from powersave (nap) state */ + cmpwi cr1,r13,2 + /* Total loss of HV state is fatal, we could try to use the + * PIR to locate a PACA, then use an emergency stack etc... + * but for now, let's just stay stuck here + */ + bgt cr1,. + GET_PACA(r13) + +#ifdef CONFIG_KVM_BOOK3S_64_HV + lbz r0,PACAPROCSTART(r13) + cmpwi r0,0x80 + bne 1f + li r0,1 + stb r0,PACAPROCSTART(r13) + b kvm_start_guest +1: +#endif + + beq cr1,2f + b .power7_wakeup_noloss +2: b .power7_wakeup_loss +9: +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) +#endif /* CONFIG_PPC_P7_NAP */ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, + NOTEST, 0x100) + + . = 0x200 +machine_check_pSeries_1: + /* This is moved out of line as it can be patched by FW, but + * some code path might still want to branch into the original + * vector + */ + b machine_check_pSeries + + . 
= 0x300 + .globl data_access_pSeries +data_access_pSeries: + HMT_MEDIUM + SET_SCRATCH0(r13) +#ifndef CONFIG_POWER4_ONLY +BEGIN_FTR_SECTION + b data_access_check_stab +data_access_not_stab: +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) +#endif + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD, + KVMTEST, 0x300) + + . = 0x380 + .globl data_access_slb_pSeries +data_access_slb_pSeries: + HMT_MEDIUM + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380) + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r3,SPRN_DAR +#ifdef __DISABLED__ + /* Keep that around for when we re-implement dynamic VSIDs */ + cmpdi r3,0 + bge slb_miss_user_pseries +#endif /* __DISABLED__ */ + mfspr r12,SPRN_SRR1 +#ifndef CONFIG_RELOCATABLE + b .slb_miss_realmode +#else + /* + * We can't just use a direct branch to .slb_miss_realmode + * because the distance from here to there depends on where + * the kernel ends up being put. + */ + mfctr r11 + ld r10,PACAKBASE(r13) + LOAD_HANDLER(r10, .slb_miss_realmode) + mtctr r10 + bctr +#endif + + STD_EXCEPTION_PSERIES(0x400, 0x400, instruction_access) + + . = 0x480 + .globl instruction_access_slb_pSeries +instruction_access_slb_pSeries: + HMT_MEDIUM + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480) + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ +#ifdef __DISABLED__ + /* Keep that around for when we re-implement dynamic VSIDs */ + cmpdi r3,0 + bge slb_miss_user_pseries +#endif /* __DISABLED__ */ + mfspr r12,SPRN_SRR1 +#ifndef CONFIG_RELOCATABLE + b .slb_miss_realmode +#else + mfctr r11 + ld r10,PACAKBASE(r13) + LOAD_HANDLER(r10, .slb_miss_realmode) + mtctr r10 + bctr +#endif + + /* We open code these as we can't have a ". = x" (even with + * x = "." within a feature section + */ + . = 0x500; + .globl hardware_interrupt_pSeries; + .globl hardware_interrupt_hv; +hardware_interrupt_pSeries: +hardware_interrupt_hv: + BEGIN_FTR_SECTION + _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, + EXC_HV, SOFTEN_TEST_HV) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502) + FTR_SECTION_ELSE + _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, + EXC_STD, SOFTEN_TEST_HV_201) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) + + STD_EXCEPTION_PSERIES(0x600, 0x600, alignment) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x600) + + STD_EXCEPTION_PSERIES(0x700, 0x700, program_check) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x700) + + STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x800) + + MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer) + MASKABLE_EXCEPTION_HV(0x980, 0x982, decrementer) + + STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xa00) + + STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xb00) + + . = 0xc00 + .globl system_call_pSeries +system_call_pSeries: + HMT_MEDIUM +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + SET_SCRATCH0(r13) + GET_PACA(r13) + std r9,PACA_EXGEN+EX_R9(r13) + std r10,PACA_EXGEN+EX_R10(r13) + mfcr r9 + KVMTEST(0xc00) + GET_SCRATCH0(r13) +#endif +BEGIN_FTR_SECTION + cmpdi r0,0x1ebe + beq- 1f +END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) + mr r9,r13 + GET_PACA(r13) + mfspr r11,SPRN_SRR0 + mfspr r12,SPRN_SRR1 + ld r10,PACAKBASE(r13) + LOAD_HANDLER(r10, system_call_entry) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + mtspr SPRN_SRR1,r10 + rfid + b . 
/* prevent speculative execution */ + + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00) + +/* Fast LE/BE switch system call */ +1: mfspr r12,SPRN_SRR1 + xori r12,r12,MSR_LE + mtspr SPRN_SRR1,r12 + rfid /* return to userspace */ + b . + + STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xd00) + + /* At 0xe??? we have a bunch of hypervisor exceptions, we branch + * out of line to handle them + */ + . = 0xe00 + b h_data_storage_hv + . = 0xe20 + b h_instr_storage_hv + . = 0xe40 + b emulation_assist_hv + . = 0xe50 + b hmi_exception_hv + . = 0xe60 + b hmi_exception_hv + + /* We need to deal with the Altivec unavailable exception + * here which is at 0xf20, thus in the middle of the + * prolog code of the PerformanceMonitor one. A little + * trickery is thus necessary + */ +performance_monitor_pSeries_1: + . = 0xf00 + b performance_monitor_pSeries + +altivec_unavailable_pSeries_1: + . = 0xf20 + b altivec_unavailable_pSeries + +vsx_unavailable_pSeries_1: + . = 0xf40 + b vsx_unavailable_pSeries + +#ifdef CONFIG_CBE_RAS + STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202) +#endif /* CONFIG_CBE_RAS */ + + STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint) + KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x1300) + +#ifdef CONFIG_CBE_RAS + STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance) + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602) +#endif /* CONFIG_CBE_RAS */ + + STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x1700) + +#ifdef CONFIG_CBE_RAS + STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal) + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1802) +#endif /* CONFIG_CBE_RAS */ + + . = 0x3000 + +/*** Out of line interrupts support ***/ + + /* moved from 0x200 */ +machine_check_pSeries: + .globl machine_check_fwnmi +machine_check_fwnmi: + HMT_MEDIUM + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, + EXC_STD, KVMTEST, 0x200) + KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200) + +#ifndef CONFIG_POWER4_ONLY + /* moved from 0x300 */ +data_access_check_stab: + GET_PACA(r13) + std r9,PACA_EXSLB+EX_R9(r13) + std r10,PACA_EXSLB+EX_R10(r13) + mfspr r10,SPRN_DAR + mfspr r9,SPRN_DSISR + srdi r10,r10,60 + rlwimi r10,r9,16,0x20 +#ifdef CONFIG_KVM_BOOK3S_PR + lbz r9,HSTATE_IN_GUEST(r13) + rlwimi r10,r9,8,0x300 +#endif + mfcr r9 + cmpwi r10,0x2c + beq do_stab_bolted_pSeries + mtcrf 0x80,r9 + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + b data_access_not_stab +do_stab_bolted_pSeries: + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + GET_SCRATCH0(r10) + std r10,PACA_EXSLB+EX_R13(r13) + EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD) +#endif /* CONFIG_POWER4_ONLY */ + + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300) + KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400) + KVM_HANDLER_PR(PACA_EXSLB, EXC_STD, 0x480) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982) + + .align 7 + /* moved from 0xe00 */ + STD_EXCEPTION_HV(., 0xe02, h_data_storage) + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0xe02) + STD_EXCEPTION_HV(., 0xe22, h_instr_storage) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe22) + STD_EXCEPTION_HV(., 0xe42, emulation_assist) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe42) + STD_EXCEPTION_HV(., 0xe62, hmi_exception) /* need to flush cache ? 
*/
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62)
+
+ /* moved from 0xf00 */
+ STD_EXCEPTION_PSERIES(., 0xf00, performance_monitor)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf00)
+ STD_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20)
+ STD_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40)
+
+/*
+ * An interrupt came in while soft-disabled. We set paca->irq_happened,
+ * then, if it was a decrementer interrupt, we bump the dec to max and
+ * return, else we hard disable and return. This is called with
+ * r10 containing the value to OR into the paca field.
+ */
+#define MASKED_INTERRUPT(_H) \
+masked_##_H##interrupt: \
+ std r11,PACA_EXGEN+EX_R11(r13); \
+ lbz r11,PACAIRQHAPPENED(r13); \
+ or r11,r11,r10; \
+ stb r11,PACAIRQHAPPENED(r13); \
+ andi. r10,r10,PACA_IRQ_DEC; \
+ beq 1f; \
+ lis r10,0x7fff; \
+ ori r10,r10,0xffff; \
+ mtspr SPRN_DEC,r10; \
+ b 2f; \
+1: mfspr r10,SPRN_##_H##SRR1; \
+ rldicl r10,r10,48,1; /* clear MSR_EE */ \
+ rotldi r10,r10,16; \
+ mtspr SPRN_##_H##SRR1,r10; \
+2: mtcrf 0x80,r9; \
+ ld r9,PACA_EXGEN+EX_R9(r13); \
+ ld r10,PACA_EXGEN+EX_R10(r13); \
+ ld r11,PACA_EXGEN+EX_R11(r13); \
+ GET_SCRATCH0(r13); \
+ ##_H##rfid; \
+ b .
+
+ MASKED_INTERRUPT()
+ MASKED_INTERRUPT(H)
+
+/*
+ * Called from arch_local_irq_enable when an interrupt needs
+ * to be resent. r3 contains 0x500 or 0x900 to indicate which
+ * kind of interrupt. MSR:EE is already off. We generate a
+ * stackframe as if a real interrupt had happened.
+ *
+ * Note: While MSR:EE is off, we need to make sure that _MSR
+ * in the generated frame has EE set to 1 or the exception
+ * handler will not properly re-enable interrupts.
+ */
+_GLOBAL(__replay_interrupt)
+ /* We are going to jump to the exception common code which
+ * will retrieve various register values from the PACA which
+ * we don't give a damn about, so we don't bother storing them.
+ */
+ mfmsr r12
+ mflr r11
+ mfcr r9
+ ori r12,r12,MSR_EE
+ andi. r3,r3,0x0800
+ bne decrementer_common
+ b hardware_interrupt_common
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Vectors for the FWNMI option. Share common code.
+ */
+ .globl system_reset_fwnmi
+ .align 7
+system_reset_fwnmi:
+ HMT_MEDIUM
+ SET_SCRATCH0(r13) /* save r13 */
+ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
+ NOTEST, 0x100)
+
+#endif /* CONFIG_PPC_PSERIES */
+
+#ifdef __DISABLED__
+/*
+ * This is used for when the SLB miss handler has to go virtual,
+ * which doesn't happen right now but will once we re-implement
+ * dynamic VSIDs for shared page tables
+ */
+slb_miss_user_pseries:
+ std r10,PACA_EXGEN+EX_R10(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ GET_SCRATCH0(r10)
+ ld r11,PACA_EXSLB+EX_R9(r13)
+ ld r12,PACA_EXSLB+EX_R3(r13)
+ std r10,PACA_EXGEN+EX_R13(r13)
+ std r11,PACA_EXGEN+EX_R9(r13)
+ std r12,PACA_EXGEN+EX_R3(r13)
+ clrrdi r12,r13,32
+ mfmsr r10
+ mfspr r11,SRR0 /* save SRR0 */
+ ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
+ ori r10,r10,MSR_IR|MSR_DR|MSR_RI
+ mtspr SRR0,r12
+ mfspr r12,SRR1 /* and SRR1 */
+ mtspr SRR1,r10
+ rfid
+ b . /* prevent spec. execution */
+#endif /* __DISABLED__ */
+
+ .align 7
+ .globl __end_interrupts
+__end_interrupts:
+
+/*
+ * Code from here down to __end_handlers is invoked from the
+ * exception prologs above. Because the prologs assemble the
+ * addresses of these handlers using the LOAD_HANDLER macro,
+ * which uses an addi instruction, these handlers must be in
+ * the first 32k of the kernel image.
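+ *
+ * For scale: an addi immediate is a 16-bit signed field, so anything
+ * reached as "kernel base + addi offset" has to sit within 32k of the
+ * base, which is where the limit above comes from.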
+ */ + +/*** Common interrupt handlers ***/ + + STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception) + + /* + * Machine check is different because we use a different + * save area: PACA_EXMC instead of PACA_EXGEN. + */ + .align 7 + .globl machine_check_common +machine_check_common: + EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) + FINISH_NAP + DISABLE_INTS + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .machine_check_exception + b .ret_from_except + + STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ) + STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt) + STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception) + STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) + STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) + STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception) + STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception) + STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception) + STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, .performance_monitor_exception) + STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception) +#ifdef CONFIG_ALTIVEC + STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception) +#else + STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception) +#endif +#ifdef CONFIG_CBE_RAS + STD_EXCEPTION_COMMON(0x1200, cbe_system_error, .cbe_system_error_exception) + STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, .cbe_maintenance_exception) + STD_EXCEPTION_COMMON(0x1800, cbe_thermal, .cbe_thermal_exception) +#endif /* CONFIG_CBE_RAS */ + + .align 7 +system_call_entry: + b system_call_common + +ppc64_runlatch_on_trampoline: + b .__ppc64_runlatch_on + +/* + * Here we have detected that the kernel stack pointer is bad. + * R9 contains the saved CR, r13 points to the paca, + * r10 contains the (bad) kernel stack pointer, + * r11 and r12 contain the saved SRR0 and SRR1. + * We switch to using an emergency stack, save the registers there, + * and call kernel_bad_stack(), which panics. + */ +bad_stack: + ld r1,PACAEMERGSP(r13) + subi r1,r1,64+INT_FRAME_SIZE + std r9,_CCR(r1) + std r10,GPR1(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + mfspr r11,SPRN_DAR + mfspr r12,SPRN_DSISR + std r11,_DAR(r1) + std r12,_DSISR(r1) + mflr r10 + mfctr r11 + mfxer r12 + std r10,_LINK(r1) + std r11,_CTR(r1) + std r12,_XER(r1) + SAVE_GPR(0,r1) + SAVE_GPR(2,r1) + ld r10,EX_R3(r3) + std r10,GPR3(r1) + SAVE_GPR(4,r1) + SAVE_4GPRS(5,r1) + ld r9,EX_R9(r3) + ld r10,EX_R10(r3) + SAVE_2GPRS(9,r1) + ld r9,EX_R11(r3) + ld r10,EX_R12(r3) + ld r11,EX_R13(r3) + std r9,GPR11(r1) + std r10,GPR12(r1) + std r11,GPR13(r1) +BEGIN_FTR_SECTION + ld r10,EX_CFAR(r3) + std r10,ORIG_GPR3(r1) +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + SAVE_8GPRS(14,r1) + SAVE_10GPRS(22,r1) + lhz r12,PACA_TRAP_SAVE(r13) + std r12,_TRAP(r1) + addi r11,r1,INT_FRAME_SIZE + std r11,0(r1) + li r12,0 + std r12,0(r11) + ld r2,PACATOC(r13) + ld r11,exception_marker@toc(r2) + std r12,RESULT(r1) + std r11,STACK_FRAME_OVERHEAD-16(r1) +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .kernel_bad_stack + b 1b + +/* + * Here r13 points to the paca, r9 contains the saved CR, + * SRR0 and SRR1 are saved in r11 and r12, + * r9 - r13 are saved in paca->exgen. 
+ */ + .align 7 + .globl data_access_common +data_access_common: + mfspr r10,SPRN_DAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,SPRN_DSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN) + DISABLE_INTS + ld r12,_MSR(r1) + ld r3,PACA_EXGEN+EX_DAR(r13) + lwz r4,PACA_EXGEN+EX_DSISR(r13) + li r5,0x300 + b .do_hash_page /* Try to handle as hpte fault */ + + .align 7 + .globl h_data_storage_common +h_data_storage_common: + mfspr r10,SPRN_HDAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,SPRN_HDSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN) + bl .save_nvgprs + DISABLE_INTS + addi r3,r1,STACK_FRAME_OVERHEAD + bl .unknown_exception + b .ret_from_except + + .align 7 + .globl instruction_access_common +instruction_access_common: + EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN) + DISABLE_INTS + ld r12,_MSR(r1) + ld r3,_NIP(r1) + andis. r4,r12,0x5820 + li r5,0x400 + b .do_hash_page /* Try to handle as hpte fault */ + + STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception) + +/* + * Here is the common SLB miss user that is used when going to virtual + * mode for SLB misses, that is currently not used + */ +#ifdef __DISABLED__ + .align 7 + .globl slb_miss_user_common +slb_miss_user_common: + mflr r10 + std r3,PACA_EXGEN+EX_DAR(r13) + stw r9,PACA_EXGEN+EX_CCR(r13) + std r10,PACA_EXGEN+EX_LR(r13) + std r11,PACA_EXGEN+EX_SRR0(r13) + bl .slb_allocate_user + + ld r10,PACA_EXGEN+EX_LR(r13) + ld r3,PACA_EXGEN+EX_R3(r13) + lwz r9,PACA_EXGEN+EX_CCR(r13) + ld r11,PACA_EXGEN+EX_SRR0(r13) + mtlr r10 + beq- slb_miss_fault + + andi. r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- unrecov_user_slb + mfmsr r10 + +.machine push +.machine "power4" + mtcrf 0x80,r9 +.machine pop + + clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */ + mtmsrd r10,1 + + mtspr SRR0,r11 + mtspr SRR1,r12 + + ld r9,PACA_EXGEN+EX_R9(r13) + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + ld r12,PACA_EXGEN+EX_R12(r13) + ld r13,PACA_EXGEN+EX_R13(r13) + rfid + b . + +slb_miss_fault: + EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN) + ld r4,PACA_EXGEN+EX_DAR(r13) + li r5,0 + std r4,_DAR(r1) + std r5,_DSISR(r1) + b handle_page_fault + +unrecov_user_slb: + EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN) + DISABLE_INTS + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + +#endif /* __DISABLED__ */ + + +/* + * r13 points to the PACA, r9 contains the saved CR, + * r12 contain the saved SRR1, SRR0 is still ready for return + * r3 has the faulting address + * r9 - r13 are saved in paca->exslb. + * r3 is saved in paca->slb_r3 + * We assume we aren't going to take any exceptions during this procedure. + */ +_GLOBAL(slb_miss_realmode) + mflr r10 +#ifdef CONFIG_RELOCATABLE + mtctr r11 +#endif + + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + + bl .slb_allocate_realmode + + /* All done -- return from exception. */ + + ld r10,PACA_EXSLB+EX_LR(r13) + ld r3,PACA_EXSLB+EX_R3(r13) + lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ + + mtlr r10 + + andi. r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- 2f + +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + rfid + b . 
/* prevent speculative execution */ + +2: mfspr r11,SPRN_SRR0 + ld r10,PACAKBASE(r13) + LOAD_HANDLER(r10,unrecov_slb) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + mtspr SPRN_SRR1,r10 + rfid + b . + +unrecov_slb: + EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) + DISABLE_INTS + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + + +#ifdef CONFIG_PPC_970_NAP +power4_fixup_nap: + andc r9,r9,r10 + std r9,TI_LOCAL_FLAGS(r11) + ld r10,_LINK(r1) /* make idle task do the */ + std r10,_NIP(r1) /* equivalent of a blr */ + blr +#endif + + .align 7 + .globl alignment_common +alignment_common: + mfspr r10,SPRN_DAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,SPRN_DSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN) + ld r3,PACA_EXGEN+EX_DAR(r13) + lwz r4,PACA_EXGEN+EX_DSISR(r13) + std r3,_DAR(r1) + std r4,_DSISR(r1) + bl .save_nvgprs + DISABLE_INTS + addi r3,r1,STACK_FRAME_OVERHEAD + bl .alignment_exception + b .ret_from_except + + .align 7 + .globl program_check_common +program_check_common: + EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) + bl .save_nvgprs + DISABLE_INTS + addi r3,r1,STACK_FRAME_OVERHEAD + bl .program_check_exception + b .ret_from_except + + .align 7 + .globl fp_unavailable_common +fp_unavailable_common: + EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) + bne 1f /* if from user, just load it up */ + bl .save_nvgprs + DISABLE_INTS + addi r3,r1,STACK_FRAME_OVERHEAD + bl .kernel_fp_unavailable_exception + BUG_OPCODE +1: bl .load_up_fpu + b fast_exception_return + + .align 7 + .globl altivec_unavailable_common +altivec_unavailable_common: + EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN) +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + beq 1f + bl .load_up_altivec + b fast_exception_return +1: +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif + bl .save_nvgprs + DISABLE_INTS + addi r3,r1,STACK_FRAME_OVERHEAD + bl .altivec_unavailable_exception + b .ret_from_except + + .align 7 + .globl vsx_unavailable_common +vsx_unavailable_common: + EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN) +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + beq 1f + b .load_up_vsx +1: +END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif + bl .save_nvgprs + DISABLE_INTS + addi r3,r1,STACK_FRAME_OVERHEAD + bl .vsx_unavailable_exception + b .ret_from_except + + .align 7 + .globl __end_handlers +__end_handlers: + +/* + * Hash table stuff + */ + .align 7 +_STATIC(do_hash_page) + std r3,_DAR(r1) + std r4,_DSISR(r1) + + andis. r0,r4,0xa410 /* weird error? */ + bne- handle_page_fault /* if not, try to insert a HPTE */ + andis. r0,r4,DSISR_DABRMATCH@h + bne- handle_dabr_fault + +BEGIN_FTR_SECTION + andis. r0,r4,0x0020 /* Is it a segment table fault? */ + bne- do_ste_alloc /* If so handle it */ +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) + + clrrdi r11,r1,THREAD_SHIFT + lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ + andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ + bne 77f /* then don't call hash_page now */ + /* + * We need to set the _PAGE_USER bit if MSR_PR is set or if we are + * accessing a userspace segment (even from the kernel). We assume + * kernel addresses always have the high bit set. 
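+ *
+ * As a rough C sketch (for the 0x300/DSI case, not a literal
+ * translation) of the access mask the rlwinm/rlwimi sequence below
+ * builds in r4:
+ *
+ *	access  = (dsisr & DSISR_STORE) ? _PAGE_RW : 0;
+ *	if ((msr & MSR_PR) || !(ea >> 63))
+ *		access |= _PAGE_USER;
+ *	access |= _PAGE_PRESENT;
+ *	if (trap == 0x400)
+ *		access |= _PAGE_EXEC;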
+ */ + rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */ + rotldi r0,r3,15 /* Move high bit into MSR_PR posn */ + orc r0,r12,r0 /* MSR_PR | ~high_bit */ + rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */ + ori r4,r4,1 /* add _PAGE_PRESENT */ + rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */ + + /* + * r3 contains the faulting address + * r4 contains the required access permissions + * r5 contains the trap number + * + * at return r3 = 0 for success, 1 for page fault, negative for error + */ + bl .hash_page /* build HPTE if possible */ + cmpdi r3,0 /* see if hash_page succeeded */ + + /* Success */ + beq fast_exc_return_irq /* Return from exception on success */ + + /* Error */ + blt- 13f + +/* Here we have a page fault that hash_page can't handle. */ +handle_page_fault: +11: ld r4,_DAR(r1) + ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_page_fault + cmpdi r3,0 + beq+ 12f + bl .save_nvgprs + mr r5,r3 + addi r3,r1,STACK_FRAME_OVERHEAD + lwz r4,_DAR(r1) + bl .bad_page_fault + b .ret_from_except + +/* We have a data breakpoint exception - handle it */ +handle_dabr_fault: + bl .save_nvgprs + ld r4,_DAR(r1) + ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_dabr +12: b .ret_from_except_lite + + +/* We have a page fault that hash_page could handle but HV refused + * the PTE insertion + */ +13: bl .save_nvgprs + mr r5,r3 + addi r3,r1,STACK_FRAME_OVERHEAD + ld r4,_DAR(r1) + bl .low_hash_fault + b .ret_from_except + +/* + * We come here as a result of a DSI at a point where we don't want + * to call hash_page, such as when we are accessing memory (possibly + * user memory) inside a PMU interrupt that occurred while interrupts + * were soft-disabled. We want to invoke the exception handler for + * the access, or panic if there isn't a handler. + */ +77: bl .save_nvgprs + mr r4,r3 + addi r3,r1,STACK_FRAME_OVERHEAD + li r5,SIGSEGV + bl .bad_page_fault + b .ret_from_except + + /* here we have a segment miss */ +do_ste_alloc: + bl .ste_allocate /* try to insert stab entry */ + cmpdi r3,0 + bne- handle_page_fault + b fast_exception_return + +/* + * r13 points to the PACA, r9 contains the saved CR, + * r11 and r12 contain the saved SRR0 and SRR1. + * r9 - r13 are saved in paca->exslb. + * We assume we aren't going to take any exceptions during this procedure. + * We assume (DAR >> 60) == 0xc. + */ + .align 7 +_GLOBAL(do_stab_bolted) + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */ + + /* Hash to the primary group */ + ld r10,PACASTABVIRT(r13) + mfspr r11,SPRN_DAR + srdi r11,r11,28 + rldimi r10,r11,7,52 /* r10 = first ste of the group */ + + /* Calculate VSID */ + /* This is a kernel address, so protovsid = ESID */ + ASM_VSID_SCRAMBLE(r11, r9, 256M) + rldic r9,r11,12,16 /* r9 = vsid << 12 */ + + /* Search the primary group for a free entry */ +1: ld r11,0(r10) /* Test valid bit of the current ste */ + andi. r11,r11,0x80 + beq 2f + addi r10,r10,16 + andi. r11,r10,0x70 + bne 1b + + /* Stick for only searching the primary group for now. 
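+ * Each STE is 16 bytes, so the group spans offsets 0x00-0x70 from its
+ * base; the castout below picks one of those eight slots
+ * pseudo-randomly, always skipping entry 0.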
 */
+ /* At least for now, we use a very simple random castout scheme */
+ /* Use the TB as a random number; OR in 1 to avoid entry 0 */
+ mftb r11
+ rldic r11,r11,4,57 /* r11 = (r11 << 4) & 0x70 */
+ ori r11,r11,0x10
+
+ /* r10 currently points to an ste one past the group of interest */
+ /* make it point to the randomly selected entry */
+ subi r10,r10,128
+ or r10,r10,r11 /* r10 is the entry to invalidate */
+
+ isync /* mark the entry invalid */
+ ld r11,0(r10)
+ rldicl r11,r11,56,1 /* clear the valid bit */
+ rotldi r11,r11,8
+ std r11,0(r10)
+ sync
+
+ clrrdi r11,r11,28 /* Get the esid part of the ste */
+ slbie r11
+
+2: std r9,8(r10) /* Store the vsid part of the ste */
+ eieio
+
+ mfspr r11,SPRN_DAR /* Get the new esid */
+ clrrdi r11,r11,28 /* Permits a full 32b of ESID */
+ ori r11,r11,0x90 /* Turn on valid and kp */
+ std r11,0(r10) /* Put new entry back into the stab */
+
+ sync
+
+ /* All done -- return from exception. */
+ lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
+ ld r11,PACA_EXSLB+EX_SRR0(r13) /* get saved SRR0 */
+
+ andi. r10,r12,MSR_RI
+ beq- unrecov_slb
+
+ mtcrf 0x80,r9 /* restore CR */
+
+ mfmsr r10
+ clrrdi r10,r10,2
+ mtmsrd r10,1
+
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
+ ld r9,PACA_EXSLB+EX_R9(r13)
+ ld r10,PACA_EXSLB+EX_R10(r13)
+ ld r11,PACA_EXSLB+EX_R11(r13)
+ ld r12,PACA_EXSLB+EX_R12(r13)
+ ld r13,PACA_EXSLB+EX_R13(r13)
+ rfid
+ b . /* prevent speculative execution */
+
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+/*
+ * Data area reserved for FWNMI option.
+ * This address (0x7000) is fixed by the RPA.
+ */
+ . = 0x7000
+ .globl fwnmi_data_area
+fwnmi_data_area:
+
+ /* pseries and powernv need to keep the whole page from
+ * 0x7000 to 0x8000 free for use by the firmware
+ */
+ . = 0x8000
+#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
+
+/* Space for CPU0's segment table */
+ .balign 4096
+ .globl initial_stab
+initial_stab:
+ .space 4096
+
+#ifdef CONFIG_PPC_POWERNV
+_GLOBAL(opal_mc_secondary_handler)
+ HMT_MEDIUM
+ SET_SCRATCH0(r13)
+ GET_PACA(r13)
+ clrldi r3,r3,2
+ tovirt(r3,r3)
+ std r3,PACA_OPAL_MC_EVT(r13)
+ ld r13,OPAL_MC_SRR0(r3)
+ mtspr SPRN_SRR0,r13
+ ld r13,OPAL_MC_SRR1(r3)
+ mtspr SPRN_SRR1,r13
+ ld r3,OPAL_MC_GPR3(r3)
+ GET_SCRATCH0(r13)
+ b machine_check_pSeries
+#endif /* CONFIG_PPC_POWERNV */
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
new file mode 100644
index 00000000..18bdf74f
--- /dev/null
+++ b/arch/powerpc/kernel/fadump.c
@@ -0,0 +1,1317 @@
+/*
+ * Firmware Assisted dump: A robust mechanism to get a reliable kernel crash
+ * dump with assistance from firmware. This approach does not use kexec;
+ * instead, firmware assists in booting the kdump kernel while preserving
+ * memory contents. Most of the code implementation has been adapted
+ * from the phyp assisted dump implementation written by Linas Vepstas and
+ * Manish Ahuja
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2011 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/delay.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/crash_dump.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/fadump.h>
+#include <asm/debug.h>
+#include <asm/setup.h>
+
+static struct fw_dump fw_dump;
+static struct fadump_mem_struct fdm;
+static const struct fadump_mem_struct *fdm_active;
+
+static DEFINE_MUTEX(fadump_mutex);
+struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES];
+int crash_mem_ranges;
+
+/* Scan the Firmware Assisted dump configuration details. */
+int __init early_init_dt_scan_fw_dump(unsigned long node,
+ const char *uname, int depth, void *data)
+{
+ __be32 *sections;
+ int i, num_sections;
+ unsigned long size;
+ const int *token;
+
+ if (depth != 1 || strcmp(uname, "rtas") != 0)
+ return 0;
+
+ /*
+ * Check if Firmware Assisted dump is supported. If yes, check
+ * if a dump has been initiated on the last reboot.
+ */
+ token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
+ if (!token)
+ return 0;
+
+ fw_dump.fadump_supported = 1;
+ fw_dump.ibm_configure_kernel_dump = *token;
+
+ /*
+ * The 'ibm,kernel-dump' rtas node is present only if there is
+ * dump data waiting for us.
+ */
+ fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
+ if (fdm_active)
+ fw_dump.dump_active = 1;
+
+ /* Get the sizes required to store dump data for the firmware provided
+ * dump sections.
+ * For each dump section type supported, a 32-bit cell defines the ID
+ * of a supported section, followed by two 32-bit cells which give
+ * the size of the section in bytes.
+ */
+ sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
+ &size);
+
+ if (!sections)
+ return 0;
+
+ num_sections = size / (3 * sizeof(u32));
+
+ for (i = 0; i < num_sections; i++, sections += 3) {
+ u32 type = (u32)of_read_number(sections, 1);
+
+ switch (type) {
+ case FADUMP_CPU_STATE_DATA:
+ fw_dump.cpu_state_data_size =
+ of_read_ulong(&sections[1], 2);
+ break;
+ case FADUMP_HPTE_REGION:
+ fw_dump.hpte_region_size =
+ of_read_ulong(&sections[1], 2);
+ break;
+ }
+ }
+ return 1;
+}
+
+int is_fadump_active(void)
+{
+ return fw_dump.dump_active;
+}
+
+/* Print the firmware assisted dump configuration for debugging purposes. */
+static void fadump_show_config(void)
+{
+ pr_debug("Support for firmware-assisted dump (fadump): %s\n",
+ (fw_dump.fadump_supported ? "present" : "no support"));
+
+ if (!fw_dump.fadump_supported)
+ return;
+
+ pr_debug("Fadump enabled : %s\n",
+ (fw_dump.fadump_enabled ? "yes" : "no"));
+ pr_debug("Dump Active : %s\n",
"yes" : "no")); + pr_debug("Dump section sizes:\n"); + pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size); + pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size); + pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size); +} + +static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm, + unsigned long addr) +{ + if (!fdm) + return 0; + + memset(fdm, 0, sizeof(struct fadump_mem_struct)); + addr = addr & PAGE_MASK; + + fdm->header.dump_format_version = 0x00000001; + fdm->header.dump_num_sections = 3; + fdm->header.dump_status_flag = 0; + fdm->header.offset_first_dump_section = + (u32)offsetof(struct fadump_mem_struct, cpu_state_data); + + /* + * Fields for disk dump option. + * We are not using disk dump option, hence set these fields to 0. + */ + fdm->header.dd_block_size = 0; + fdm->header.dd_block_offset = 0; + fdm->header.dd_num_blocks = 0; + fdm->header.dd_offset_disk_path = 0; + + /* set 0 to disable an automatic dump-reboot. */ + fdm->header.max_time_auto = 0; + + /* Kernel dump sections */ + /* cpu state data section. */ + fdm->cpu_state_data.request_flag = FADUMP_REQUEST_FLAG; + fdm->cpu_state_data.source_data_type = FADUMP_CPU_STATE_DATA; + fdm->cpu_state_data.source_address = 0; + fdm->cpu_state_data.source_len = fw_dump.cpu_state_data_size; + fdm->cpu_state_data.destination_address = addr; + addr += fw_dump.cpu_state_data_size; + + /* hpte region section */ + fdm->hpte_region.request_flag = FADUMP_REQUEST_FLAG; + fdm->hpte_region.source_data_type = FADUMP_HPTE_REGION; + fdm->hpte_region.source_address = 0; + fdm->hpte_region.source_len = fw_dump.hpte_region_size; + fdm->hpte_region.destination_address = addr; + addr += fw_dump.hpte_region_size; + + /* RMA region section */ + fdm->rmr_region.request_flag = FADUMP_REQUEST_FLAG; + fdm->rmr_region.source_data_type = FADUMP_REAL_MODE_REGION; + fdm->rmr_region.source_address = RMA_START; + fdm->rmr_region.source_len = fw_dump.boot_memory_size; + fdm->rmr_region.destination_address = addr; + addr += fw_dump.boot_memory_size; + + return addr; +} + +/** + * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM + * + * Function to find the largest memory size we need to reserve during early + * boot process. This will be the size of the memory that is required for a + * kernel to boot successfully. + * + * This function has been taken from phyp-assisted dump feature implementation. + * + * returns larger of 256MB or 5% rounded down to multiples of 256MB. + * + * TODO: Come up with better approach to find out more accurate memory size + * that is required for a kernel to boot successfully. + * + */ +static inline unsigned long fadump_calculate_reserve_size(void) +{ + unsigned long size; + + /* + * Check if the size is specified through fadump_reserve_mem= cmdline + * option. If yes, then use that. + */ + if (fw_dump.reserve_bootvar) + return fw_dump.reserve_bootvar; + + /* divide by 20 to get 5% of value */ + size = memblock_end_of_DRAM() / 20; + + /* round it down in multiples of 256 */ + size = size & ~0x0FFFFFFFUL; + + /* Truncate to memory_limit. We don't want to over reserve the memory.*/ + if (memory_limit && size > memory_limit) + size = memory_limit; + + return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM); +} + +/* + * Calculate the total memory size required to be reserved for + * firmware-assisted dump registration. 
+ */ +static unsigned long get_fadump_area_size(void) +{ + unsigned long size = 0; + + size += fw_dump.cpu_state_data_size; + size += fw_dump.hpte_region_size; + size += fw_dump.boot_memory_size; + size += sizeof(struct fadump_crash_info_header); + size += sizeof(struct elfhdr); /* ELF core header.*/ + size += sizeof(struct elf_phdr); /* place holder for cpu notes */ + /* Program headers for crash memory regions. */ + size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2); + + size = PAGE_ALIGN(size); + return size; +} + +int __init fadump_reserve_mem(void) +{ + unsigned long base, size, memory_boundary; + + if (!fw_dump.fadump_enabled) + return 0; + + if (!fw_dump.fadump_supported) { + printk(KERN_INFO "Firmware-assisted dump is not supported on" + " this hardware\n"); + fw_dump.fadump_enabled = 0; + return 0; + } + /* + * Initialize boot memory size + * If dump is active then we have already calculated the size during + * first kernel. + */ + if (fdm_active) + fw_dump.boot_memory_size = fdm_active->rmr_region.source_len; + else + fw_dump.boot_memory_size = fadump_calculate_reserve_size(); + + /* + * Calculate the memory boundary. + * If memory_limit is less than actual memory boundary then reserve + * the memory for fadump beyond the memory_limit and adjust the + * memory_limit accordingly, so that the running kernel can run with + * specified memory_limit. + */ + if (memory_limit && memory_limit < memblock_end_of_DRAM()) { + size = get_fadump_area_size(); + if ((memory_limit + size) < memblock_end_of_DRAM()) + memory_limit += size; + else + memory_limit = memblock_end_of_DRAM(); + printk(KERN_INFO "Adjusted memory_limit for firmware-assisted" + " dump, now %#016llx\n", + (unsigned long long)memory_limit); + } + if (memory_limit) + memory_boundary = memory_limit; + else + memory_boundary = memblock_end_of_DRAM(); + + if (fw_dump.dump_active) { + printk(KERN_INFO "Firmware-assisted dump is active.\n"); + /* + * If last boot has crashed then reserve all the memory + * above boot_memory_size so that we don't touch it until + * dump is written to disk by userspace tool. This memory + * will be released for general use once the dump is saved. + */ + base = fw_dump.boot_memory_size; + size = memory_boundary - base; + memblock_reserve(base, size); + printk(KERN_INFO "Reserved %ldMB of memory at %ldMB " + "for saving crash dump\n", + (unsigned long)(size >> 20), + (unsigned long)(base >> 20)); + + fw_dump.fadumphdr_addr = + fdm_active->rmr_region.destination_address + + fdm_active->rmr_region.source_len; + pr_debug("fadumphdr_addr = %p\n", + (void *) fw_dump.fadumphdr_addr); + } else { + /* Reserve the memory at the top of memory. */ + size = get_fadump_area_size(); + base = memory_boundary - size; + memblock_reserve(base, size); + printk(KERN_INFO "Reserved %ldMB of memory at %ldMB " + "for firmware-assisted dump\n", + (unsigned long)(size >> 20), + (unsigned long)(base >> 20)); + } + fw_dump.reserve_dump_area_start = base; + fw_dump.reserve_dump_area_size = size; + return 1; +} + +/* Look for fadump= cmdline option. 
*/ +static int __init early_fadump_param(char *p) +{ + if (!p) + return 1; + + if (strncmp(p, "on", 2) == 0) + fw_dump.fadump_enabled = 1; + else if (strncmp(p, "off", 3) == 0) + fw_dump.fadump_enabled = 0; + + return 0; +} +early_param("fadump", early_fadump_param); + +/* Look for fadump_reserve_mem= cmdline option */ +static int __init early_fadump_reserve_mem(char *p) +{ + if (p) + fw_dump.reserve_bootvar = memparse(p, &p); + return 0; +} +early_param("fadump_reserve_mem", early_fadump_reserve_mem); + +static void register_fw_dump(struct fadump_mem_struct *fdm) +{ + int rc; + unsigned int wait_time; + + pr_debug("Registering for firmware-assisted kernel dump...\n"); + + /* TODO: Add upper time limit for the delay */ + do { + rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL, + FADUMP_REGISTER, fdm, + sizeof(struct fadump_mem_struct)); + + wait_time = rtas_busy_delay_time(rc); + if (wait_time) + mdelay(wait_time); + + } while (wait_time); + + switch (rc) { + case -1: + printk(KERN_ERR "Failed to register firmware-assisted kernel" + " dump. Hardware Error(%d).\n", rc); + break; + case -3: + printk(KERN_ERR "Failed to register firmware-assisted kernel" + " dump. Parameter Error(%d).\n", rc); + break; + case -9: + printk(KERN_ERR "firmware-assisted kernel dump is already " + " registered."); + fw_dump.dump_registered = 1; + break; + case 0: + printk(KERN_INFO "firmware-assisted kernel dump registration" + " is successful\n"); + fw_dump.dump_registered = 1; + break; + } +} + +void crash_fadump(struct pt_regs *regs, const char *str) +{ + struct fadump_crash_info_header *fdh = NULL; + + if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr) + return; + + fdh = __va(fw_dump.fadumphdr_addr); + crashing_cpu = smp_processor_id(); + fdh->crashing_cpu = crashing_cpu; + crash_save_vmcoreinfo(); + + if (regs) + fdh->regs = *regs; + else + ppc_save_regs(&fdh->regs); + + fdh->cpu_online_mask = *cpu_online_mask; + + /* Call ibm,os-term rtas call to trigger firmware assisted dump */ + rtas_os_term((char *)str); +} + +#define GPR_MASK 0xffffff0000000000 +static inline int fadump_gpr_index(u64 id) +{ + int i = -1; + char str[3]; + + if ((id & GPR_MASK) == REG_ID("GPR")) { + /* get the digits at the end */ + id &= ~GPR_MASK; + id >>= 24; + str[2] = '\0'; + str[1] = id & 0xff; + str[0] = (id >> 8) & 0xff; + sscanf(str, "%d", &i); + if (i > 31) + i = -1; + } + return i; +} + +static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id, + u64 reg_val) +{ + int i; + + i = fadump_gpr_index(reg_id); + if (i >= 0) + regs->gpr[i] = (unsigned long)reg_val; + else if (reg_id == REG_ID("NIA")) + regs->nip = (unsigned long)reg_val; + else if (reg_id == REG_ID("MSR")) + regs->msr = (unsigned long)reg_val; + else if (reg_id == REG_ID("CTR")) + regs->ctr = (unsigned long)reg_val; + else if (reg_id == REG_ID("LR")) + regs->link = (unsigned long)reg_val; + else if (reg_id == REG_ID("XER")) + regs->xer = (unsigned long)reg_val; + else if (reg_id == REG_ID("CR")) + regs->ccr = (unsigned long)reg_val; + else if (reg_id == REG_ID("DAR")) + regs->dar = (unsigned long)reg_val; + else if (reg_id == REG_ID("DSISR")) + regs->dsisr = (unsigned long)reg_val; +} + +static struct fadump_reg_entry* +fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs) +{ + memset(regs, 0, sizeof(struct pt_regs)); + + while (reg_entry->reg_id != REG_ID("CPUEND")) { + fadump_set_regval(regs, reg_entry->reg_id, + reg_entry->reg_value); + reg_entry++; + } + reg_entry++; + return reg_entry; +} + +static u32 
*fadump_append_elf_note(u32 *buf, char *name, unsigned type,
+ void *data, size_t data_len)
+{
+ struct elf_note note;
+
+ note.n_namesz = strlen(name) + 1;
+ note.n_descsz = data_len;
+ note.n_type = type;
+ memcpy(buf, &note, sizeof(note));
+ buf += (sizeof(note) + 3)/4;
+ memcpy(buf, name, note.n_namesz);
+ buf += (note.n_namesz + 3)/4;
+ memcpy(buf, data, note.n_descsz);
+ buf += (note.n_descsz + 3)/4;
+
+ return buf;
+}
+
+static void fadump_final_note(u32 *buf)
+{
+ struct elf_note note;
+
+ note.n_namesz = 0;
+ note.n_descsz = 0;
+ note.n_type = 0;
+ memcpy(buf, &note, sizeof(note));
+}
+
+static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
+{
+ struct elf_prstatus prstatus;
+
+ memset(&prstatus, 0, sizeof(prstatus));
+ /*
+ * FIXME: How do I get the PID? Do I really need it?
+ * prstatus.pr_pid = ????
+ */
+ elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
+ buf = fadump_append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
+ &prstatus, sizeof(prstatus));
+ return buf;
+}
+
+static void fadump_update_elfcore_header(char *bufp)
+{
+ struct elfhdr *elf;
+ struct elf_phdr *phdr;
+
+ elf = (struct elfhdr *)bufp;
+ bufp += sizeof(struct elfhdr);
+
+ /* First note is a placeholder for cpu notes info. */
+ phdr = (struct elf_phdr *)bufp;
+
+ if (phdr->p_type == PT_NOTE) {
+ phdr->p_paddr = fw_dump.cpu_notes_buf;
+ phdr->p_offset = phdr->p_paddr;
+ phdr->p_filesz = fw_dump.cpu_notes_buf_size;
+ phdr->p_memsz = fw_dump.cpu_notes_buf_size;
+ }
+ return;
+}
+
+static void *fadump_cpu_notes_buf_alloc(unsigned long size)
+{
+ void *vaddr;
+ struct page *page;
+ unsigned long order, count, i;
+
+ order = get_order(size);
+ vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+ if (!vaddr)
+ return NULL;
+
+ count = 1 << order;
+ page = virt_to_page(vaddr);
+ for (i = 0; i < count; i++)
+ SetPageReserved(page + i);
+ return vaddr;
+}
+
+static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
+{
+ struct page *page;
+ unsigned long order, count, i;
+
+ order = get_order(size);
+ count = 1 << order;
+ page = virt_to_page(vaddr);
+ for (i = 0; i < count; i++)
+ ClearPageReserved(page + i);
+ __free_pages(page, order);
+}
+
+/*
+ * Read the CPU state dump data and convert it into ELF notes.
+ * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
+ * used to access the data to allow for additional fields to be added without
+ * affecting compatibility. Each list of registers for a CPU starts with
+ * "CPUSTRT" and ends with "CPUEND". Each register entry is 16 bytes: an
+ * 8-byte ASCII identifier and an 8-byte register value. The register
+ * entries with identifiers "CPUSTRT" and "CPUEND" contain a 4-byte cpu id
+ * as part of the register value. For more details refer to the PAPR
+ * document.
+ *
+ * Only for the crashing cpu do we ignore the CPU dump data and get the
+ * exact state from the fadump crash info structure populated by the first
+ * kernel at the time of crash.
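+ *
+ * Sketch of the save area described above (each entry 16 bytes,
+ * identifiers ASCII-padded to 8 bytes, e.g. "GPR01"):
+ *
+ *	"REGSAVE"  header, NumCpus at num_cpu_offset
+ *	"CPUSTRT"  <cpu id in low 4 bytes>
+ *	"GPR01  "  <register value>
+ *	   ...
+ *	"CPUEND "  <cpu id>
+ *	(one CPUSTRT..CPUEND block per CPU)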
+ */
+static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
+{
+ struct fadump_reg_save_area_header *reg_header;
+ struct fadump_reg_entry *reg_entry;
+ struct fadump_crash_info_header *fdh = NULL;
+ void *vaddr;
+ unsigned long addr;
+ u32 num_cpus, *note_buf;
+ struct pt_regs regs;
+ int i, rc = 0, cpu = 0;
+
+ if (!fdm->cpu_state_data.bytes_dumped)
+ return -EINVAL;
+
+ addr = fdm->cpu_state_data.destination_address;
+ vaddr = __va(addr);
+
+ reg_header = vaddr;
+ if (reg_header->magic_number != REGSAVE_AREA_MAGIC) {
+ printk(KERN_ERR "Unable to read register save area.\n");
+ return -ENOENT;
+ }
+ pr_debug("--------CPU State Data------------\n");
+ pr_debug("Magic Number: %llx\n", reg_header->magic_number);
+ pr_debug("NumCpuOffset: %x\n", reg_header->num_cpu_offset);
+
+ vaddr += reg_header->num_cpu_offset;
+ num_cpus = *((u32 *)(vaddr));
+ pr_debug("NumCpus : %u\n", num_cpus);
+ vaddr += sizeof(u32);
+ reg_entry = (struct fadump_reg_entry *)vaddr;
+
+ /* Allocate buffer to hold cpu crash notes. */
+ fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
+ fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
+ note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
+ if (!note_buf) {
+ printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
+ "cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
+ return -ENOMEM;
+ }
+ fw_dump.cpu_notes_buf = __pa(note_buf);
+
+ pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
+ (num_cpus * sizeof(note_buf_t)), note_buf);
+
+ if (fw_dump.fadumphdr_addr)
+ fdh = __va(fw_dump.fadumphdr_addr);
+
+ for (i = 0; i < num_cpus; i++) {
+ if (reg_entry->reg_id != REG_ID("CPUSTRT")) {
+ printk(KERN_ERR "Unable to read CPU state data\n");
+ rc = -ENOENT;
+ goto error_out;
+ }
+ /* Lower 4 bytes of reg_value contain the logical cpu id */
+ cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK;
+ if (!cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) {
+ SKIP_TO_NEXT_CPU(reg_entry);
+ continue;
+ }
+ pr_debug("Reading register data for cpu %d...\n", cpu);
+ if (fdh && fdh->crashing_cpu == cpu) {
+ regs = fdh->regs;
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ SKIP_TO_NEXT_CPU(reg_entry);
+ } else {
+ reg_entry++;
+ reg_entry = fadump_read_registers(reg_entry, &regs);
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ }
+ }
+ fadump_final_note(note_buf);
+
+ pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+ fdh->elfcorehdr_addr);
+ fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
+ return 0;
+
+error_out:
+ fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
+ fw_dump.cpu_notes_buf_size);
+ fw_dump.cpu_notes_buf = 0;
+ fw_dump.cpu_notes_buf_size = 0;
+ return rc;
+
+}
+
+/*
+ * Validate and process the dump data stored by firmware before exporting
+ * it through '/proc/vmcore'.
+ */
+static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
+{
+ struct fadump_crash_info_header *fdh;
+ int rc = 0;
+
+ if (!fdm_active || !fw_dump.fadumphdr_addr)
+ return -EINVAL;
+
+ /* Check if the dump data is valid.
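+ * We require a clean dump status flag, no per-section error flags,
+ * a fully dumped RMR region and a non-empty CPU state section.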
 */
+ if ((fdm_active->header.dump_status_flag == FADUMP_ERROR_FLAG) ||
+ (fdm_active->cpu_state_data.error_flags != 0) ||
+ (fdm_active->rmr_region.error_flags != 0)) {
+ printk(KERN_ERR "Dump taken by platform is not valid\n");
+ return -EINVAL;
+ }
+ if ((fdm_active->rmr_region.bytes_dumped !=
+ fdm_active->rmr_region.source_len) ||
+ !fdm_active->cpu_state_data.bytes_dumped) {
+ printk(KERN_ERR "Dump taken by platform is incomplete\n");
+ return -EINVAL;
+ }
+
+ /* Validate the fadump crash info header */
+ fdh = __va(fw_dump.fadumphdr_addr);
+ if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+ printk(KERN_ERR "Crash info header is not valid.\n");
+ return -EINVAL;
+ }
+
+ rc = fadump_build_cpu_notes(fdm_active);
+ if (rc)
+ return rc;
+
+ /*
+ * We are done validating dump info and the elfcore header is now
+ * ready to be exported. Set elfcorehdr_addr so that the vmcore module
+ * will export the elfcore header through '/proc/vmcore'.
+ */
+ elfcorehdr_addr = fdh->elfcorehdr_addr;
+
+ return 0;
+}
+
+static inline void fadump_add_crash_memory(unsigned long long base,
+ unsigned long long end)
+{
+ if (base == end)
+ return;
+
+ pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
+ crash_mem_ranges, base, end - 1, (end - base));
+ crash_memory_ranges[crash_mem_ranges].base = base;
+ crash_memory_ranges[crash_mem_ranges].size = end - base;
+ crash_mem_ranges++;
+}
+
+static void fadump_exclude_reserved_area(unsigned long long start,
+ unsigned long long end)
+{
+ unsigned long long ra_start, ra_end;
+
+ ra_start = fw_dump.reserve_dump_area_start;
+ ra_end = ra_start + fw_dump.reserve_dump_area_size;
+
+ if ((ra_start < end) && (ra_end > start)) {
+ if ((start < ra_start) && (end > ra_end)) {
+ fadump_add_crash_memory(start, ra_start);
+ fadump_add_crash_memory(ra_end, end);
+ } else if (start < ra_start) {
+ fadump_add_crash_memory(start, ra_start);
+ } else if (ra_end < end) {
+ fadump_add_crash_memory(ra_end, end);
+ }
+ } else
+ fadump_add_crash_memory(start, end);
+}
+
+static int fadump_init_elfcore_header(char *bufp)
+{
+ struct elfhdr *elf;
+
+ elf = (struct elfhdr *) bufp;
+ bufp += sizeof(struct elfhdr);
+ memcpy(elf->e_ident, ELFMAG, SELFMAG);
+ elf->e_ident[EI_CLASS] = ELF_CLASS;
+ elf->e_ident[EI_DATA] = ELF_DATA;
+ elf->e_ident[EI_VERSION] = EV_CURRENT;
+ elf->e_ident[EI_OSABI] = ELF_OSABI;
+ memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
+ elf->e_type = ET_CORE;
+ elf->e_machine = ELF_ARCH;
+ elf->e_version = EV_CURRENT;
+ elf->e_entry = 0;
+ elf->e_phoff = sizeof(struct elfhdr);
+ elf->e_shoff = 0;
+ elf->e_flags = ELF_CORE_EFLAGS;
+ elf->e_ehsize = sizeof(struct elfhdr);
+ elf->e_phentsize = sizeof(struct elf_phdr);
+ elf->e_phnum = 0;
+ elf->e_shentsize = 0;
+ elf->e_shnum = 0;
+ elf->e_shstrndx = 0;
+
+ return 0;
+}
+
+/*
+ * Traverse the memblock structure and set up crash memory ranges. These
+ * ranges will be used to create PT_LOAD program headers in the elfcore
+ * header.
+ */
+static void fadump_setup_crash_memory_ranges(void)
+{
+ struct memblock_region *reg;
+ unsigned long long start, end;
+
+ pr_debug("Setup crash memory ranges.\n");
+ crash_mem_ranges = 0;
+ /*
+ * Add the first memory chunk (RMA_START through boot_memory_size) as
+ * a separate memory chunk. The reason is that at the time of crash
+ * firmware will move the content of this memory chunk to a different
+ * location, specified during fadump registration. We need to create
+ * a separate program header for this chunk with the correct offset.
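+ *
+ * Concretely, fadump_create_elfcore_headers() below points p_offset
+ * for the PT_LOAD covering [RMA_START, boot_memory_size) at
+ * rmr_region.destination_address, while every other range keeps
+ * p_offset == p_paddr.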
+ */ + fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size); + + for_each_memblock(memory, reg) { + start = (unsigned long long)reg->base; + end = start + (unsigned long long)reg->size; + if (start == RMA_START && end >= fw_dump.boot_memory_size) + start = fw_dump.boot_memory_size; + + /* add this range excluding the reserved dump area. */ + fadump_exclude_reserved_area(start, end); + } +} + +/* + * If the given physical address falls within the boot memory region then + * return the relocated address that points to the dump region reserved + * for saving initial boot memory contents. + */ +static inline unsigned long fadump_relocate(unsigned long paddr) +{ + if (paddr > RMA_START && paddr < fw_dump.boot_memory_size) + return fdm.rmr_region.destination_address + paddr; + else + return paddr; +} + +static int fadump_create_elfcore_headers(char *bufp) +{ + struct elfhdr *elf; + struct elf_phdr *phdr; + int i; + + fadump_init_elfcore_header(bufp); + elf = (struct elfhdr *)bufp; + bufp += sizeof(struct elfhdr); + + /* + * setup ELF PT_NOTE, place holder for cpu notes info. The notes info + * will be populated during second kernel boot after crash. Hence + * this PT_NOTE will always be the first elf note. + * + * NOTE: Any new ELF note addition should be placed after this note. + */ + phdr = (struct elf_phdr *)bufp; + bufp += sizeof(struct elf_phdr); + phdr->p_type = PT_NOTE; + phdr->p_flags = 0; + phdr->p_vaddr = 0; + phdr->p_align = 0; + + phdr->p_offset = 0; + phdr->p_paddr = 0; + phdr->p_filesz = 0; + phdr->p_memsz = 0; + + (elf->e_phnum)++; + + /* setup ELF PT_NOTE for vmcoreinfo */ + phdr = (struct elf_phdr *)bufp; + bufp += sizeof(struct elf_phdr); + phdr->p_type = PT_NOTE; + phdr->p_flags = 0; + phdr->p_vaddr = 0; + phdr->p_align = 0; + + phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note()); + phdr->p_offset = phdr->p_paddr; + phdr->p_memsz = vmcoreinfo_max_size; + phdr->p_filesz = vmcoreinfo_max_size; + + /* Increment number of program headers. */ + (elf->e_phnum)++; + + /* setup PT_LOAD sections. */ + + for (i = 0; i < crash_mem_ranges; i++) { + unsigned long long mbase, msize; + mbase = crash_memory_ranges[i].base; + msize = crash_memory_ranges[i].size; + + if (!msize) + continue; + + phdr = (struct elf_phdr *)bufp; + bufp += sizeof(struct elf_phdr); + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R|PF_W|PF_X; + phdr->p_offset = mbase; + + if (mbase == RMA_START) { + /* + * The entire RMA region will be moved by firmware + * to the specified destination_address. Hence set + * the correct offset. + */ + phdr->p_offset = fdm.rmr_region.destination_address; + } + + phdr->p_paddr = mbase; + phdr->p_vaddr = (unsigned long)__va(mbase); + phdr->p_filesz = msize; + phdr->p_memsz = msize; + phdr->p_align = 0; + + /* Increment number of program headers. */ + (elf->e_phnum)++; + } + return 0; +} + +static unsigned long init_fadump_header(unsigned long addr) +{ + struct fadump_crash_info_header *fdh; + + if (!addr) + return 0; + + fw_dump.fadumphdr_addr = addr; + fdh = __va(addr); + addr += sizeof(struct fadump_crash_info_header); + + memset(fdh, 0, sizeof(struct fadump_crash_info_header)); + fdh->magic_number = FADUMP_CRASH_INFO_MAGIC; + fdh->elfcorehdr_addr = addr; + /* We will set the crashing cpu id in crash_fadump() during crash. */ + fdh->crashing_cpu = CPU_UNKNOWN; + + return addr; +} + +static void register_fadump(void) +{ + unsigned long addr; + void *vaddr; + + /* + * If no memory is reserved then we can not register for firmware- + * assisted dump. 
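+ * (reserve_dump_area_size is established by fadump_reserve_mem() at
+ * early boot; without that reservation firmware has nowhere to place
+ * the dump data.)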
+ */
+	if (!fw_dump.reserve_dump_area_size)
+		return;
+
+	fadump_setup_crash_memory_ranges();
+
+	addr = fdm.rmr_region.destination_address + fdm.rmr_region.source_len;
+	/* Initialize fadump crash info header. */
+	addr = init_fadump_header(addr);
+	vaddr = __va(addr);
+
+	pr_debug("Creating ELF core headers at %#016lx\n", addr);
+	fadump_create_elfcore_headers(vaddr);
+
+	/* Register the future kernel dump with firmware. */
+	register_fw_dump(&fdm);
+}
+
+static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
+{
+	int rc = 0;
+	unsigned int wait_time;
+
+	pr_debug("Un-register firmware-assisted dump\n");
+
+	/* TODO: Add upper time limit for the delay */
+	do {
+		rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
+			FADUMP_UNREGISTER, fdm,
+			sizeof(struct fadump_mem_struct));
+
+		wait_time = rtas_busy_delay_time(rc);
+		if (wait_time)
+			mdelay(wait_time);
+	} while (wait_time);
+
+	if (rc) {
+		printk(KERN_ERR "Failed to un-register firmware-assisted dump,"
+			" unexpected error (%d).\n", rc);
+		return rc;
+	}
+	fw_dump.dump_registered = 0;
+	return 0;
+}
+
+static int fadump_invalidate_dump(struct fadump_mem_struct *fdm)
+{
+	int rc = 0;
+	unsigned int wait_time;
+
+	pr_debug("Invalidating firmware-assisted dump registration\n");
+
+	/* TODO: Add upper time limit for the delay */
+	do {
+		rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
+			FADUMP_INVALIDATE, fdm,
+			sizeof(struct fadump_mem_struct));
+
+		wait_time = rtas_busy_delay_time(rc);
+		if (wait_time)
+			mdelay(wait_time);
+	} while (wait_time);
+
+	if (rc) {
+		printk(KERN_ERR "Failed to invalidate firmware-assisted dump"
+			" registration, unexpected error (%d).\n", rc);
+		return rc;
+	}
+	fw_dump.dump_active = 0;
+	fdm_active = NULL;
+	return 0;
+}
+
+void fadump_cleanup(void)
+{
+	/* Invalidate the registration only if dump is active. */
+	if (fw_dump.dump_active) {
+		init_fadump_mem_struct(&fdm,
+			fdm_active->cpu_state_data.destination_address);
+		fadump_invalidate_dump(&fdm);
+	}
+}
+
+/*
+ * Release the memory that was reserved in early boot to preserve the memory
+ * contents. The released memory will be available for general use.
+ */
+static void fadump_release_memory(unsigned long begin, unsigned long end)
+{
+	unsigned long addr;
+	unsigned long ra_start, ra_end;
+
+	ra_start = fw_dump.reserve_dump_area_start;
+	ra_end = ra_start + fw_dump.reserve_dump_area_size;
+
+	for (addr = begin; addr < end; addr += PAGE_SIZE) {
+		/*
+		 * Exclude the dump reserve area; it will be reused for the
+		 * next fadump registration.
+		 */
+		if (addr <= ra_end && ((addr + PAGE_SIZE) > ra_start))
+			continue;
+
+		ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT));
+		init_page_count(pfn_to_page(addr >> PAGE_SHIFT));
+		free_page((unsigned long)__va(addr));
+		totalram_pages++;
+	}
+}
+
+static void fadump_invalidate_release_mem(void)
+{
+	unsigned long reserved_area_start, reserved_area_end;
+	unsigned long destination_address;
+
+	mutex_lock(&fadump_mutex);
+	if (!fw_dump.dump_active) {
+		mutex_unlock(&fadump_mutex);
+		return;
+	}
+
+	destination_address = fdm_active->cpu_state_data.destination_address;
+	fadump_cleanup();
+	mutex_unlock(&fadump_mutex);
+
+	/*
+	 * Save the current reserved memory bounds; we will require them
+	 * later for releasing the memory for general use.
+	 */
+	reserved_area_start = fw_dump.reserve_dump_area_start;
+	reserved_area_end = reserved_area_start +
+			fw_dump.reserve_dump_area_size;
+	/*
+	 * Set up reserve_dump_area_start and its size so that we can
+	 * reuse this reserved memory for re-registration.
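
Both RTAS teardown calls above poll with the same busy-retry idiom: repeat the call while firmware reports a busy status, sleeping for the suggested interval each time. Reduced to a runnable sketch, where the status code and helpers are illustrative stand-ins for the RTAS interfaces:

    #include <stdio.h>

    #define FW_BUSY (-2)   /* illustrative "busy, retry" status */

    static int calls;
    static int fw_call(void) { return ++calls < 3 ? FW_BUSY : 0; }
    static unsigned int busy_delay_ms(int rc) { return rc == FW_BUSY ? 1 : 0; }
    static void sleep_ms(unsigned int ms) { (void)ms; /* stub for the sketch */ }

    int main(void)
    {
            int rc;
            unsigned int wait_ms;

            do {
                    rc = fw_call();                /* the RTAS token call */
                    wait_ms = busy_delay_ms(rc);   /* 0 unless rc is "busy" */
                    if (wait_ms)
                            sleep_ms(wait_ms);
            } while (wait_ms);

            printf("final status %d after %d calls\n", rc, calls);
            return 0;
    }
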
+ */ + fw_dump.reserve_dump_area_start = destination_address; + fw_dump.reserve_dump_area_size = get_fadump_area_size(); + + fadump_release_memory(reserved_area_start, reserved_area_end); + if (fw_dump.cpu_notes_buf) { + fadump_cpu_notes_buf_free( + (unsigned long)__va(fw_dump.cpu_notes_buf), + fw_dump.cpu_notes_buf_size); + fw_dump.cpu_notes_buf = 0; + fw_dump.cpu_notes_buf_size = 0; + } + /* Initialize the kernel dump memory structure for FAD registration. */ + init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start); +} + +static ssize_t fadump_release_memory_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + if (!fw_dump.dump_active) + return -EPERM; + + if (buf[0] == '1') { + /* + * Take away the '/proc/vmcore'. We are releasing the dump + * memory, hence it will not be valid anymore. + */ + vmcore_cleanup(); + fadump_invalidate_release_mem(); + + } else + return -EINVAL; + return count; +} + +static ssize_t fadump_enabled_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", fw_dump.fadump_enabled); +} + +static ssize_t fadump_register_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", fw_dump.dump_registered); +} + +static ssize_t fadump_register_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int ret = 0; + + if (!fw_dump.fadump_enabled || fdm_active) + return -EPERM; + + mutex_lock(&fadump_mutex); + + switch (buf[0]) { + case '0': + if (fw_dump.dump_registered == 0) { + ret = -EINVAL; + goto unlock_out; + } + /* Un-register Firmware-assisted dump */ + fadump_unregister_dump(&fdm); + break; + case '1': + if (fw_dump.dump_registered == 1) { + ret = -EINVAL; + goto unlock_out; + } + /* Register Firmware-assisted dump */ + register_fadump(); + break; + default: + ret = -EINVAL; + break; + } + +unlock_out: + mutex_unlock(&fadump_mutex); + return ret < 0 ? ret : count; +} + +static int fadump_region_show(struct seq_file *m, void *private) +{ + const struct fadump_mem_struct *fdm_ptr; + + if (!fw_dump.fadump_enabled) + return 0; + + mutex_lock(&fadump_mutex); + if (fdm_active) + fdm_ptr = fdm_active; + else { + mutex_unlock(&fadump_mutex); + fdm_ptr = &fdm; + } + + seq_printf(m, + "CPU : [%#016llx-%#016llx] %#llx bytes, " + "Dumped: %#llx\n", + fdm_ptr->cpu_state_data.destination_address, + fdm_ptr->cpu_state_data.destination_address + + fdm_ptr->cpu_state_data.source_len - 1, + fdm_ptr->cpu_state_data.source_len, + fdm_ptr->cpu_state_data.bytes_dumped); + seq_printf(m, + "HPTE: [%#016llx-%#016llx] %#llx bytes, " + "Dumped: %#llx\n", + fdm_ptr->hpte_region.destination_address, + fdm_ptr->hpte_region.destination_address + + fdm_ptr->hpte_region.source_len - 1, + fdm_ptr->hpte_region.source_len, + fdm_ptr->hpte_region.bytes_dumped); + seq_printf(m, + "DUMP: [%#016llx-%#016llx] %#llx bytes, " + "Dumped: %#llx\n", + fdm_ptr->rmr_region.destination_address, + fdm_ptr->rmr_region.destination_address + + fdm_ptr->rmr_region.source_len - 1, + fdm_ptr->rmr_region.source_len, + fdm_ptr->rmr_region.bytes_dumped); + + if (!fdm_active || + (fw_dump.reserve_dump_area_start == + fdm_ptr->cpu_state_data.destination_address)) + goto out; + + /* Dump is active. Show reserved memory region. 
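
The store/show handlers above, together with the attributes defined just below, are the entire user-visible control surface; the files appear under /sys/kernel because they hang off kernel_kobj. Registration can therefore be driven by any process that can write the sysfs file, e.g. with a minimal helper (error handling kept short):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Toggle firmware-assisted dump registration through the knob that
     * fadump_register_store() implements: '1' registers, '0' unregisters. */
    int main(int argc, char **argv)
    {
            const char *val = (argc > 1) ? argv[1] : "1";
            int fd = open("/sys/kernel/fadump_registered", O_WRONLY);

            if (fd < 0) {
                    perror("fadump_registered");
                    return 1;
            }
            if (write(fd, val, 1) != 1)
                    perror("write");
            close(fd);
            return 0;
    }
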
*/ + seq_printf(m, + " : [%#016llx-%#016llx] %#llx bytes, " + "Dumped: %#llx\n", + (unsigned long long)fw_dump.reserve_dump_area_start, + fdm_ptr->cpu_state_data.destination_address - 1, + fdm_ptr->cpu_state_data.destination_address - + fw_dump.reserve_dump_area_start, + fdm_ptr->cpu_state_data.destination_address - + fw_dump.reserve_dump_area_start); +out: + if (fdm_active) + mutex_unlock(&fadump_mutex); + return 0; +} + +static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem, + 0200, NULL, + fadump_release_memory_store); +static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled, + 0444, fadump_enabled_show, + NULL); +static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered, + 0644, fadump_register_show, + fadump_register_store); + +static int fadump_region_open(struct inode *inode, struct file *file) +{ + return single_open(file, fadump_region_show, inode->i_private); +} + +static const struct file_operations fadump_region_fops = { + .open = fadump_region_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void fadump_init_files(void) +{ + struct dentry *debugfs_file; + int rc = 0; + + rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr); + if (rc) + printk(KERN_ERR "fadump: unable to create sysfs file" + " fadump_enabled (%d)\n", rc); + + rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr); + if (rc) + printk(KERN_ERR "fadump: unable to create sysfs file" + " fadump_registered (%d)\n", rc); + + debugfs_file = debugfs_create_file("fadump_region", 0444, + powerpc_debugfs_root, NULL, + &fadump_region_fops); + if (!debugfs_file) + printk(KERN_ERR "fadump: unable to create debugfs file" + " fadump_region\n"); + + if (fw_dump.dump_active) { + rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr); + if (rc) + printk(KERN_ERR "fadump: unable to create sysfs file" + " fadump_release_mem (%d)\n", rc); + } + return; +} + +/* + * Prepare for firmware-assisted dump. + */ +int __init setup_fadump(void) +{ + if (!fw_dump.fadump_enabled) + return 0; + + if (!fw_dump.fadump_supported) { + printk(KERN_ERR "Firmware-assisted dump is not supported on" + " this hardware\n"); + return 0; + } + + fadump_show_config(); + /* + * If dump data is available then see if it is valid and prepare for + * saving it to the disk. + */ + if (fw_dump.dump_active) { + /* + * if dump process fails then invalidate the registration + * and release memory before proceeding for re-registration. + */ + if (process_fadump(fdm_active) < 0) + fadump_invalidate_release_mem(); + } + /* Initialize the kernel dump memory structure for FAD registration. */ + else if (fw_dump.reserve_dump_area_size) + init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start); + fadump_init_files(); + + return 1; +} +subsys_initcall(setup_fadump); diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c new file mode 100644 index 00000000..2eae4478 --- /dev/null +++ b/arch/powerpc/kernel/firmware.c @@ -0,0 +1,22 @@ +/* + * Extracted from cputable.c + * + * Copyright (C) 2001 Ben. 
Herrenschmidt (benh@kernel.crashing.org) + * + * Modifications for ppc64: + * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com> + * Copyright (C) 2005 Stephen Rothwell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/export.h> +#include <linux/cache.h> + +#include <asm/firmware.h> + +unsigned long powerpc_firmware_features __read_mostly; +EXPORT_SYMBOL_GPL(powerpc_firmware_features); diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S new file mode 100644 index 00000000..de369558 --- /dev/null +++ b/arch/powerpc/kernel/fpu.S @@ -0,0 +1,177 @@ +/* + * FPU support code, moved here from head.S so that it can be used + * by chips which use other head-whatever.S files. + * + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> + * Copyright (C) 1996 Paul Mackerras. + * Copyright (C) 1997 Dan Malek (dmalek@jlc.net). + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/cputable.h> +#include <asm/cache.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ptrace.h> + +#ifdef CONFIG_VSX +#define REST_32FPVSRS(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + REST_32FPRS(n,base); \ + b 3f; \ +2: REST_32VSRS(n,c,base); \ +3: + +#define SAVE_32FPVSRS(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + SAVE_32FPRS(n,base); \ + b 3f; \ +2: SAVE_32VSRS(n,c,base); \ +3: +#else +#define REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) +#define SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) +#endif + +/* + * This task wants to use the FPU now. + * On UP, disable FP for the task which had the FPU previously, + * and save its floating-point registers in its thread_struct. + * Load up this task's FP registers from its thread_struct, + * enable the FPU for the current task and return to the task. + */ +_GLOBAL(load_up_fpu) + mfmsr r5 + ori r5,r5,MSR_FP +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + oris r5,r5,MSR_VSX@h +END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif + SYNC + MTMSRD(r5) /* enable use of fpu now */ + isync +/* + * For SMP, we don't do lazy FPU switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_fpu in switch_to. 
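
What the UP-only lazy switching amounts to is easier to see in C than in the assembly below: keep FP enabled for the last task that touched it, and only save/restore when a different task traps into load_up_fpu. A sketch with illustrative names, where a global array stands in for the hardware register file:

    #include <string.h>

    static double hw_fpr[32];                 /* stands in for f0..f31 */

    struct task { double fpr[32]; };
    static struct task *last_task_used_math;  /* current FPU owner */

    static void fpu_unavailable_trap(struct task *current)
    {
            /* Flush the previous owner's live state to its thread_struct. */
            if (last_task_used_math && last_task_used_math != current)
                    memcpy(last_task_used_math->fpr, hw_fpr, sizeof(hw_fpr));

            /* Load this task's state; MSR_FP stays enabled for it. */
            memcpy(hw_fpr, current->fpr, sizeof(hw_fpr));
            last_task_used_math = current;
    }
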
+ */ +#ifndef CONFIG_SMP + LOAD_REG_ADDRBASE(r3, last_task_used_math) + toreal(r3) + PPC_LL r4,ADDROFF(last_task_used_math)(r3) + PPC_LCMPI 0,r4,0 + beq 1f + toreal(r4) + addi r4,r4,THREAD /* want last_task_used_math->thread */ + SAVE_32FPVSRS(0, r5, r4) + mffs fr0 + stfd fr0,THREAD_FPSCR(r4) + PPC_LL r5,PT_REGS(r4) + toreal(r5) + PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r10,MSR_FP|MSR_FE0|MSR_FE1 + andc r4,r4,r10 /* disable FP for previous task */ + PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* enable use of FP after return */ +#ifdef CONFIG_PPC32 + mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ + lwz r4,THREAD_FPEXC_MODE(r5) + ori r9,r9,MSR_FP /* enable FP for current */ + or r9,r9,r4 +#else + ld r4,PACACURRENT(r13) + addi r5,r4,THREAD /* Get THREAD */ + lwz r4,THREAD_FPEXC_MODE(r5) + ori r12,r12,MSR_FP + or r12,r12,r4 + std r12,_MSR(r1) +#endif + lfd fr0,THREAD_FPSCR(r5) + MTFSF_L(fr0) + REST_32FPVSRS(0, r4, r5) +#ifndef CONFIG_SMP + subi r4,r5,THREAD + fromreal(r4) + PPC_STL r4,ADDROFF(last_task_used_math)(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + /* we haven't used ctr or xer or lr */ + blr + +/* + * giveup_fpu(tsk) + * Disable FP for the task given as the argument, + * and save the floating-point registers in its thread_struct. + * Enables the FPU for use in the kernel on return. + */ +_GLOBAL(giveup_fpu) + mfmsr r5 + ori r5,r5,MSR_FP +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + oris r5,r5,MSR_VSX@h +END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif + SYNC_601 + ISYNC_601 + MTMSRD(r5) /* enable use of fpu now */ + SYNC_601 + isync + PPC_LCMPI 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + PPC_LL r5,PT_REGS(r3) + PPC_LCMPI 0,r5,0 + SAVE_32FPVSRS(0, r4 ,r3) + mffs fr0 + stfd fr0,THREAD_FPSCR(r3) + beq 1f + PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r3,MSR_FP|MSR_FE0|MSR_FE1 +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + oris r3,r3,MSR_VSX@h +END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif + andc r4,r4,r3 /* disable FP for previous task */ + PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + LOAD_REG_ADDRBASE(r4,last_task_used_math) + PPC_STL r5,ADDROFF(last_task_used_math)(r4) +#endif /* CONFIG_SMP */ + blr + +/* + * These are used in the alignment trap handler when emulating + * single-precision loads and stores. + */ + +_GLOBAL(cvt_fd) + lfs 0,0(r3) + stfd 0,0(r4) + blr + +_GLOBAL(cvt_df) + lfd 0,0(r3) + stfs 0,0(r4) + blr diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S new file mode 100644 index 00000000..a92c79be --- /dev/null +++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S @@ -0,0 +1,235 @@ + +/* 1. Find the index of the entry we're executing in */ + bl invstr /* Find our address */ +invstr: mflr r6 /* Make it accessible */ + mfmsr r7 + rlwinm r4,r7,27,31,31 /* extract MSR[IS] */ + mfspr r7, SPRN_PID0 + slwi r7,r7,16 + or r7,r7,r4 + mtspr SPRN_MAS6,r7 + tlbsx 0,r6 /* search MSR[IS], SPID=PID0 */ + mfspr r7,SPRN_MAS1 + andis. r7,r7,MAS1_VALID@h + bne match_TLB + + mfspr r7,SPRN_MMUCFG + rlwinm r7,r7,21,28,31 /* extract MMUCFG[NPIDS] */ + cmpwi r7,3 + bne match_TLB /* skip if NPIDS != 3 */ + + mfspr r7,SPRN_PID1 + slwi r7,r7,16 + or r7,r7,r4 + mtspr SPRN_MAS6,r7 + tlbsx 0,r6 /* search MSR[IS], SPID=PID1 */ + mfspr r7,SPRN_MAS1 + andis. 
r7,r7,MAS1_VALID@h + bne match_TLB + mfspr r7, SPRN_PID2 + slwi r7,r7,16 + or r7,r7,r4 + mtspr SPRN_MAS6,r7 + tlbsx 0,r6 /* Fall through, we had to match */ + +match_TLB: + mfspr r7,SPRN_MAS0 + rlwinm r3,r7,16,20,31 /* Extract MAS0(Entry) */ + + mfspr r7,SPRN_MAS1 /* Insure IPROT set */ + oris r7,r7,MAS1_IPROT@h + mtspr SPRN_MAS1,r7 + tlbwe + +/* 2. Invalidate all entries except the entry we're executing in */ + mfspr r9,SPRN_TLB1CFG + andi. r9,r9,0xfff + li r6,0 /* Set Entry counter to 0 */ +1: lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r6,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r6) */ + mtspr SPRN_MAS0,r7 + tlbre + mfspr r7,SPRN_MAS1 + rlwinm r7,r7,0,2,31 /* Clear MAS1 Valid and IPROT */ + cmpw r3,r6 + beq skpinv /* Dont update the current execution TLB */ + mtspr SPRN_MAS1,r7 + tlbwe + isync +skpinv: addi r6,r6,1 /* Increment */ + cmpw r6,r9 /* Are we done? */ + bne 1b /* If not, repeat */ + + /* Invalidate TLB0 */ + li r6,0x04 + tlbivax 0,r6 + TLBSYNC + /* Invalidate TLB1 */ + li r6,0x0c + tlbivax 0,r6 + TLBSYNC + +/* 3. Setup a temp mapping and jump to it */ + andi. r5, r3, 0x1 /* Find an entry not used and is non-zero */ + addi r5, r5, 0x1 + lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ + mtspr SPRN_MAS0,r7 + tlbre + + /* grab and fixup the RPN */ + mfspr r6,SPRN_MAS1 /* extract MAS1[SIZE] */ + rlwinm r6,r6,25,27,31 + li r8,-1 + addi r6,r6,10 + slw r6,r8,r6 /* convert to mask */ + + bl 1f /* Find our address */ +1: mflr r7 + + mfspr r8,SPRN_MAS3 +#ifdef CONFIG_PHYS_64BIT + mfspr r23,SPRN_MAS7 +#endif + and r8,r6,r8 + subfic r9,r6,-4096 + and r9,r9,r7 + + or r25,r8,r9 + ori r8,r25,(MAS3_SX|MAS3_SW|MAS3_SR) + + /* Just modify the entry ID and EPN for the temp mapping */ + lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r5,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r5) */ + mtspr SPRN_MAS0,r7 + xori r6,r4,1 /* Setup TMP mapping in the other Address space */ + slwi r6,r6,12 + oris r6,r6,(MAS1_VALID|MAS1_IPROT)@h + ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_4K))@l + mtspr SPRN_MAS1,r6 + mfspr r6,SPRN_MAS2 + li r7,0 /* temp EPN = 0 */ + rlwimi r7,r6,0,20,31 + mtspr SPRN_MAS2,r7 + mtspr SPRN_MAS3,r8 + tlbwe + + xori r6,r4,1 + slwi r6,r6,5 /* setup new context with other address space */ + bl 1f /* Find our address */ +1: mflr r9 + rlwimi r7,r9,0,20,31 + addi r7,r7,(2f - 1b) + mtspr SPRN_SRR0,r7 + mtspr SPRN_SRR1,r6 + rfi +2: +/* 4. Clear out PIDs & Search info */ + li r6,0 + mtspr SPRN_MAS6,r6 + mtspr SPRN_PID0,r6 + + mfspr r7,SPRN_MMUCFG + rlwinm r7,r7,21,28,31 /* extract MMUCFG[NPIDS] */ + cmpwi r7,3 + bne 2f /* skip if NPIDS != 3 */ + + mtspr SPRN_PID1,r6 + mtspr SPRN_PID2,r6 + +/* 5. Invalidate mapping we started in */ +2: + lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ + mtspr SPRN_MAS0,r7 + tlbre + mfspr r6,SPRN_MAS1 + rlwinm r6,r6,0,2,0 /* clear IPROT */ + mtspr SPRN_MAS1,r6 + tlbwe + /* Invalidate TLB1 */ + li r9,0x0c + tlbivax 0,r9 + TLBSYNC + +/* The mapping only needs to be cache-coherent on SMP */ +#ifdef CONFIG_SMP +#define M_IF_SMP MAS2_M +#else +#define M_IF_SMP 0 +#endif + +#if defined(ENTRY_MAPPING_BOOT_SETUP) + +/* 6. 
Setup KERNELBASE mapping in TLB1[0] */
+	lis	r6,0x1000		/* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */
+	mtspr	SPRN_MAS0,r6
+	lis	r6,(MAS1_VALID|MAS1_IPROT)@h
+	ori	r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
+	mtspr	SPRN_MAS1,r6
+	lis	r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_SMP)@h
+	ori	r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_SMP)@l
+	mtspr	SPRN_MAS2,r6
+	mtspr	SPRN_MAS3,r8
+	tlbwe
+
+/* 7. Jump to KERNELBASE mapping */
+	lis	r6,(KERNELBASE & ~0xfff)@h
+	ori	r6,r6,(KERNELBASE & ~0xfff)@l
+
+#elif defined(ENTRY_MAPPING_KEXEC_SETUP)
+/*
+ * 6. Set up a 1:1 mapping in TLB1. Esel 0 is unused, 1 or 2 contains the
+ * tmp mapping, so we start at 3. We set up 8 mappings, each 256MiB in size.
+ * This will cover the first 2GiB of memory.
+ */
+
+	lis	r10, (MAS1_VALID|MAS1_IPROT)@h
+	ori	r10,r10, (MAS1_TSIZE(BOOK3E_PAGESZ_256M))@l
+	li	r11, 0
+	li	r0, 8
+	mtctr	r0
+
+next_tlb_setup:
+	addi	r0, r11, 3
+	rlwinm	r0, r0, 16, 4, 15	// Compute esel
+	rlwinm	r9, r11, 28, 0, 3	// Compute [ER]PN
+	oris	r0, r0, (MAS0_TLBSEL(1))@h
+	mtspr	SPRN_MAS0,r0
+	mtspr	SPRN_MAS1,r10
+	mtspr	SPRN_MAS2,r9
+	ori	r9, r9, (MAS3_SX|MAS3_SW|MAS3_SR)
+	mtspr	SPRN_MAS3,r9
+	tlbwe
+	addi	r11, r11, 1
+	bdnz+	next_tlb_setup
+
+/* 7. Jump to our 1:1 mapping */
+	mr	r6, r25
+#else
+	#error You need to specify the mapping or not use this at all.
+#endif
+
+	lis	r7,MSR_KERNEL@h
+	ori	r7,r7,MSR_KERNEL@l
+	bl	1f			/* Find our address */
+1:	mflr	r9
+	rlwimi	r6,r9,0,20,31
+	addi	r6,r6,(2f - 1b)
+	mtspr	SPRN_SRR0,r6
+	mtspr	SPRN_SRR1,r7
+	rfi				/* start execution out of TLB1[0] entry */
+
+/* 8. Clear out the temp mapping */
+2:	lis	r7,0x1000		/* Set MAS0(TLBSEL) = 1 */
+	rlwimi	r7,r5,16,4,15		/* Setup MAS0 = TLBSEL | ESEL(r5) */
+	mtspr	SPRN_MAS0,r7
+	tlbre
+	mfspr	r8,SPRN_MAS1
+	rlwinm	r8,r8,0,2,0		/* clear IPROT */
+	mtspr	SPRN_MAS1,r8
+	tlbwe
+	/* Invalidate TLB1 */
+	li	r9,0x0c
+	tlbivax	0,r9
+	TLBSYNC
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
new file mode 100644
index 00000000..bf99cfa6
--- /dev/null
+++ b/arch/powerpc/kernel/ftrace.c
@@ -0,0 +1,610 @@
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box.
+ *
+ * Added function graph tracer code, taken from x86 that was written
+ * by Frederic Weisbecker, and ported to PPC by Steven Rostedt.
+ *
+ */
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+#include <asm/ftrace.h>
+#include <asm/syscall.h>
+
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+static unsigned int
+ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
+{
+	unsigned int op;
+
+	addr = ppc_function_entry((void *)addr);
+
+	/* if (link) set op to 'bl' else 'b' */
+	op = create_branch((unsigned int *)ip, addr, link ? 1 : 0);
+
+	return op;
+}
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
+{
+	unsigned int replaced;
+
+	/*
+	 * Note: Due to modules and __init, code can
+	 * disappear and change; we need to protect against faulting
+	 * as well as code changing. We do this by using the
+	 * probe_kernel_* functions.
+	 *
+	 * No real locking needed, this code is run through
+	 * kstop_machine, or before SMP starts.
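
The contract of ftrace_modify_code() is easier to see outside the kernel: never patch a call site unless it still holds exactly the instruction you expect. A userspace sketch of the verify-then-write discipline (0x60000000 is the PowerPC nop encoding; the "bl" encoding is an example):

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    static int modify_code(uint32_t *ip, uint32_t old, uint32_t new)
    {
            uint32_t replaced;

            memcpy(&replaced, ip, sizeof(replaced)); /* probe_kernel_read() */
            if (replaced != old)
                    return -1;                       /* site changed under us */
            memcpy(ip, &new, sizeof(new));           /* probe_kernel_write() */
            /* the kernel then flushes the icache for this address */
            return 0;
    }

    int main(void)
    {
            uint32_t insn = 0x48000001;              /* "bl" with offset 0 */

            printf("%d\n", modify_code(&insn, 0x48000001, 0x60000000 /* nop */));
            return 0;
    }
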
+ */
+
+	/* read the text we want to modify */
+	if (probe_kernel_read(&replaced, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/* Make sure it is what we expect it to be */
+	if (replaced != old)
+		return -EINVAL;
+
+	/* replace the text with the new text */
+	if (probe_kernel_write((void *)ip, &new, MCOUNT_INSN_SIZE))
+		return -EPERM;
+
+	flush_icache_range(ip, ip + 8);
+
+	return 0;
+}
+
+/*
+ * Helper functions that are the same for both PPC64 and PPC32.
+ */
+static int test_24bit_addr(unsigned long ip, unsigned long addr)
+{
+
+	/* use the create_branch to verify that this offset can be branched */
+	return create_branch((unsigned int *)ip, addr, 0);
+}
+
+#ifdef CONFIG_MODULES
+
+static int is_bl_op(unsigned int op)
+{
+	return (op & 0xfc000003) == 0x48000001;
+}
+
+static unsigned long find_bl_target(unsigned long ip, unsigned int op)
+{
+	static int offset;
+
+	offset = (op & 0x03fffffc);
+	/* make it signed */
+	if (offset & 0x02000000)
+		offset |= 0xfe000000;
+
+	return ip + (long)offset;
+}
+
+#ifdef CONFIG_PPC64
+static int
+__ftrace_make_nop(struct module *mod,
+		  struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned int op;
+	unsigned int jmp[5];
+	unsigned long ptr;
+	unsigned long ip = rec->ip;
+	unsigned long tramp;
+	int offset;
+
+	/* read where this goes */
+	if (probe_kernel_read(&op, (void *)ip, sizeof(int)))
+		return -EFAULT;
+
+	/* Make sure that this is still a 24-bit jump */
+	if (!is_bl_op(op)) {
+		printk(KERN_ERR "Not expected bl: opcode is %x\n", op);
+		return -EINVAL;
+	}
+
+	/* let's find where the pointer goes */
+	tramp = find_bl_target(ip, op);
+
+	/*
+	 * On PPC64 the trampoline looks like:
+	 * 0x3d, 0x82, 0x00, 0x00,	addis	r12,r2, <high>
+	 * 0x39, 0x8c, 0x00, 0x00,	addi	r12,r12, <low>
+	 *   Where the bytes 2,3,6 and 7 make up the 32bit offset
+	 *   to the TOC that holds the pointer to the function to
+	 *   jump to.
+	 * 0xf8, 0x41, 0x00, 0x28,	std	r2,40(r1)
+	 * 0xe9, 0x6c, 0x00, 0x20,	ld	r11,32(r12)
+	 *   The actual address is 32 bytes from the offset
+	 *   into the TOC.
+	 * 0xe8, 0x4c, 0x00, 0x28,	ld	r2,40(r12)
+	 */
+
+	pr_devel("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc);
+
+	/* Find where the trampoline jumps to */
+	if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
+		printk(KERN_ERR "Failed to read %lx\n", tramp);
+		return -EFAULT;
+	}
+
+	pr_devel(" %08x %08x", jmp[0], jmp[1]);
+
+	/* verify that this is what we expect it to be */
+	if (((jmp[0] & 0xffff0000) != 0x3d820000) ||
+	    ((jmp[1] & 0xffff0000) != 0x398c0000) ||
+	    (jmp[2] != 0xf8410028) ||
+	    (jmp[3] != 0xe96c0020) ||
+	    (jmp[4] != 0xe84c0028)) {
+		printk(KERN_ERR "Not a trampoline\n");
+		return -EINVAL;
+	}
+
+	/* The bottom half is sign extended */
+	offset = ((unsigned)((unsigned short)jmp[0]) << 16) +
+		(int)((short)jmp[1]);
+
+	pr_devel(" %x ", offset);
+
+	/* get the address this jumps to */
+	tramp = mod->arch.toc + offset + 32;
+	pr_devel("toc: %lx", tramp);
+
+	if (probe_kernel_read(jmp, (void *)tramp, 8)) {
+		printk(KERN_ERR "Failed to read %lx\n", tramp);
+		return -EFAULT;
+	}
+
+	pr_devel(" %08x %08x\n", jmp[0], jmp[1]);
+
+	ptr = ((unsigned long)jmp[0] << 32) + jmp[1];
+
+	/* This should match what was called */
+	if (ptr != ppc_function_entry((void *)addr)) {
+		printk(KERN_ERR "addr does not match %lx\n", ptr);
+		return -EINVAL;
+	}
+
+	/*
+	 * We want to nop the line, but the next line is
+	 *  0xe8, 0x41, 0x00, 0x28	ld r2,40(r1)
+	 * This needs to be turned into a nop too.
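
The offset recovery described above can be checked in isolation: the addis immediate is the high half, the addi immediate is the sign-extended low half. A sketch of the same arithmetic on the trampoline's first two instructions (the example encodings are made up):

    #include <stdint.h>
    #include <stdio.h>

    /* addis r12,r2,<high>; addi r12,r12,<low>  ->  32-bit TOC offset */
    static int32_t tramp_offset(uint32_t addis_insn, uint32_t addi_insn)
    {
            int32_t hi = (int16_t)(addis_insn & 0xffff);
            int32_t lo = (int16_t)(addi_insn  & 0xffff); /* sign-extended */

            return hi * 65536 + lo;
    }

    int main(void)
    {
            /* addis r12,r2,0x0001 ; addi r12,r12,-0x7ff0 */
            printf("%#x\n", tramp_offset(0x3d820001, 0x398c8010));
            return 0;
    }
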
+ */
+	if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	if (op != 0xe8410028) {
+		printk(KERN_ERR "Next line is not ld! (%08x)\n", op);
+		return -EINVAL;
+	}
+
+	/*
+	 * Milton Miller pointed out that we can not blindly do nops.
+	 * If a task was preempted when calling a trace function,
+	 * the nops will remove the way to restore the TOC in r2
+	 * and the r2 TOC will get corrupted.
+	 */
+
+	/*
+	 * Replace:
+	 *   bl <tramp>  <==== will be replaced with "b 1f"
+	 *   ld r2,40(r1)
+	 *  1:
+	 */
+	op = 0x48000008;	/* b +8 */
+
+	if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
+		return -EPERM;
+
+
+	flush_icache_range(ip, ip + 8);
+
+	return 0;
+}
+
+#else /* !PPC64 */
+static int
+__ftrace_make_nop(struct module *mod,
+		  struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned int op;
+	unsigned int jmp[4];
+	unsigned long ip = rec->ip;
+	unsigned long tramp;
+
+	if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/* Make sure that this is still a 24-bit jump */
+	if (!is_bl_op(op)) {
+		printk(KERN_ERR "Not expected bl: opcode is %x\n", op);
+		return -EINVAL;
+	}
+
+	/* let's find where the pointer goes */
+	tramp = find_bl_target(ip, op);
+
+	/*
+	 * On PPC32 the trampoline looks like:
+	 *  0x3d, 0x60, 0x00, 0x00  lis r11,sym@ha
+	 *  0x39, 0x6b, 0x00, 0x00  addi r11,r11,sym@l
+	 *  0x7d, 0x69, 0x03, 0xa6  mtctr r11
+	 *  0x4e, 0x80, 0x04, 0x20  bctr
+	 */
+
+	pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+	/* Find where the trampoline jumps to */
+	if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
+		printk(KERN_ERR "Failed to read %lx\n", tramp);
+		return -EFAULT;
+	}
+
+	pr_devel(" %08x %08x ", jmp[0], jmp[1]);
+
+	/* verify that this is what we expect it to be */
+	if (((jmp[0] & 0xffff0000) != 0x3d600000) ||
+	    ((jmp[1] & 0xffff0000) != 0x396b0000) ||
+	    (jmp[2] != 0x7d6903a6) ||
+	    (jmp[3] != 0x4e800420)) {
+		printk(KERN_ERR "Not a trampoline\n");
+		return -EINVAL;
+	}
+
+	tramp = (jmp[1] & 0xffff) |
+		((jmp[0] & 0xffff) << 16);
+	if (tramp & 0x8000)
+		tramp -= 0x10000;
+
+	pr_devel(" %lx ", tramp);
+
+	if (tramp != addr) {
+		printk(KERN_ERR
+			"Trampoline location %08lx does not match addr\n",
+			tramp);
+		return -EINVAL;
+	}
+
+	op = PPC_INST_NOP;
+
+	if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
+		return -EPERM;
+
+	flush_icache_range(ip, ip + 8);
+
+	return 0;
+}
+#endif /* PPC64 */
+#endif /* CONFIG_MODULES */
+
+int ftrace_make_nop(struct module *mod,
+		    struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	unsigned int old, new;
+
+	/*
+	 * If the calling address is more than 24 bits away,
+	 * then we had to use a trampoline to make the call.
+	 * Otherwise just update the call site.
+	 */
+	if (test_24bit_addr(ip, addr)) {
+		/* within range */
+		old = ftrace_call_replace(ip, addr, 1);
+		new = PPC_INST_NOP;
+		return ftrace_modify_code(ip, old, new);
+	}
+
+#ifdef CONFIG_MODULES
+	/*
+	 * Out of range jumps are called from modules.
+	 * We should either already have a pointer to the module
+	 * or it has been passed in.
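
is_bl_op() and find_bl_target() above decode the I-form PowerPC branch: opcode 18, a 24-bit signed displacement shifted left by two, and LK=1 for "bl". The following runnable sketch mirrors them and shows why reachability is limited to roughly +/-32MB:

    #include <stdint.h>
    #include <stdio.h>

    static int is_bl(uint32_t op) { return (op & 0xfc000003) == 0x48000001; }

    static unsigned long bl_target(unsigned long ip, uint32_t op)
    {
            long off = op & 0x03fffffc;      /* LI field, already <<2 */

            if (off & 0x02000000)            /* sign bit of the 26-bit field */
                    off -= 0x04000000;
            return ip + off;
    }

    int main(void)
    {
            uint32_t op = 0x4bfffffd;        /* bl .-4 */

            if (is_bl(op))
                    printf("target %#lx\n", bl_target(0x1000, op));
            return 0;
    }
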
+ */
+	if (!rec->arch.mod) {
+		if (!mod) {
+			printk(KERN_ERR "No module loaded addr=%lx\n",
+			       addr);
+			return -EFAULT;
+		}
+		rec->arch.mod = mod;
+	} else if (mod) {
+		if (mod != rec->arch.mod) {
+			printk(KERN_ERR
+				"Record mod %p not equal to passed in mod %p\n",
+				rec->arch.mod, mod);
+			return -EINVAL;
+		}
+		/* nothing to do if mod == rec->arch.mod */
+	} else
+		mod = rec->arch.mod;
+
+	return __ftrace_make_nop(mod, rec, addr);
+#else
+	/* We should not get here without modules */
+	return -EINVAL;
+#endif /* CONFIG_MODULES */
+}
+
+#ifdef CONFIG_MODULES
+#ifdef CONFIG_PPC64
+static int
+__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned int op[2];
+	unsigned long ip = rec->ip;
+
+	/* read where this goes */
+	if (probe_kernel_read(op, (void *)ip, MCOUNT_INSN_SIZE * 2))
+		return -EFAULT;
+
+	/*
+	 * It should be pointing to two nops or
+	 *   b +8; ld r2,40(r1)
+	 */
+	if (((op[0] != 0x48000008) || (op[1] != 0xe8410028)) &&
+	    ((op[0] != PPC_INST_NOP) || (op[1] != PPC_INST_NOP))) {
+		printk(KERN_ERR "Expected NOPs but have %x %x\n", op[0], op[1]);
+		return -EINVAL;
+	}
+
+	/* If we never set up a trampoline to ftrace_caller, then bail */
+	if (!rec->arch.mod->arch.tramp) {
+		printk(KERN_ERR "No ftrace trampoline\n");
+		return -EINVAL;
+	}
+
+	/* create the branch to the trampoline */
+	op[0] = create_branch((unsigned int *)ip,
+			      rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
+	if (!op[0]) {
+		printk(KERN_ERR "REL24 out of range!\n");
+		return -EINVAL;
+	}
+
+	/* ld r2,40(r1) */
+	op[1] = 0xe8410028;
+
+	pr_devel("write to %lx\n", rec->ip);
+
+	if (probe_kernel_write((void *)ip, op, MCOUNT_INSN_SIZE * 2))
+		return -EPERM;
+
+	flush_icache_range(ip, ip + 8);
+
+	return 0;
+}
+#else
+static int
+__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned int op;
+	unsigned long ip = rec->ip;
+
+	/* read where this goes */
+	if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/* It should be pointing to a nop */
+	if (op != PPC_INST_NOP) {
+		printk(KERN_ERR "Expected NOP but have %x\n", op);
+		return -EINVAL;
+	}
+
+	/* If we never set up a trampoline to ftrace_caller, then bail */
+	if (!rec->arch.mod->arch.tramp) {
+		printk(KERN_ERR "No ftrace trampoline\n");
+		return -EINVAL;
+	}
+
+	/* create the branch to the trampoline */
+	op = create_branch((unsigned int *)ip,
+			   rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
+	if (!op) {
+		printk(KERN_ERR "REL24 out of range!\n");
+		return -EINVAL;
+	}
+
+	pr_devel("write to %lx\n", rec->ip);
+
+	if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
+		return -EPERM;
+
+	flush_icache_range(ip, ip + 8);
+
+	return 0;
+}
+#endif /* CONFIG_PPC64 */
+#endif /* CONFIG_MODULES */
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	unsigned int old, new;
+
+	/*
+	 * If the calling address is more than 24 bits away,
+	 * then we had to use a trampoline to make the call.
+	 * Otherwise just update the call site.
+	 */
+	if (test_24bit_addr(ip, addr)) {
+		/* within range */
+		old = PPC_INST_NOP;
+		new = ftrace_call_replace(ip, addr, 1);
+		return ftrace_modify_code(ip, old, new);
+	}
+
+#ifdef CONFIG_MODULES
+	/*
+	 * Out of range jumps are called from modules.
+	 * Since we are converting from a nop, it had better
+	 * already have a module defined.
+ */
+	if (!rec->arch.mod) {
+		printk(KERN_ERR "No module loaded\n");
+		return -EINVAL;
+	}
+
+	return __ftrace_make_call(rec, addr);
+#else
+	/* We should not get here without modules */
+	return -EINVAL;
+#endif /* CONFIG_MODULES */
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long ip = (unsigned long)(&ftrace_call);
+	unsigned int old, new;
+	int ret;
+
+	old = *(unsigned int *)&ftrace_call;
+	new = ftrace_call_replace(ip, (unsigned long)func, 1);
+	ret = ftrace_modify_code(ip, old, new);
+
+	return ret;
+}
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+	/* caller expects data to be zero */
+	unsigned long *p = data;
+
+	*p = 0;
+
+	return 0;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern void ftrace_graph_call(void);
+extern void ftrace_graph_stub(void);
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	unsigned long ip = (unsigned long)(&ftrace_graph_call);
+	unsigned long addr = (unsigned long)(&ftrace_graph_caller);
+	unsigned long stub = (unsigned long)(&ftrace_graph_stub);
+	unsigned int old, new;
+
+	old = ftrace_call_replace(ip, stub, 0);
+	new = ftrace_call_replace(ip, addr, 0);
+
+	return ftrace_modify_code(ip, old, new);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	unsigned long ip = (unsigned long)(&ftrace_graph_call);
+	unsigned long addr = (unsigned long)(&ftrace_graph_caller);
+	unsigned long stub = (unsigned long)(&ftrace_graph_stub);
+	unsigned int old, new;
+
+	old = ftrace_call_replace(ip, addr, 0);
+	new = ftrace_call_replace(ip, stub, 0);
+
+	return ftrace_modify_code(ip, old, new);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_PPC64
+extern void mod_return_to_handler(void);
+#endif
+
+/*
+ * Hook the return address and push it in the stack of return addrs
+ * in current thread info.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+{
+	unsigned long old;
+	int faulted;
+	struct ftrace_graph_ent trace;
+	unsigned long return_hooker = (unsigned long)&return_to_handler;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+#ifdef CONFIG_PPC64
+	/* non-core kernel code needs to save and restore the TOC */
+	if (REGION_ID(self_addr) != KERNEL_REGION_ID)
+		return_hooker = (unsigned long)&mod_return_to_handler;
+#endif
+
+	return_hooker = ppc_function_entry((void *)return_hooker);
+
+	/*
+	 * Protect against fault, even if it shouldn't
+	 * happen. This tool is too intrusive to
+	 * ignore such a protection.
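
The assembly that follows implements that protection with a local fixup and two __ex_table entries. Conceptually, each entry pairs a possibly-faulting instruction address with a recovery address; on a fault the trap handler searches the table and resumes at the fixup instead of oopsing. A sketch with illustrative types (the real table stores raw addresses emitted by the PPC_LONG directives):

    struct exception_entry {
            unsigned long insn;   /* address of the load/store that may fault */
            unsigned long fixup;  /* label 4b: sets faulted = 1, rejoins 3b */
    };

    static unsigned long search_extable(const struct exception_entry *tbl,
                                        int n, unsigned long faulting_insn)
    {
            for (int i = 0; i < n; i++)
                    if (tbl[i].insn == faulting_insn)
                            return tbl[i].fixup;  /* resume address */
            return 0;                             /* no fixup: real oops */
    }
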
+ */ + asm volatile( + "1: " PPC_LL "%[old], 0(%[parent])\n" + "2: " PPC_STL "%[return_hooker], 0(%[parent])\n" + " li %[faulted], 0\n" + "3:\n" + + ".section .fixup, \"ax\"\n" + "4: li %[faulted], 1\n" + " b 3b\n" + ".previous\n" + + ".section __ex_table,\"a\"\n" + PPC_LONG_ALIGN "\n" + PPC_LONG "1b,4b\n" + PPC_LONG "2b,4b\n" + ".previous" + + : [old] "=&r" (old), [faulted] "=r" (faulted) + : [parent] "r" (parent), [return_hooker] "r" (return_hooker) + : "memory" + ); + + if (unlikely(faulted)) { + ftrace_graph_stop(); + WARN_ON(1); + return; + } + + if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY) { + *parent = old; + return; + } + + trace.func = self_addr; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + current->curr_ret_stack--; + *parent = old; + } +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) +unsigned long __init arch_syscall_addr(int nr) +{ + return sys_call_table[nr*2]; +} +#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S new file mode 100644 index 00000000..dc0488b6 --- /dev/null +++ b/arch/powerpc/kernel/head_32.S @@ -0,0 +1,1296 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). + * + * This file contains the low-level support and setup for the + * PowerPC platform, including trap and interrupt dispatch. + * (The PPC 8xx embedded CPUs use head_8xx.S instead.) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/init.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/cputable.h> +#include <asm/cache.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ptrace.h> +#include <asm/bug.h> +#include <asm/kvm_book3s_asm.h> + +/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ +#define LOAD_BAT(n, reg, RA, RB) \ + /* see the comment for clear_bats() -- Cort */ \ + li RA,0; \ + mtspr SPRN_IBAT##n##U,RA; \ + mtspr SPRN_DBAT##n##U,RA; \ + lwz RA,(n*16)+0(reg); \ + lwz RB,(n*16)+4(reg); \ + mtspr SPRN_IBAT##n##U,RA; \ + mtspr SPRN_IBAT##n##L,RB; \ + beq 1f; \ + lwz RA,(n*16)+8(reg); \ + lwz RB,(n*16)+12(reg); \ + mtspr SPRN_DBAT##n##U,RA; \ + mtspr SPRN_DBAT##n##L,RB; \ +1: + + __HEAD + .stabs "arch/powerpc/kernel/",N_SO,0,0,0f + .stabs "head_32.S",N_SO,0,0,0f +0: +_ENTRY(_stext); + +/* + * _start is defined this way because the XCOFF loader in the OpenFirmware + * on the powermac expects the entry point to be a procedure descriptor. + */ +_ENTRY(_start); + /* + * These are here for legacy reasons, the kernel used to + * need to look like a coff function entry for the pmac + * but we're always started by some kind of bootloader now. 
+ * -- Cort + */ + nop /* used by __secondary_hold on prep (mtx) and chrp smp */ + nop /* used by __secondary_hold on prep (mtx) and chrp smp */ + nop + +/* PMAC + * Enter here with the kernel text, data and bss loaded starting at + * 0, running with virtual == physical mapping. + * r5 points to the prom entry point (the client interface handler + * address). Address translation is turned on, with the prom + * managing the hash table. Interrupts are disabled. The stack + * pointer (r1) points to just below the end of the half-meg region + * from 0x380000 - 0x400000, which is mapped in already. + * + * If we are booted from MacOS via BootX, we enter with the kernel + * image loaded somewhere, and the following values in registers: + * r3: 'BooX' (0x426f6f58) + * r4: virtual address of boot_infos_t + * r5: 0 + * + * PREP + * This is jumped to on prep systems right after the kernel is relocated + * to its proper place in memory by the boot loader. The expected layout + * of the regs is: + * r3: ptr to residual data + * r4: initrd_start or if no initrd then 0 + * r5: initrd_end - unused if r4 is 0 + * r6: Start of command line string + * r7: End of command line string + * + * This just gets a minimal mmu environment setup so we can call + * start_here() to do the real work. + * -- Cort + */ + + .globl __start +__start: +/* + * We have to do any OF calls before we map ourselves to KERNELBASE, + * because OF may have I/O devices mapped into that area + * (particularly on CHRP). + */ + cmpwi 0,r5,0 + beq 1f + +#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE + /* find out where we are now */ + bcl 20,31,$+4 +0: mflr r8 /* r8 = runtime addr here */ + addis r8,r8,(_stext - 0b)@ha + addi r8,r8,(_stext - 0b)@l /* current runtime base addr */ + bl prom_init +#endif /* CONFIG_PPC_OF_BOOT_TRAMPOLINE */ + + /* We never return. We also hit that trap if trying to boot + * from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */ + trap + +/* + * Check for BootX signature when supporting PowerMac and branch to + * appropriate trampoline if it's present + */ +#ifdef CONFIG_PPC_PMAC +1: lis r31,0x426f + ori r31,r31,0x6f58 + cmpw 0,r3,r31 + bne 1f + bl bootx_init + trap +#endif /* CONFIG_PPC_PMAC */ + +1: mr r31,r3 /* save device tree ptr */ + li r24,0 /* cpu # */ + +/* + * early_init() does the early machine identification and does + * the necessary low-level setup and clears the BSS + * -- Cort <cort@fsmlabs.com> + */ + bl early_init + +/* Switch MMU off, clear BATs and flush TLB. At this point, r3 contains + * the physical address we are running at, returned by early_init() + */ + bl mmu_off +__after_mmu_off: + bl clear_bats + bl flush_tlbs + + bl initial_bats +#if defined(CONFIG_BOOTX_TEXT) + bl setup_disp_bat +#endif +#ifdef CONFIG_PPC_EARLY_DEBUG_CPM + bl setup_cpm_bat +#endif +#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO + bl setup_usbgecko_bat +#endif + +/* + * Call setup_cpu for CPU 0 and initialize 6xx Idle + */ + bl reloc_offset + li r24,0 /* cpu# */ + bl call_setup_cpu /* Call setup_cpu for this CPU */ +#ifdef CONFIG_6xx + bl reloc_offset + bl init_idle_6xx +#endif /* CONFIG_6xx */ + + +/* + * We need to run with _start at physical address 0. + * On CHRP, we are loaded at 0x10000 since OF on CHRP uses + * the exception vectors at 0 (and therefore this copy + * overwrites OF's exception vectors with our own). + * The MMU is off at this point. + */ + bl reloc_offset + mr r26,r3 + addis r4,r3,KERNELBASE@h /* current address of _start */ + lis r5,PHYSICAL_START@h + cmplw 0,r4,r5 /* already running at PHYSICAL_START? 
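
The BootX detection earlier in __start compares r3 against the ASCII magic 'BooX' loaded into r31; the same check written in C (the register value shown is only an example):

    #include <stdio.h>

    int main(void)
    {
            unsigned long r3 = 0x426f6f58UL;  /* value a BootX boot would pass */

            if (r3 == (unsigned long)('B' << 24 | 'o' << 16 | 'o' << 8 | 'X'))
                    printf("booted via BootX\n");
            return 0;
    }
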
*/ + bne relocate_kernel +/* + * we now have the 1st 16M of ram mapped with the bats. + * prep needs the mmu to be turned on here, but pmac already has it on. + * this shouldn't bother the pmac since it just gets turned on again + * as we jump to our code at KERNELBASE. -- Cort + * Actually no, pmac doesn't have it on any more. BootX enters with MMU + * off, and in other cases, we now turn it off before changing BATs above. + */ +turn_on_mmu: + mfmsr r0 + ori r0,r0,MSR_DR|MSR_IR + mtspr SPRN_SRR1,r0 + lis r0,start_here@h + ori r0,r0,start_here@l + mtspr SPRN_SRR0,r0 + SYNC + RFI /* enables MMU */ + +/* + * We need __secondary_hold as a place to hold the other cpus on + * an SMP machine, even when we are running a UP kernel. + */ + . = 0xc0 /* for prep bootloader */ + li r3,1 /* MTX only has 1 cpu */ + .globl __secondary_hold +__secondary_hold: + /* tell the master we're here */ + stw r3,__secondary_hold_acknowledge@l(0) +#ifdef CONFIG_SMP +100: lwz r4,0(0) + /* wait until we're told to start */ + cmpw 0,r4,r3 + bne 100b + /* our cpu # was at addr 0 - go */ + mr r24,r3 /* cpu # */ + b __secondary_start +#else + b . +#endif /* CONFIG_SMP */ + + .globl __secondary_hold_spinloop +__secondary_hold_spinloop: + .long 0 + .globl __secondary_hold_acknowledge +__secondary_hold_acknowledge: + .long -1 + +/* + * Exception entry code. This code runs with address translation + * turned off, i.e. using physical addresses. + * We assume sprg3 has the physical address of the current + * task's thread_struct. + */ +#define EXCEPTION_PROLOG \ + mtspr SPRN_SPRG_SCRATCH0,r10; \ + mtspr SPRN_SPRG_SCRATCH1,r11; \ + mfcr r10; \ + EXCEPTION_PROLOG_1; \ + EXCEPTION_PROLOG_2 + +#define EXCEPTION_PROLOG_1 \ + mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ + andi. r11,r11,MSR_PR; \ + tophys(r11,r1); /* use tophys(r1) if kernel */ \ + beq 1f; \ + mfspr r11,SPRN_SPRG_THREAD; \ + lwz r11,THREAD_INFO-THREAD(r11); \ + addi r11,r11,THREAD_SIZE; \ + tophys(r11,r11); \ +1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */ + + +#define EXCEPTION_PROLOG_2 \ + CLR_TOP32(r11); \ + stw r10,_CCR(r11); /* save registers */ \ + stw r12,GPR12(r11); \ + stw r9,GPR9(r11); \ + mfspr r10,SPRN_SPRG_SCRATCH0; \ + stw r10,GPR10(r11); \ + mfspr r12,SPRN_SPRG_SCRATCH1; \ + stw r12,GPR11(r11); \ + mflr r10; \ + stw r10,_LINK(r11); \ + mfspr r12,SPRN_SRR0; \ + mfspr r9,SPRN_SRR1; \ + stw r1,GPR1(r11); \ + stw r1,0(r11); \ + tovirt(r1,r11); /* set new kernel sp */ \ + li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \ + MTMSRD(r10); /* (except for mach check in rtas) */ \ + stw r0,GPR0(r11); \ + lis r10,STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \ + addi r10,r10,STACK_FRAME_REGS_MARKER@l; \ + stw r10,8(r11); \ + SAVE_4GPRS(3, r11); \ + SAVE_2GPRS(7, r11) + +/* + * Note: code which follows this uses cr0.eq (set if from kernel), + * r11, r12 (SRR0), and r9 (SRR1). + * + * Note2: once we have set r1 we are in a position to take exceptions + * again, and we could thus set MSR:RI at that point. + */ + +/* + * Exception vectors. + */ +#define EXCEPTION(n, label, hdlr, xfer) \ + . 
= n; \ + DO_KVM n; \ +label: \ + EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + xfer(n, hdlr) + +#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \ + li r10,trap; \ + stw r10,_TRAP(r11); \ + li r10,MSR_KERNEL; \ + copyee(r10, r9); \ + bl tfer; \ +i##n: \ + .long hdlr; \ + .long ret + +#define COPY_EE(d, s) rlwimi d,s,0,16,16 +#define NOCOPY(d, s) + +#define EXC_XFER_STD(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ + ret_from_except) + +#define EXC_XFER_EE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_EE_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \ + ret_from_except) + +/* System reset */ +/* core99 pmac starts the seconary here by changing the vector, and + putting it back to what it was (unknown_exception) when done. */ + EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) + +/* Machine check */ +/* + * On CHRP, this is complicated by the fact that we could get a + * machine check inside RTAS, and we have no guarantee that certain + * critical registers will have the values we expect. The set of + * registers that might have bad values includes all the GPRs + * and all the BATs. We indicate that we are in RTAS by putting + * a non-zero value, the address of the exception frame to use, + * in SPRG2. The machine check handler checks SPRG2 and uses its + * value if it is non-zero. If we ever needed to free up SPRG2, + * we could use a field in the thread_info or thread_struct instead. + * (Other exception handlers assume that r1 is a valid kernel stack + * pointer when we take an exception from supervisor mode.) + * -- paulus. + */ + . = 0x200 + DO_KVM 0x200 + mtspr SPRN_SPRG_SCRATCH0,r10 + mtspr SPRN_SPRG_SCRATCH1,r11 + mfcr r10 +#ifdef CONFIG_PPC_CHRP + mfspr r11,SPRN_SPRG_RTAS + cmpwi 0,r11,0 + bne 7f +#endif /* CONFIG_PPC_CHRP */ + EXCEPTION_PROLOG_1 +7: EXCEPTION_PROLOG_2 + addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PPC_CHRP + mfspr r4,SPRN_SPRG_RTAS + cmpwi cr1,r4,0 + bne cr1,1f +#endif + EXC_XFER_STD(0x200, machine_check_exception) +#ifdef CONFIG_PPC_CHRP +1: b machine_check_in_rtas +#endif + +/* Data access exception. */ + . = 0x300 + DO_KVM 0x300 +DataAccess: + EXCEPTION_PROLOG + mfspr r10,SPRN_DSISR + stw r10,_DSISR(r11) + andis. r0,r10,0xa470 /* weird error? */ + bne 1f /* if not, try to put a PTE */ + mfspr r4,SPRN_DAR /* into the hash table */ + rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */ + bl hash_page +1: lwz r5,_DSISR(r11) /* get DSISR value */ + mfspr r4,SPRN_DAR + EXC_XFER_LITE(0x300, handle_page_fault) + + +/* Instruction access exception. */ + . = 0x400 + DO_KVM 0x400 +InstructionAccess: + EXCEPTION_PROLOG + andis. r0,r9,0x4000 /* no pte found? */ + beq 1f /* if so, try to put a PTE */ + li r3,0 /* into the hash table */ + mr r4,r12 /* SRR0 is fault address */ + bl hash_page +1: mr r4,r12 + mr r5,r9 + EXC_XFER_LITE(0x400, handle_page_fault) + +/* External interrupt */ + EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + +/* Alignment exception */ + . 
= 0x600 + DO_KVM 0x600 +Alignment: + EXCEPTION_PROLOG + mfspr r4,SPRN_DAR + stw r4,_DAR(r11) + mfspr r5,SPRN_DSISR + stw r5,_DSISR(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE(0x600, alignment_exception) + +/* Program check exception */ + EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) + +/* Floating-point unavailable */ + . = 0x800 + DO_KVM 0x800 +FPUnavailable: +BEGIN_FTR_SECTION +/* + * Certain Freescale cores don't have a FPU and treat fp instructions + * as a FP Unavailable exception. Redirect to illegal/emulation handling. + */ + b ProgramCheck +END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) + EXCEPTION_PROLOG + beq 1f + bl load_up_fpu /* if from user, just load it up */ + b fast_exception_return +1: addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception) + +/* Decrementer */ + EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) + + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) + +/* System call */ + . = 0xc00 + DO_KVM 0xc00 +SystemCall: + EXCEPTION_PROLOG + EXC_XFER_EE_LITE(0xc00, DoSyscall) + +/* Single step - not used on 601 */ + EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) + +/* + * The Altivec unavailable trap is at 0x0f20. Foo. + * We effectively remap it to 0x3000. + * We include an altivec unavailable exception vector even if + * not configured for Altivec, so that you can't panic a + * non-altivec kernel running on a machine with altivec just + * by executing an altivec instruction. + */ + . = 0xf00 + DO_KVM 0xf00 + b PerformanceMonitor + + . = 0xf20 + DO_KVM 0xf20 + b AltiVecUnavailable + +/* + * Handle TLB miss for instruction on 603/603e. + * Note: we get an alternate set of r0 - r3 to use automatically. + */ + . = 0x1000 +InstructionTLBMiss: +/* + * r0: scratch + * r1: linux style pte ( later becomes ppc hardware pte ) + * r2: ptr to linux-style pte + * r3: scratch + */ + /* Get PTE (linux-style) and check access */ + mfspr r3,SPRN_IMISS + lis r1,PAGE_OFFSET@h /* check if kernel address */ + cmplw 0,r1,r3 + mfspr r2,SPRN_SPRG_THREAD + li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */ + lwz r2,PGDIR(r2) + bge- 112f + mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */ + rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */ + lis r2,swapper_pg_dir@ha /* if kernel address, use */ + addi r2,r2,swapper_pg_dir@l /* kernel page table */ +112: tophys(r2,r2) + rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + beq- InstructionAddressInvalid /* return if no mapping */ + rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + lwz r0,0(r2) /* get linux-style pte */ + andc. r1,r1,r0 /* check access & ~permission */ + bne- InstructionAddressInvalid /* return if access not permitted */ + ori r0,r0,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */ + /* + * NOTE! We are assuming this is not an SMP system, otherwise + * we would need to update the pte atomically with lwarx/stwcx. 
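
The 603 miss handlers here are a two-level page-table walk in assembly: the top 10 bits of the effective address index the pgd, the next 10 bits index the pte page, and permissions are tested with the same "need & ~pte" trick the andc. instruction performs. In C the walk looks roughly like this (flag values and names are illustrative):

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_PRESENT  0x001
    #define PAGE_USER     0x002
    #define PAGE_ACCESSED 0x100

    static uint32_t *pgdir[1024];     /* pgd: entries point at pte pages */
    static uint32_t pte_page0[1024];

    static uint32_t *walk(uint32_t ea, uint32_t need)
    {
            uint32_t *pte_page = pgdir[ea >> 22];       /* top 10 bits */

            if (!pte_page)
                    return NULL;                        /* no mapping: fault */
            uint32_t *pte = &pte_page[(ea >> 12) & 0x3ff]; /* next 10 bits */
            if (need & ~*pte)                           /* like "andc. ; bne-" */
                    return NULL;                        /* access not permitted */
            *pte |= PAGE_ACCESSED;                      /* referenced, as stw does */
            return pte;
    }

    int main(void)
    {
            pgdir[3] = pte_page0;
            pte_page0[5] = PAGE_PRESENT | PAGE_USER;
            printf("pte %#x\n", *walk((3u << 22) | (5u << 12) | 0x123,
                                      PAGE_PRESENT | PAGE_USER));
            return 0;
    }
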
+ */ + stw r0,0(r2) /* update PTE (accessed bit) */ + /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */ + rlwinm r2,r0,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ + and r1,r1,r2 /* writable if _RW and _DIRTY */ + rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ + ori r1,r1,0xe04 /* clear out reserved bits */ + andc r1,r0,r1 /* PP = user? (rw&dirty? 2: 3): 0 */ +BEGIN_FTR_SECTION + rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ +END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) + mtspr SPRN_RPA,r1 + tlbli r3 + mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r3 + rfi +InstructionAddressInvalid: + mfspr r3,SPRN_SRR1 + rlwinm r1,r3,9,6,6 /* Get load/store bit */ + + addis r1,r1,0x2000 + mtspr SPRN_DSISR,r1 /* (shouldn't be needed) */ + andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */ + or r2,r2,r1 + mtspr SPRN_SRR1,r2 + mfspr r1,SPRN_IMISS /* Get failing address */ + rlwinm. r2,r2,0,31,31 /* Check for little endian access */ + rlwimi r2,r2,1,30,30 /* change 1 -> 3 */ + xor r1,r1,r2 + mtspr SPRN_DAR,r1 /* Set fault address */ + mfmsr r0 /* Restore "normal" registers */ + xoris r0,r0,MSR_TGPR>>16 + mtcrf 0x80,r3 /* Restore CR0 */ + mtmsr r0 + b InstructionAccess + +/* + * Handle TLB miss for DATA Load operation on 603/603e + */ + . = 0x1100 +DataLoadTLBMiss: +/* + * r0: scratch + * r1: linux style pte ( later becomes ppc hardware pte ) + * r2: ptr to linux-style pte + * r3: scratch + */ + /* Get PTE (linux-style) and check access */ + mfspr r3,SPRN_DMISS + lis r1,PAGE_OFFSET@h /* check if kernel address */ + cmplw 0,r1,r3 + mfspr r2,SPRN_SPRG_THREAD + li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */ + lwz r2,PGDIR(r2) + bge- 112f + mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */ + rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */ + lis r2,swapper_pg_dir@ha /* if kernel address, use */ + addi r2,r2,swapper_pg_dir@l /* kernel page table */ +112: tophys(r2,r2) + rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + beq- DataAddressInvalid /* return if no mapping */ + rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + lwz r0,0(r2) /* get linux-style pte */ + andc. r1,r1,r0 /* check access & ~permission */ + bne- DataAddressInvalid /* return if access not permitted */ + ori r0,r0,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */ + /* + * NOTE! We are assuming this is not an SMP system, otherwise + * we would need to update the pte atomically with lwarx/stwcx. + */ + stw r0,0(r2) /* update PTE (accessed bit) */ + /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */ + rlwinm r2,r0,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ + and r1,r1,r2 /* writable if _RW and _DIRTY */ + rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ + ori r1,r1,0xe04 /* clear out reserved bits */ + andc r1,r0,r1 /* PP = user? (rw&dirty? 
2: 3): 0 */ +BEGIN_FTR_SECTION + rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ +END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) + mtspr SPRN_RPA,r1 + mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r2 +BEGIN_MMU_FTR_SECTION + li r0,1 + mfspr r1,SPRN_SPRG_603_LRU + rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ + slw r0,r0,r2 + xor r1,r0,r1 + srw r0,r1,r2 + mtspr SPRN_SPRG_603_LRU,r1 + mfspr r2,SPRN_SRR1 + rlwimi r2,r0,31-14,14,14 + mtspr SPRN_SRR1,r2 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + tlbld r3 + rfi +DataAddressInvalid: + mfspr r3,SPRN_SRR1 + rlwinm r1,r3,9,6,6 /* Get load/store bit */ + addis r1,r1,0x2000 + mtspr SPRN_DSISR,r1 + andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */ + mtspr SPRN_SRR1,r2 + mfspr r1,SPRN_DMISS /* Get failing address */ + rlwinm. r2,r2,0,31,31 /* Check for little endian access */ + beq 20f /* Jump if big endian */ + xori r1,r1,3 +20: mtspr SPRN_DAR,r1 /* Set fault address */ + mfmsr r0 /* Restore "normal" registers */ + xoris r0,r0,MSR_TGPR>>16 + mtcrf 0x80,r3 /* Restore CR0 */ + mtmsr r0 + b DataAccess + +/* + * Handle TLB miss for DATA Store on 603/603e + */ + . = 0x1200 +DataStoreTLBMiss: +/* + * r0: scratch + * r1: linux style pte ( later becomes ppc hardware pte ) + * r2: ptr to linux-style pte + * r3: scratch + */ + /* Get PTE (linux-style) and check access */ + mfspr r3,SPRN_DMISS + lis r1,PAGE_OFFSET@h /* check if kernel address */ + cmplw 0,r1,r3 + mfspr r2,SPRN_SPRG_THREAD + li r1,_PAGE_RW|_PAGE_USER|_PAGE_PRESENT /* access flags */ + lwz r2,PGDIR(r2) + bge- 112f + mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */ + rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */ + lis r2,swapper_pg_dir@ha /* if kernel address, use */ + addi r2,r2,swapper_pg_dir@l /* kernel page table */ +112: tophys(r2,r2) + rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + beq- DataAddressInvalid /* return if no mapping */ + rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + lwz r0,0(r2) /* get linux-style pte */ + andc. r1,r1,r0 /* check access & ~permission */ + bne- DataAddressInvalid /* return if access not permitted */ + ori r0,r0,_PAGE_ACCESSED|_PAGE_DIRTY + /* + * NOTE! We are assuming this is not an SMP system, otherwise + * we would need to update the pte atomically with lwarx/stwcx. + */ + stw r0,0(r2) /* update PTE (accessed/dirty bits) */ + /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ + li r1,0xe05 /* clear out reserved bits & PP lsb */ + andc r1,r0,r1 /* PP = user? 
2: 0 */ +BEGIN_FTR_SECTION + rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ +END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) + mtspr SPRN_RPA,r1 + mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r2 +BEGIN_MMU_FTR_SECTION + li r0,1 + mfspr r1,SPRN_SPRG_603_LRU + rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ + slw r0,r0,r2 + xor r1,r0,r1 + srw r0,r1,r2 + mtspr SPRN_SPRG_603_LRU,r1 + mfspr r2,SPRN_SRR1 + rlwimi r2,r0,31-14,14,14 + mtspr SPRN_SRR1,r2 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + tlbld r3 + rfi + +#ifndef CONFIG_ALTIVEC +#define altivec_assist_exception unknown_exception +#endif + + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_EE) + EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE) + EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2f00, MOLTrampoline, unknown_exception, EXC_XFER_EE_LITE) + + .globl mol_trampoline + .set mol_trampoline, i0x2f00 + + . = 0x3000 + +AltiVecUnavailable: + EXCEPTION_PROLOG +#ifdef CONFIG_ALTIVEC + beq 1f + bl load_up_altivec /* if from user, just load it up */ + b fast_exception_return +#endif /* CONFIG_ALTIVEC */ +1: addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception) + +PerformanceMonitor: + EXCEPTION_PROLOG + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_STD(0xf00, performance_monitor_exception) + + +/* + * This code is jumped to from the startup code to copy + * the kernel image to physical address PHYSICAL_START. + */ +relocate_kernel: + addis r9,r26,klimit@ha /* fetch klimit */ + lwz r25,klimit@l(r9) + addis r25,r25,-KERNELBASE@h + lis r3,PHYSICAL_START@h /* Destination base address */ + li r6,0 /* Destination offset */ + li r5,0x4000 /* # bytes of memory to copy */ + bl copy_and_flush /* copy the first 0x4000 bytes */ + addi r0,r3,4f@l /* jump to the address of 4f */ + mtctr r0 /* in copy and do the rest. 
*/ + bctr /* jump to the copy */ +4: mr r5,r25 + bl copy_and_flush /* copy the rest */ + b turn_on_mmu + +/* + * Copy routine used to copy the kernel to start at physical address 0 + * and flush and invalidate the caches as needed. + * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset + * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5. + */ +_ENTRY(copy_and_flush) + addi r5,r5,-4 + addi r6,r6,-4 +4: li r0,L1_CACHE_BYTES/4 + mtctr r0 +3: addi r6,r6,4 /* copy a cache line */ + lwzx r0,r6,r4 + stwx r0,r6,r3 + bdnz 3b + dcbst r6,r3 /* write it to memory */ + sync + icbi r6,r3 /* flush the icache line */ + cmplw 0,r6,r5 + blt 4b + sync /* additional sync needed on g4 */ + isync + addi r5,r5,4 + addi r6,r6,4 + blr + +#ifdef CONFIG_SMP + .globl __secondary_start_mpc86xx +__secondary_start_mpc86xx: + mfspr r3, SPRN_PIR + stw r3, __secondary_hold_acknowledge@l(0) + mr r24, r3 /* cpu # */ + b __secondary_start + + .globl __secondary_start_pmac_0 +__secondary_start_pmac_0: + /* NB the entries for cpus 0, 1, 2 must each occupy 8 bytes. */ + li r24,0 + b 1f + li r24,1 + b 1f + li r24,2 + b 1f + li r24,3 +1: + /* on powersurge, we come in here with IR=0 and DR=1, and DBAT 0 + set to map the 0xf0000000 - 0xffffffff region */ + mfmsr r0 + rlwinm r0,r0,0,28,26 /* clear DR (0x10) */ + SYNC + mtmsr r0 + isync + + .globl __secondary_start +__secondary_start: + /* Copy some CPU settings from CPU 0 */ + bl __restore_cpu_setup + + lis r3,-KERNELBASE@h + mr r4,r24 + bl call_setup_cpu /* Call setup_cpu for this CPU */ +#ifdef CONFIG_6xx + lis r3,-KERNELBASE@h + bl init_idle_6xx +#endif /* CONFIG_6xx */ + + /* get current_thread_info and current */ + lis r1,secondary_ti@ha + tophys(r1,r1) + lwz r1,secondary_ti@l(r1) + tophys(r2,r1) + lwz r2,TI_TASK(r2) + + /* stack */ + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + li r0,0 + tophys(r3,r1) + stw r0,0(r3) + + /* load up the MMU */ + bl load_up_mmu + + /* ptr to phys current thread */ + tophys(r4,r2) + addi r4,r4,THREAD /* phys address of our thread_struct */ + CLR_TOP32(r4) + mtspr SPRN_SPRG_THREAD,r4 + li r3,0 + mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */ + + /* enable MMU and jump to start_secondary */ + li r4,MSR_KERNEL + FIX_SRR1(r4,r5) + lis r3,start_secondary@h + ori r3,r3,start_secondary@l + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + SYNC + RFI +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_KVM_BOOK3S_HANDLER +#include "../kvm/book3s_rmhandlers.S" +#endif + +/* + * Those generic dummy functions are kept for CPUs not + * included in CONFIG_6xx + */ +#if !defined(CONFIG_6xx) +_ENTRY(__save_cpu_setup) + blr +_ENTRY(__restore_cpu_setup) + blr +#endif /* !defined(CONFIG_6xx) */ + + +/* + * Load stuff into the MMU. Intended to be called with + * IR=0 and DR=0. + */ +load_up_mmu: + sync /* Force all PTE updates to finish */ + isync + tlbia /* Clear all TLB entries */ + sync /* wait for tlbia/tlbie to finish */ + TLBSYNC /* ... on all CPUs */ + /* Load the SDR1 register (hash table base & size) */ + lis r6,_SDR1@ha + tophys(r6,r6) + lwz r6,_SDR1@l(r6) + mtspr SPRN_SDR1,r6 + li r0,16 /* load up segment register values */ + mtctr r0 /* for context 0 */ + lis r3,0x2000 /* Ku = 1, VSID = 0 */ + li r4,0 +3: mtsrin r3,r4 + addi r3,r3,0x111 /* increment VSID */ + addis r4,r4,0x1000 /* address of next segment */ + bdnz 3b + +/* Load the BAT registers with the values set up by MMU_init. + MMU_init takes care of whether we're on a 601 or not. 
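copy_and_flush above is the standard "copy code, then make it fetchable" routine: store each cache line, dcbst it out to memory, sync, then icbi the destination so no stale instructions can be fetched. A hedged C rendering, assuming a 32-byte L1 line as on classic 6xx parts:

    #include <string.h>

    #define L1_CACHE_BYTES 32       /* assumption: classic 6xx line size */

    /* Sketch: copy code one cache line at a time, then push it out of
     * the d-cache and invalidate the i-cache so the copy is fetchable. */
    static void copy_and_flush_c(void *dst, const void *src,
                                 unsigned long len)
    {
            for (unsigned long off = 0; off < len; off += L1_CACHE_BYTES) {
                    memcpy((char *)dst + off, (const char *)src + off,
                           L1_CACHE_BYTES);
                    asm volatile("dcbst 0,%0" : : "r"((char *)dst + off)
                                 : "memory");
                    asm volatile("sync");
                    asm volatile("icbi 0,%0" : : "r"((char *)dst + off)
                                 : "memory");
            }
            asm volatile("sync; isync");    /* extra sync needed on G4 */
    }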
*/ + mfpvr r3 + srwi r3,r3,16 + cmpwi r3,1 + lis r3,BATS@ha + addi r3,r3,BATS@l + tophys(r3,r3) + LOAD_BAT(0,r3,r4,r5) + LOAD_BAT(1,r3,r4,r5) + LOAD_BAT(2,r3,r4,r5) + LOAD_BAT(3,r3,r4,r5) +BEGIN_MMU_FTR_SECTION + LOAD_BAT(4,r3,r4,r5) + LOAD_BAT(5,r3,r4,r5) + LOAD_BAT(6,r3,r4,r5) + LOAD_BAT(7,r3,r4,r5) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + blr + +/* + * This is where the main kernel code starts. + */ +start_here: + /* ptr to current */ + lis r2,init_task@h + ori r2,r2,init_task@l + /* Set up for using our exception vectors */ + /* ptr to phys current thread */ + tophys(r4,r2) + addi r4,r4,THREAD /* init task's THREAD */ + CLR_TOP32(r4) + mtspr SPRN_SPRG_THREAD,r4 + li r3,0 + mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */ + + /* stack */ + lis r1,init_thread_union@ha + addi r1,r1,init_thread_union@l + li r0,0 + stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) +/* + * Do early platform-specific initialization, + * and set up the MMU. + */ + li r3,0 + mr r4,r31 + bl machine_init + bl __save_cpu_setup + bl MMU_init + +/* + * Go back to running unmapped so we can load up new values + * for SDR1 (hash table pointer) and the segment registers + * and change to using our exception vectors. + */ + lis r4,2f@h + ori r4,r4,2f@l + tophys(r4,r4) + li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR) + FIX_SRR1(r3,r5) + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + SYNC + RFI +/* Load up the kernel context */ +2: bl load_up_mmu + +#ifdef CONFIG_BDI_SWITCH + /* Add helper information for the Abatron bdiGDB debugger. + * We do this here because we know the mmu is disabled, and + * will be enabled for real in just a few instructions. + */ + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + stw r5, 0xf0(r0) /* This much match your Abatron config */ + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l + tophys(r5, r5) + stw r6, 0(r5) +#endif /* CONFIG_BDI_SWITCH */ + +/* Now turn on the MMU for real! */ + li r4,MSR_KERNEL + FIX_SRR1(r4,r5) + lis r3,start_kernel@h + ori r3,r3,start_kernel@l + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + SYNC + RFI + +/* + * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next); + * + * Set up the segment registers for a new context. + */ +_ENTRY(switch_mmu_context) + lwz r3,MMCONTEXTID(r4) + cmpwi cr0,r3,0 + blt- 4f + mulli r3,r3,897 /* multiply context by skew factor */ + rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */ + addis r3,r3,0x6000 /* Set Ks, Ku bits */ + li r0,NUM_USER_SEGMENTS + mtctr r0 + +#ifdef CONFIG_BDI_SWITCH + /* Context switch the PTE pointer for the Abatron BDI2000. + * The PGDIR is passed as second argument. + */ + lwz r4,MM_PGD(r4) + lis r5, KERNELBASE@h + lwz r5, 0xf0(r5) + stw r4, 0x4(r5) +#endif + li r4,0 + isync +3: + mtsrin r3,r4 + addi r3,r3,0x111 /* next VSID */ + rlwinm r3,r3,0,8,3 /* clear out any overflow from VSID field */ + addis r4,r4,0x1000 /* address of next segment */ + bdnz 3b + sync + isync + blr +4: trap + EMIT_BUG_ENTRY 4b,__FILE__,__LINE__,0 + blr + +/* + * An undocumented "feature" of 604e requires that the v bit + * be cleared before changing BAT values. + * + * Also, newer IBM firmware does not clear bat3 and 4 so + * this makes sure it's done. 
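switch_mmu_context above derives the 24-bit VSID for each of the 16 user segments from the context id: multiply by the 897 skew, shift into the VSID field, then step by 0x111 per segment with the Ks/Ku bits set. The arithmetic, as a C sketch:

    /* Sketch: segment register value for segment 'seg' of a context,
     * mirroring the mulli/rlwinm/addi sequence in switch_mmu_context. */
    static unsigned int segment_register(unsigned int ctx, unsigned int seg)
    {
            unsigned int vsid = ((ctx * 897) << 4) + seg * 0x111;

            return 0x60000000u | (vsid & 0x00ffffffu); /* Ks|Ku + VSID */
    }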
+ * -- Cort + */ +clear_bats: + li r10,0 + mfspr r9,SPRN_PVR + rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */ + cmpwi r9, 1 + beq 1f + + mtspr SPRN_DBAT0U,r10 + mtspr SPRN_DBAT0L,r10 + mtspr SPRN_DBAT1U,r10 + mtspr SPRN_DBAT1L,r10 + mtspr SPRN_DBAT2U,r10 + mtspr SPRN_DBAT2L,r10 + mtspr SPRN_DBAT3U,r10 + mtspr SPRN_DBAT3L,r10 +1: + mtspr SPRN_IBAT0U,r10 + mtspr SPRN_IBAT0L,r10 + mtspr SPRN_IBAT1U,r10 + mtspr SPRN_IBAT1L,r10 + mtspr SPRN_IBAT2U,r10 + mtspr SPRN_IBAT2L,r10 + mtspr SPRN_IBAT3U,r10 + mtspr SPRN_IBAT3L,r10 +BEGIN_MMU_FTR_SECTION + /* Here's a tweak: at this point, CPU setup have + * not been called yet, so HIGH_BAT_EN may not be + * set in HID0 for the 745x processors. However, it + * seems that doesn't affect our ability to actually + * write to these SPRs. + */ + mtspr SPRN_DBAT4U,r10 + mtspr SPRN_DBAT4L,r10 + mtspr SPRN_DBAT5U,r10 + mtspr SPRN_DBAT5L,r10 + mtspr SPRN_DBAT6U,r10 + mtspr SPRN_DBAT6L,r10 + mtspr SPRN_DBAT7U,r10 + mtspr SPRN_DBAT7L,r10 + mtspr SPRN_IBAT4U,r10 + mtspr SPRN_IBAT4L,r10 + mtspr SPRN_IBAT5U,r10 + mtspr SPRN_IBAT5L,r10 + mtspr SPRN_IBAT6U,r10 + mtspr SPRN_IBAT6L,r10 + mtspr SPRN_IBAT7U,r10 + mtspr SPRN_IBAT7L,r10 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + blr + +flush_tlbs: + lis r10, 0x40 +1: addic. r10, r10, -0x1000 + tlbie r10 + bgt 1b + sync + blr + +mmu_off: + addi r4, r3, __after_mmu_off - _start + mfmsr r3 + andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? */ + beqlr + andc r3,r3,r0 + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + sync + RFI + +/* + * On 601, we use 3 BATs to map up to 24M of RAM at _PAGE_OFFSET + * (we keep one for debugging) and on others, we use one 256M BAT. + */ +initial_bats: + lis r11,PAGE_OFFSET@h + mfspr r9,SPRN_PVR + rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */ + cmpwi 0,r9,1 + bne 4f + ori r11,r11,4 /* set up BAT registers for 601 */ + li r8,0x7f /* valid, block length = 8MB */ + mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */ + mtspr SPRN_IBAT0L,r8 /* lower BAT register */ + addis r11,r11,0x800000@h + addis r8,r8,0x800000@h + mtspr SPRN_IBAT1U,r11 + mtspr SPRN_IBAT1L,r8 + addis r11,r11,0x800000@h + addis r8,r8,0x800000@h + mtspr SPRN_IBAT2U,r11 + mtspr SPRN_IBAT2L,r8 + isync + blr + +4: tophys(r8,r11) +#ifdef CONFIG_SMP + ori r8,r8,0x12 /* R/W access, M=1 */ +#else + ori r8,r8,2 /* R/W access */ +#endif /* CONFIG_SMP */ + ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */ + + mtspr SPRN_DBAT0L,r8 /* N.B. 6xx (not 601) have valid */ + mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */ + mtspr SPRN_IBAT0L,r8 + mtspr SPRN_IBAT0U,r11 + isync + blr + + +#ifdef CONFIG_BOOTX_TEXT +setup_disp_bat: + /* + * setup the display bat prepared for us in prom.c + */ + mflr r8 + bl reloc_offset + mtlr r8 + addis r8,r3,disp_BAT@ha + addi r8,r8,disp_BAT@l + cmpwi cr0,r8,0 + beqlr + lwz r11,0(r8) + lwz r8,4(r8) + mfspr r9,SPRN_PVR + rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */ + cmpwi 0,r9,1 + beq 1f + mtspr SPRN_DBAT3L,r8 + mtspr SPRN_DBAT3U,r11 + blr +1: mtspr SPRN_IBAT3L,r8 + mtspr SPRN_IBAT3U,r11 + blr +#endif /* CONFIG_BOOTX_TEXT */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_CPM +setup_cpm_bat: + lis r8, 0xf000 + ori r8, r8, 0x002a + mtspr SPRN_DBAT1L, r8 + + lis r11, 0xf000 + ori r11, r11, (BL_1M << 2) | 2 + mtspr SPRN_DBAT1U, r11 + + blr +#endif + +#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO +setup_usbgecko_bat: + /* prepare a BAT for early io */ +#if defined(CONFIG_GAMECUBE) + lis r8, 0x0c00 +#elif defined(CONFIG_WII) + lis r8, 0x0d00 +#else +#error Invalid platform for USB Gecko based early debugging. 
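initial_bats above handles two encodings: the 601 keeps its valid bit in the lower register and maps three 8MB blocks, while later 6xx parts use a single 256MB block with the valid bit in the upper register. The 604-style pair sketched in C, taking BL_256M as 0x7ff (an assumption consistent with the 11-bit BL field) and the PP/WIMG values used above:

    /* Sketch: upper/lower BAT pair for one 256MB kernel block, as set
     * up for non-601 parts (SMP additionally sets the M/coherence bit). */
    struct bat_pair { unsigned int upper, lower; };

    static struct bat_pair bat_256m(unsigned int vaddr, unsigned int paddr,
                                    int smp)
    {
            struct bat_pair b;

            b.upper = vaddr | (0x7ff << 2) | 0x2;   /* BEPI | BL=256M | Vs */
            b.lower = paddr | (smp ? 0x12 : 0x2);   /* BRPN | (M) | PP=RW  */
            return b;
    }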
+#endif + /* + * The virtual address used must match the virtual address + * associated to the fixmap entry FIX_EARLY_DEBUG_BASE. + */ + lis r11, 0xfffe /* top 128K */ + ori r8, r8, 0x002a /* uncached, guarded ,rw */ + ori r11, r11, 0x2 /* 128K, Vs=1, Vp=0 */ + mtspr SPRN_DBAT1L, r8 + mtspr SPRN_DBAT1U, r11 + blr +#endif + +#ifdef CONFIG_8260 +/* Jump into the system reset for the rom. + * We first disable the MMU, and then jump to the ROM reset address. + * + * r3 is the board info structure, r4 is the location for starting. + * I use this for building a small kernel that can load other kernels, + * rather than trying to write or rely on a rom monitor that can tftp load. + */ + .globl m8260_gorom +m8260_gorom: + mfmsr r0 + rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */ + sync + mtmsr r0 + sync + mfspr r11, SPRN_HID0 + lis r10, 0 + ori r10,r10,HID0_ICE|HID0_DCE + andc r11, r11, r10 + mtspr SPRN_HID0, r11 + isync + li r5, MSR_ME|MSR_RI + lis r6,2f@h + addis r6,r6,-KERNELBASE@h + ori r6,r6,2f@l + mtspr SPRN_SRR0,r6 + mtspr SPRN_SRR1,r5 + isync + sync + rfi +2: + mtlr r4 + blr +#endif + + +/* + * We put a few things here that have to be page-aligned. + * This stuff goes at the beginning of the data segment, + * which is page-aligned. + */ + .data + .globl sdata +sdata: + .globl empty_zero_page +empty_zero_page: + .space 4096 + + .globl swapper_pg_dir +swapper_pg_dir: + .space PGD_TABLE_SIZE + + .globl intercept_table +intercept_table: + .long 0, 0, i0x200, i0x300, i0x400, 0, i0x600, i0x700 + .long i0x800, 0, 0, 0, 0, i0xd00, 0, 0 + .long 0, 0, 0, i0x1300, 0, 0, 0, 0 + .long 0, 0, 0, 0, 0, 0, 0, 0 + .long 0, 0, 0, 0, 0, 0, 0, 0 + .long 0, 0, 0, 0, 0, 0, 0, 0 + +/* Room for two PTE pointers, usually the kernel and current user pointers + * to their respective root page table. + */ +abatron_pteptrs: + .space 8 diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S new file mode 100644 index 00000000..4989661b --- /dev/null +++ b/arch/powerpc/kernel/head_40x.S @@ -0,0 +1,998 @@ +/* + * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org> + * Initial PowerPC version. + * Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu> + * Rewritten for PReP + * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au> + * Low-level exception handers, MMU support, and rewrite. + * Copyright (c) 1997 Dan Malek <dmalek@jlc.net> + * PowerPC 8xx modifications. + * Copyright (c) 1998-1999 TiVo, Inc. + * PowerPC 403GCX modifications. + * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu> + * PowerPC 403GCX/405GP modifications. + * Copyright 2000 MontaVista Software Inc. + * PPC405 modifications + * PowerPC 403GCX/405GP modifications. + * Author: MontaVista Software, Inc. + * frank_rowand@mvista.com or source@mvista.com + * debbie_chu@mvista.com + * + * + * Module name: head_4xx.S + * + * Description: + * Kernel execution entry point code. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include <linux/init.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ptrace.h> + +/* As with the other PowerPC ports, it is expected that when code + * execution begins here, the following registers contain valid, yet + * optional, information: + * + * r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.) + * r4 - Starting address of the init RAM disk + * r5 - Ending address of the init RAM disk + * r6 - Start of kernel command line string (e.g. "mem=96m") + * r7 - End of kernel command line string + * + * This is all going to change RSN when we add bi_recs....... -- Dan + */ + __HEAD +_ENTRY(_stext); +_ENTRY(_start); + + mr r31,r3 /* save device tree ptr */ + + /* We have to turn on the MMU right away so we get cache modes + * set correctly. + */ + bl initial_mmu + +/* We now have the lower 16 Meg mapped into TLB entries, and the caches + * ready to work. + */ +turn_on_mmu: + lis r0,MSR_KERNEL@h + ori r0,r0,MSR_KERNEL@l + mtspr SPRN_SRR1,r0 + lis r0,start_here@h + ori r0,r0,start_here@l + mtspr SPRN_SRR0,r0 + SYNC + rfi /* enables MMU */ + b . /* prevent prefetch past rfi */ + +/* + * This area is used for temporarily saving registers during the + * critical exception prolog. + */ + . = 0xc0 +crit_save: +_ENTRY(crit_r10) + .space 4 +_ENTRY(crit_r11) + .space 4 +_ENTRY(crit_srr0) + .space 4 +_ENTRY(crit_srr1) + .space 4 +_ENTRY(saved_ksp_limit) + .space 4 + +/* + * Exception vector entry code. This code runs with address translation + * turned off (i.e. using physical addresses). We assume SPRG_THREAD has + * the physical address of the current task thread_struct. + * Note that we have to have decremented r1 before we write to any fields + * of the exception frame, since a critical interrupt could occur at any + * time, and it will write to the area immediately below the current r1. + */ +#define NORMAL_EXCEPTION_PROLOG \ + mtspr SPRN_SPRG_SCRATCH0,r10; /* save two registers to work with */\ + mtspr SPRN_SPRG_SCRATCH1,r11; \ + mtspr SPRN_SPRG_SCRATCH2,r1; \ + mfcr r10; /* save CR in r10 for now */\ + mfspr r11,SPRN_SRR1; /* check whether user or kernel */\ + andi. r11,r11,MSR_PR; \ + beq 1f; \ + mfspr r1,SPRN_SPRG_THREAD; /* if from user, start at top of */\ + lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack */\ + addi r1,r1,THREAD_SIZE; \ +1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\ + tophys(r11,r1); \ + stw r10,_CCR(r11); /* save various registers */\ + stw r12,GPR12(r11); \ + stw r9,GPR9(r11); \ + mfspr r10,SPRN_SPRG_SCRATCH0; \ + stw r10,GPR10(r11); \ + mfspr r12,SPRN_SPRG_SCRATCH1; \ + stw r12,GPR11(r11); \ + mflr r10; \ + stw r10,_LINK(r11); \ + mfspr r10,SPRN_SPRG_SCRATCH2; \ + mfspr r12,SPRN_SRR0; \ + stw r10,GPR1(r11); \ + mfspr r9,SPRN_SRR1; \ + stw r10,0(r11); \ + rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ + stw r0,GPR0(r11); \ + SAVE_4GPRS(3, r11); \ + SAVE_2GPRS(7, r11) + +/* + * Exception prolog for critical exceptions. This is a little different + * from the normal exception prolog above since a critical exception + * can potentially occur at any point during normal exception processing. + * Thus we cannot use the same SPRG registers as the normal prolog above. + * Instead we use a couple of words of memory at low physical addresses. + * This is OK since we don't support SMP on these processors. 
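Before anything else, NORMAL_EXCEPTION_PROLOG above decides which stack the exception frame goes on: from user mode (MSR_PR set in SRR1) it starts at the top of the task's kernel stack, from kernel mode it pushes just below the interrupted r1. The rule as a C sketch, with illustrative constants where the asm uses asm-offsets values:

    #define MSR_PR         0x4000  /* illustrative constants */
    #define THREAD_SIZE    8192
    #define INT_FRAME_SIZE 192

    /* Sketch: frame placement done by NORMAL_EXCEPTION_PROLOG. */
    static unsigned long exception_frame(unsigned long srr1,
                                         unsigned long r1,
                                         unsigned long thread_info)
    {
            if (srr1 & MSR_PR)                      /* came from user */
                    r1 = thread_info + THREAD_SIZE; /* top of kstack  */
            return r1 - INT_FRAME_SIZE;             /* alloc frame    */
    }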
+ */ +#define CRITICAL_EXCEPTION_PROLOG \ + stw r10,crit_r10@l(0); /* save two registers to work with */\ + stw r11,crit_r11@l(0); \ + mfcr r10; /* save CR in r10 for now */\ + mfspr r11,SPRN_SRR3; /* check whether user or kernel */\ + andi. r11,r11,MSR_PR; \ + lis r11,critirq_ctx@ha; \ + tophys(r11,r11); \ + lwz r11,critirq_ctx@l(r11); \ + beq 1f; \ + /* COMING FROM USER MODE */ \ + mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ + lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ +1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm */\ + tophys(r11,r11); \ + stw r10,_CCR(r11); /* save various registers */\ + stw r12,GPR12(r11); \ + stw r9,GPR9(r11); \ + mflr r10; \ + stw r10,_LINK(r11); \ + mfspr r12,SPRN_DEAR; /* save DEAR and ESR in the frame */\ + stw r12,_DEAR(r11); /* since they may have had stuff */\ + mfspr r9,SPRN_ESR; /* in them at the point where the */\ + stw r9,_ESR(r11); /* exception was taken */\ + mfspr r12,SPRN_SRR2; \ + stw r1,GPR1(r11); \ + mfspr r9,SPRN_SRR3; \ + stw r1,0(r11); \ + tovirt(r1,r11); \ + rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ + stw r0,GPR0(r11); \ + SAVE_4GPRS(3, r11); \ + SAVE_2GPRS(7, r11) + + /* + * State at this point: + * r9 saved in stack frame, now saved SRR3 & ~MSR_WE + * r10 saved in crit_r10 and in stack frame, trashed + * r11 saved in crit_r11 and in stack frame, + * now phys stack/exception frame pointer + * r12 saved in stack frame, now saved SRR2 + * CR saved in stack frame, CR0.EQ = !SRR3.PR + * LR, DEAR, ESR in stack frame + * r1 saved in stack frame, now virt stack/excframe pointer + * r0, r3-r8 saved in stack frame + */ + +/* + * Exception vectors. + */ +#define START_EXCEPTION(n, label) \ + . = n; \ +label: + +#define EXCEPTION(n, label, hdlr, xfer) \ + START_EXCEPTION(n, label); \ + NORMAL_EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + xfer(n, hdlr) + +#define CRITICAL_EXCEPTION(n, label, hdlr) \ + START_EXCEPTION(n, label); \ + CRITICAL_EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ + NOCOPY, crit_transfer_to_handler, \ + ret_from_crit_exc) + +#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \ + li r10,trap; \ + stw r10,_TRAP(r11); \ + lis r10,msr@h; \ + ori r10,r10,msr@l; \ + copyee(r10, r9); \ + bl tfer; \ + .long hdlr; \ + .long ret + +#define COPY_EE(d, s) rlwimi d,s,0,16,16 +#define NOCOPY(d, s) + +#define EXC_XFER_STD(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ + ret_from_except) + +#define EXC_XFER_EE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_EE_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \ + ret_from_except) + + +/* + * 0x0100 - Critical Interrupt Exception + */ + CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, unknown_exception) + +/* + * 0x0200 - Machine Check Exception + */ + CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) + +/* + * 0x0300 - Data Storage Exception + * This happens for just a few reasons. U0 set (but we don't do that), + * or zone protection fault (user violation, write to protected page). + * If this is just an update of modified status, we do that quickly + * and exit. 
Otherwise, we call heavywight functions to do the work. + */ + START_EXCEPTION(0x0300, DataStorage) + mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ + mtspr SPRN_SPRG_SCRATCH1, r11 +#ifdef CONFIG_403GCX + stw r12, 0(r0) + stw r9, 4(r0) + mfcr r11 + mfspr r12, SPRN_PID + stw r11, 8(r0) + stw r12, 12(r0) +#else + mtspr SPRN_SPRG_SCRATCH3, r12 + mtspr SPRN_SPRG_SCRATCH4, r9 + mfcr r11 + mfspr r12, SPRN_PID + mtspr SPRN_SPRG_SCRATCH6, r11 + mtspr SPRN_SPRG_SCRATCH5, r12 +#endif + + /* First, check if it was a zone fault (which means a user + * tried to access a kernel or read-protected page - always + * a SEGV). All other faults here must be stores, so no + * need to check ESR_DST as well. */ + mfspr r10, SPRN_ESR + andis. r10, r10, ESR_DIZ@h + bne 2f + + mfspr r10, SPRN_DEAR /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, PAGE_OFFSET@h + cmplw r10, r11 + blt+ 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + li r9, 0 + mtspr SPRN_PID, r9 /* TLB will have 0 TID */ + b 4f + + /* Get the PGD for the current thread. + */ +3: + mfspr r11,SPRN_SPRG_THREAD + lwz r11,PGDIR(r11) +4: + tophys(r11, r11) + rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ + lwz r11, 0(r11) /* Get L1 entry */ + rlwinm. r12, r11, 0, 0, 19 /* Extract L2 (pte) base address */ + beq 2f /* Bail if no table */ + + rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */ + lwz r11, 0(r12) /* Get Linux PTE */ + + andi. r9, r11, _PAGE_RW /* Is it writeable? */ + beq 2f /* Bail if not */ + + /* Update 'changed'. + */ + ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE + stw r11, 0(r12) /* Update Linux page table */ + + /* Most of the Linux PTE is ready to load into the TLB LO. + * We set ZSEL, where only the LS-bit determines user access. + * We set execute, because we don't have the granularity to + * properly set this at the page level (Linux problem). + * If shared is set, we cause a zero PID->TID load. + * Many of these bits are software only. Bits we don't set + * here we (properly should) assume have the appropriate value. + */ + li r12, 0x0ce2 + andc r11, r11, r12 /* Make sure 20, 21 are zero */ + + /* find the TLB index that caused the fault. It has to be here. + */ + tlbsx r9, 0, r10 + + tlbwe r11, r9, TLB_DATA /* Load TLB LO */ + + /* Done...restore registers and get out of here. + */ +#ifdef CONFIG_403GCX + lwz r12, 12(r0) + lwz r11, 8(r0) + mtspr SPRN_PID, r12 + mtcr r11 + lwz r9, 4(r0) + lwz r12, 0(r0) +#else + mfspr r12, SPRN_SPRG_SCRATCH5 + mfspr r11, SPRN_SPRG_SCRATCH6 + mtspr SPRN_PID, r12 + mtcr r11 + mfspr r9, SPRN_SPRG_SCRATCH4 + mfspr r12, SPRN_SPRG_SCRATCH3 +#endif + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r10, SPRN_SPRG_SCRATCH0 + PPC405_ERR77_SYNC + rfi /* Should sync shadow TLBs */ + b . /* prevent prefetch past rfi */ + +2: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ +#ifdef CONFIG_403GCX + lwz r12, 12(r0) + lwz r11, 8(r0) + mtspr SPRN_PID, r12 + mtcr r11 + lwz r9, 4(r0) + lwz r12, 0(r0) +#else + mfspr r12, SPRN_SPRG_SCRATCH5 + mfspr r11, SPRN_SPRG_SCRATCH6 + mtspr SPRN_PID, r12 + mtcr r11 + mfspr r9, SPRN_SPRG_SCRATCH4 + mfspr r12, SPRN_SPRG_SCRATCH3 +#endif + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r10, SPRN_SPRG_SCRATCH0 + b DataAccess + +/* + * 0x0400 - Instruction Storage Exception + * This is caused by a fetch from non-execute or guarded pages. 
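The 0x300 handler above walks the Linux page tables in software: the top 10 effective-address bits index the pgd, the next 10 the PTE page, which is exactly what the two rlwimi instructions compute. A C sketch of the same walk:

    /* Sketch: the two-level table walk done inline by the 40x
     * DataStorage/DTLBMiss/ITLBMiss handlers. Returns NULL on a
     * missing pte page, which is where the asm bails out. */
    static unsigned int *walk_40x(unsigned int *pgd, unsigned int ea)
    {
            unsigned int l1 = pgd[ea >> 22];        /* pgd/pmd entry */
            unsigned int *pte_page =
                    (unsigned int *)(l1 & 0xfffff000u);

            if (!pte_page)
                    return 0;                       /* bail: no table */
            return &pte_page[(ea >> 12) & 0x3ff];   /* Linux PTE */
    }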
+ */ + START_EXCEPTION(0x0400, InstructionAccess) + NORMAL_EXCEPTION_PROLOG + mr r4,r12 /* Pass SRR0 as arg2 */ + li r5,0 /* Pass zero as arg3 */ + EXC_XFER_LITE(0x400, handle_page_fault) + +/* 0x0500 - External Interrupt Exception */ + EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + +/* 0x0600 - Alignment Exception */ + START_EXCEPTION(0x0600, Alignment) + NORMAL_EXCEPTION_PROLOG + mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */ + stw r4,_DEAR(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE(0x600, alignment_exception) + +/* 0x0700 - Program Exception */ + START_EXCEPTION(0x0700, ProgramCheck) + NORMAL_EXCEPTION_PROLOG + mfspr r4,SPRN_ESR /* Grab the ESR and save it */ + stw r4,_ESR(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_STD(0x700, program_check_exception) + + EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_EE) + +/* 0x0C00 - System Call Exception */ + START_EXCEPTION(0x0C00, SystemCall) + NORMAL_EXCEPTION_PROLOG + EXC_XFER_EE_LITE(0xc00, DoSyscall) + + EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE) + +/* 0x1000 - Programmable Interval Timer (PIT) Exception */ + START_EXCEPTION(0x1000, Decrementer) + NORMAL_EXCEPTION_PROLOG + lis r0,TSR_PIS@h + mtspr SPRN_TSR,r0 /* Clear the PIT exception */ + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_LITE(0x1000, timer_interrupt) + +#if 0 +/* NOTE: + * FIT and WDT handlers are not implemented yet. + */ + +/* 0x1010 - Fixed Interval Timer (FIT) Exception +*/ + STND_EXCEPTION(0x1010, FITException, unknown_exception) + +/* 0x1020 - Watchdog Timer (WDT) Exception +*/ +#ifdef CONFIG_BOOKE_WDT + CRITICAL_EXCEPTION(0x1020, WDTException, WatchdogException) +#else + CRITICAL_EXCEPTION(0x1020, WDTException, unknown_exception) +#endif +#endif + +/* 0x1100 - Data TLB Miss Exception + * As the name implies, translation is not in the MMU, so search the + * page tables and fix it. The only purpose of this function is to + * load TLB entries from the page table if they exist. + */ + START_EXCEPTION(0x1100, DTLBMiss) + mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ + mtspr SPRN_SPRG_SCRATCH1, r11 +#ifdef CONFIG_403GCX + stw r12, 0(r0) + stw r9, 4(r0) + mfcr r11 + mfspr r12, SPRN_PID + stw r11, 8(r0) + stw r12, 12(r0) +#else + mtspr SPRN_SPRG_SCRATCH3, r12 + mtspr SPRN_SPRG_SCRATCH4, r9 + mfcr r11 + mfspr r12, SPRN_PID + mtspr SPRN_SPRG_SCRATCH6, r11 + mtspr SPRN_SPRG_SCRATCH5, r12 +#endif + mfspr r10, SPRN_DEAR /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, PAGE_OFFSET@h + cmplw r10, r11 + blt+ 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + li r9, 0 + mtspr SPRN_PID, r9 /* TLB will have 0 TID */ + b 4f + + /* Get the PGD for the current thread. + */ +3: + mfspr r11,SPRN_SPRG_THREAD + lwz r11,PGDIR(r11) +4: + tophys(r11, r11) + rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ + lwz r12, 0(r11) /* Get L1 entry */ + andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */ + beq 2f /* Bail if no table */ + + rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */ + lwz r11, 0(r12) /* Get Linux PTE */ + andi. 
r9, r11, _PAGE_PRESENT + beq 5f + + ori r11, r11, _PAGE_ACCESSED + stw r11, 0(r12) + + /* Create TLB tag. This is the faulting address plus a static + * set of bits. These are size, valid, E, U0. + */ + li r12, 0x00c0 + rlwimi r10, r12, 0, 20, 31 + + b finish_tlb_load + +2: /* Check for possible large-page pmd entry */ + rlwinm. r9, r12, 2, 22, 24 + beq 5f + + /* Create TLB tag. This is the faulting address, plus a static + * set of bits (valid, E, U0) plus the size from the PMD. + */ + ori r9, r9, 0x40 + rlwimi r10, r9, 0, 20, 31 + mr r11, r12 + + b finish_tlb_load + +5: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ +#ifdef CONFIG_403GCX + lwz r12, 12(r0) + lwz r11, 8(r0) + mtspr SPRN_PID, r12 + mtcr r11 + lwz r9, 4(r0) + lwz r12, 0(r0) +#else + mfspr r12, SPRN_SPRG_SCRATCH5 + mfspr r11, SPRN_SPRG_SCRATCH6 + mtspr SPRN_PID, r12 + mtcr r11 + mfspr r9, SPRN_SPRG_SCRATCH4 + mfspr r12, SPRN_SPRG_SCRATCH3 +#endif + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r10, SPRN_SPRG_SCRATCH0 + b DataAccess + +/* 0x1200 - Instruction TLB Miss Exception + * Nearly the same as above, except we get our information from different + * registers and bailout to a different point. + */ + START_EXCEPTION(0x1200, ITLBMiss) + mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ + mtspr SPRN_SPRG_SCRATCH1, r11 +#ifdef CONFIG_403GCX + stw r12, 0(r0) + stw r9, 4(r0) + mfcr r11 + mfspr r12, SPRN_PID + stw r11, 8(r0) + stw r12, 12(r0) +#else + mtspr SPRN_SPRG_SCRATCH3, r12 + mtspr SPRN_SPRG_SCRATCH4, r9 + mfcr r11 + mfspr r12, SPRN_PID + mtspr SPRN_SPRG_SCRATCH6, r11 + mtspr SPRN_SPRG_SCRATCH5, r12 +#endif + mfspr r10, SPRN_SRR0 /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, PAGE_OFFSET@h + cmplw r10, r11 + blt+ 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + li r9, 0 + mtspr SPRN_PID, r9 /* TLB will have 0 TID */ + b 4f + + /* Get the PGD for the current thread. + */ +3: + mfspr r11,SPRN_SPRG_THREAD + lwz r11,PGDIR(r11) +4: + tophys(r11, r11) + rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ + lwz r12, 0(r11) /* Get L1 entry */ + andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */ + beq 2f /* Bail if no table */ + + rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */ + lwz r11, 0(r12) /* Get Linux PTE */ + andi. r9, r11, _PAGE_PRESENT + beq 5f + + ori r11, r11, _PAGE_ACCESSED + stw r11, 0(r12) + + /* Create TLB tag. This is the faulting address plus a static + * set of bits. These are size, valid, E, U0. + */ + li r12, 0x00c0 + rlwimi r10, r12, 0, 20, 31 + + b finish_tlb_load + +2: /* Check for possible large-page pmd entry */ + rlwinm. r9, r12, 2, 22, 24 + beq 5f + + /* Create TLB tag. This is the faulting address, plus a static + * set of bits (valid, E, U0) plus the size from the PMD. + */ + ori r9, r9, 0x40 + rlwimi r10, r9, 0, 20, 31 + mr r11, r12 + + b finish_tlb_load + +5: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. 
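Once a walk succeeds, both miss handlers above build the TLB tag by splicing a constant into the low half of the faulting address: 0x00c0 for a normal 4K page, or 0x40 plus the size bits recovered from the PMD for a large page. The 4K case in C:

    /* Sketch: 40x TLB tag word for a 4K page, as built by the rlwimi
     * above: EPN from the EA, plus static size/valid bits. */
    static unsigned int tlb40x_tag_4k(unsigned int ea)
    {
            return (ea & 0xfffff000u) | 0x0c0;      /* EPN | size | valid */
    }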
+ */ +#ifdef CONFIG_403GCX + lwz r12, 12(r0) + lwz r11, 8(r0) + mtspr SPRN_PID, r12 + mtcr r11 + lwz r9, 4(r0) + lwz r12, 0(r0) +#else + mfspr r12, SPRN_SPRG_SCRATCH5 + mfspr r11, SPRN_SPRG_SCRATCH6 + mtspr SPRN_PID, r12 + mtcr r11 + mfspr r9, SPRN_SPRG_SCRATCH4 + mfspr r12, SPRN_SPRG_SCRATCH3 +#endif + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r10, SPRN_SPRG_SCRATCH0 + b InstructionAccess + + EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) +#ifdef CONFIG_IBM405_ERR51 + /* 405GP errata 51 */ + START_EXCEPTION(0x1700, Trap_17) + b DTLBMiss +#else + EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) +#endif + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_EE) + +/* Check for a single step debug exception while in an exception + * handler before state has been saved. This is to catch the case + * where an instruction that we are trying to single step causes + * an exception (eg ITLB/DTLB miss) and thus the first instruction of + * the exception handler generates a single step debug exception. + * + * If we get a debug trap on the first instruction of an exception handler, + * we reset the MSR_DE in the _exception handler's_ MSR (the debug trap is + * a critical exception, so we are using SPRN_CSRR1 to manipulate the MSR). + * The exception handler was handling a non-critical interrupt, so it will + * save (and later restore) the MSR via SPRN_SRR1, which will still have + * the MSR_DE bit set. + */ + /* 0x2000 - Debug Exception */ + START_EXCEPTION(0x2000, DebugTrap) + CRITICAL_EXCEPTION_PROLOG + + /* + * If this is a single step or branch-taken exception in an + * exception entry sequence, it was probably meant to apply to + * the code where the exception occurred (since exception entry + * doesn't turn off DE automatically). We simulate the effect + * of turning off DE on entry to an exception handler by turning + * off DE in the SRR3 value and clearing the debug status. + */ + mfspr r10,SPRN_DBSR /* check single-step/branch taken */ + andis. r10,r10,DBSR_IC@h + beq+ 2f + + andi. r10,r9,MSR_IR|MSR_PR /* check supervisor + MMU off */ + beq 1f /* branch and fix it up */ + + mfspr r10,SPRN_SRR2 /* Faulting instruction address */ + cmplwi r10,0x2100 + bgt+ 2f /* address above exception vectors */ + + /* here it looks like we got an inappropriate debug exception. */ +1: rlwinm r9,r9,0,~MSR_DE /* clear DE in the SRR3 value */ + lis r10,DBSR_IC@h /* clear the IC event */ + mtspr SPRN_DBSR,r10 + /* restore state and get out */ + lwz r10,_CCR(r11) + lwz r0,GPR0(r11) + lwz r1,GPR1(r11) + mtcrf 0x80,r10 + mtspr SPRN_SRR2,r12 + mtspr SPRN_SRR3,r9 + lwz r9,GPR9(r11) + lwz r12,GPR12(r11) + lwz r10,crit_r10@l(0) + lwz r11,crit_r11@l(0) + PPC405_ERR77_SYNC + rfci + b . + + /* continue normal handling for a critical exception... 
*/ +2: mfspr r4,SPRN_DBSR + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_TEMPLATE(DebugException, 0x2002, \ + (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ + NOCOPY, crit_transfer_to_handler, ret_from_crit_exc) + +/* + * The other Data TLB exceptions bail out to this point + * if they can't resolve the lightweight TLB fault. + */ +DataAccess: + NORMAL_EXCEPTION_PROLOG + mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + stw r5,_ESR(r11) + mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + EXC_XFER_LITE(0x300, handle_page_fault) + +/* Other PowerPC processors, namely those derived from the 6xx-series + * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved. + * However, for the 4xx-series processors these are neither defined nor + * reserved. + */ + + /* Damn, I came up one instruction too many to fit into the + * exception space :-). Both the instruction and data TLB + * miss get to this point to load the TLB. + * r10 - TLB_TAG value + * r11 - Linux PTE + * r12, r9 - available to use + * PID - loaded with proper value when we get here + * Upon exit, we reload everything and RFI. + * Actually, it will fit now, but oh well.....a common place + * to load the TLB. + */ +tlb_4xx_index: + .long 0 +finish_tlb_load: + /* load the next available TLB index. + */ + lwz r9, tlb_4xx_index@l(0) + addi r9, r9, 1 + andi. r9, r9, (PPC40X_TLB_SIZE-1) + stw r9, tlb_4xx_index@l(0) + +6: + /* + * Clear out the software-only bits in the PTE to generate the + * TLB_DATA value. These are the bottom 2 bits of the RPM, the + * top 3 bits of the zone field, and M. + */ + li r12, 0x0ce2 + andc r11, r11, r12 + + tlbwe r11, r9, TLB_DATA /* Load TLB LO */ + tlbwe r10, r9, TLB_TAG /* Load TLB HI */ + + /* Done...restore registers and get out of here. + */ +#ifdef CONFIG_403GCX + lwz r12, 12(r0) + lwz r11, 8(r0) + mtspr SPRN_PID, r12 + mtcr r11 + lwz r9, 4(r0) + lwz r12, 0(r0) +#else + mfspr r12, SPRN_SPRG_SCRATCH5 + mfspr r11, SPRN_SPRG_SCRATCH6 + mtspr SPRN_PID, r12 + mtcr r11 + mfspr r9, SPRN_SPRG_SCRATCH4 + mfspr r12, SPRN_SPRG_SCRATCH3 +#endif + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r10, SPRN_SPRG_SCRATCH0 + PPC405_ERR77_SYNC + rfi /* Should sync shadow TLBs */ + b . /* prevent prefetch past rfi */ + +/* extern void giveup_fpu(struct task_struct *prev) + * + * The PowerPC 4xx family of processors do not have an FPU, so this just + * returns. + */ +_ENTRY(giveup_fpu) + blr + +/* This is where the main kernel code starts. + */ +start_here: + + /* ptr to current */ + lis r2,init_task@h + ori r2,r2,init_task@l + + /* ptr to phys current thread */ + tophys(r4,r2) + addi r4,r4,THREAD /* init task's THREAD */ + mtspr SPRN_SPRG_THREAD,r4 + + /* stack */ + lis r1,init_thread_union@ha + addi r1,r1,init_thread_union@l + li r0,0 + stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + + bl early_init /* We have to do this with MMU on */ + +/* + * Decide what sort of machine this is and initialize the MMU. + */ + li r3,0 + mr r4,r31 + bl machine_init + bl MMU_init + +/* Go back to running unmapped so we can load up new values + * and change to using our exception vectors. + * On the 4xx, all we have to do is invalidate the TLB to clear + * the old 16M byte TLB mappings. + */ + lis r4,2f@h + ori r4,r4,2f@l + tophys(r4,r4) + lis r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@h + ori r3,r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@l + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + rfi + b . 
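finish_tlb_load above replaces entries round-robin: a single word of state, tlb_4xx_index, is bumped, wrapped at the TLB size, and used as the tlbwe slot. In C, assuming PPC40X_TLB_SIZE is 64 (the andi. mask requires a power of two):

    #define PPC40X_TLB_SIZE 64      /* assumption; must be a power of two */

    static unsigned int tlb_4xx_index;

    /* Sketch: pick the next victim slot, as in finish_tlb_load. */
    static unsigned int next_tlb_slot(void)
    {
            tlb_4xx_index = (tlb_4xx_index + 1) & (PPC40X_TLB_SIZE - 1);
            return tlb_4xx_index;
    }

Keeping the index in memory (lwz/stw against tlb_4xx_index@l) rather than in an SPR costs a load/store pair per miss but avoids burning another scratch register in an already register-starved handler.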
/* prevent prefetch past rfi */ + +/* Load up the kernel context */ +2: + sync /* Flush to memory before changing TLB */ + tlbia + isync /* Flush shadow TLBs */ + + /* set up the PTE pointers for the Abatron bdiGDB. + */ + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + stw r5, 0xf0(r0) /* Must match your Abatron config file */ + tophys(r5,r5) + stw r6, 0(r5) + +/* Now turn on the MMU for real! */ + lis r4,MSR_KERNEL@h + ori r4,r4,MSR_KERNEL@l + lis r3,start_kernel@h + ori r3,r3,start_kernel@l + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + rfi /* enable MMU and jump to start_kernel */ + b . /* prevent prefetch past rfi */ + +/* Set up the initial MMU state so we can do the first level of + * kernel initialization. This maps the first 16 MBytes of memory 1:1 + * virtual to physical and more importantly sets the cache mode. + */ +initial_mmu: + tlbia /* Invalidate all TLB entries */ + isync + + /* We should still be executing code at physical address 0x0000xxxx + * at this point. However, start_here is at virtual address + * 0xC000xxxx. So, set up a TLB mapping to cover this once + * translation is enabled. + */ + + lis r3,KERNELBASE@h /* Load the kernel virtual address */ + ori r3,r3,KERNELBASE@l + tophys(r4,r3) /* Load the kernel physical address */ + + iccci r0,r3 /* Invalidate the i-cache before use */ + + /* Load the kernel PID. + */ + li r0,0 + mtspr SPRN_PID,r0 + sync + + /* Configure and load one entry into TLB slots 63 */ + clrrwi r4,r4,10 /* Mask off the real page number */ + ori r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */ + + clrrwi r3,r3,10 /* Mask off the effective page number */ + ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_16M)) + + li r0,63 /* TLB slot 63 */ + + tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ + tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ + +#if defined(CONFIG_SERIAL_TEXT_DEBUG) && defined(SERIAL_DEBUG_IO_BASE) + + /* Load a TLB entry for the UART, so that ppc4xx_progress() can use + * the UARTs nice and early. We use a 4k real==virtual mapping. */ + + lis r3,SERIAL_DEBUG_IO_BASE@h + ori r3,r3,SERIAL_DEBUG_IO_BASE@l + mr r4,r3 + clrrwi r4,r4,12 + ori r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G) + + clrrwi r3,r3,12 + ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K)) + + li r0,0 /* TLB slot 0 */ + tlbwe r4,r0,TLB_DATA + tlbwe r3,r0,TLB_TAG +#endif /* CONFIG_SERIAL_DEBUG_TEXT && SERIAL_DEBUG_IO_BASE */ + + isync + + /* Establish the exception vector base + */ + lis r4,KERNELBASE@h /* EVPR only uses the high 16-bits */ + tophys(r0,r4) /* Use the physical address */ + mtspr SPRN_EVPR,r0 + + blr + +_GLOBAL(abort) + mfspr r13,SPRN_DBCR0 + oris r13,r13,DBCR0_RST_SYSTEM@h + mtspr SPRN_DBCR0,r13 + +_GLOBAL(set_context) + +#ifdef CONFIG_BDI_SWITCH + /* Context switch the PTE pointer for the Abatron BDI2000. + * The PGDIR is the second parameter. + */ + lis r5, KERNELBASE@h + lwz r5, 0xf0(r5) + stw r4, 0x4(r5) +#endif + sync + mtspr SPRN_PID,r3 + isync /* Need an isync to flush shadow */ + /* TLBs after changing PID */ + blr + +/* We put a few things here that have to be page-aligned. This stuff + * goes at the beginning of the data segment, which is page-aligned. + */ + .data + .align 12 + .globl sdata +sdata: + .globl empty_zero_page +empty_zero_page: + .space 4096 + .globl swapper_pg_dir +swapper_pg_dir: + .space PGD_TABLE_SIZE + +/* Room for two PTE pointers, usually the kernel and current user pointers + * to their respective root page table. 
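initial_mmu above pins one 16MB entry into slot 63 so the kernel can run translated before MMU_init: the data word is the physical base plus write/execute permission, the tag word the effective base plus valid and size bits. A sketch with named stand-ins for the TLB_* encodings (the real bit values come from asm/mmu.h and are assumed here):

    /* Illustrative field encodings; the real ones live in asm/mmu.h. */
    #define TLB_WR_BIT       (1u << 2)
    #define TLB_EX_BIT       (1u << 3)
    #define TLB_VALID_BIT    (1u << 6)
    #define TLB_PAGESZ_16M   (7u << 7)

    struct tlb40x_entry { unsigned int tag, data; };

    /* Sketch: the pinned boot mapping written to slot 63 by initial_mmu;
     * clrrwi ...,10 is the ~0x3ff masking below. */
    static struct tlb40x_entry boot_mapping(unsigned int vaddr,
                                            unsigned int paddr)
    {
            struct tlb40x_entry e;

            e.data = (paddr & ~0x3ffu) | TLB_WR_BIT | TLB_EX_BIT;
            e.tag  = (vaddr & ~0x3ffu) | TLB_VALID_BIT | TLB_PAGESZ_16M;
            return e;
    }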
+ */ +abatron_pteptrs: + .space 8 diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S new file mode 100644 index 00000000..7dd2981b --- /dev/null +++ b/arch/powerpc/kernel/head_44x.S @@ -0,0 +1,1285 @@ +/* + * Kernel execution entry point code. + * + * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org> + * Initial PowerPC version. + * Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu> + * Rewritten for PReP + * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au> + * Low-level exception handers, MMU support, and rewrite. + * Copyright (c) 1997 Dan Malek <dmalek@jlc.net> + * PowerPC 8xx modifications. + * Copyright (c) 1998-1999 TiVo, Inc. + * PowerPC 403GCX modifications. + * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu> + * PowerPC 403GCX/405GP modifications. + * Copyright 2000 MontaVista Software Inc. + * PPC405 modifications + * PowerPC 403GCX/405GP modifications. + * Author: MontaVista Software, Inc. + * frank_rowand@mvista.com or source@mvista.com + * debbie_chu@mvista.com + * Copyright 2002-2005 MontaVista Software, Inc. + * PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/init.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ptrace.h> +#include <asm/synch.h> +#include "head_booke.h" + + +/* As with the other PowerPC ports, it is expected that when code + * execution begins here, the following registers contain valid, yet + * optional, information: + * + * r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.) + * r4 - Starting address of the init RAM disk + * r5 - Ending address of the init RAM disk + * r6 - Start of kernel command line string (e.g. "mem=128") + * r7 - End of kernel command line string + * + */ + __HEAD +_ENTRY(_stext); +_ENTRY(_start); + /* + * Reserve a word at a fixed location to store the address + * of abatron_pteptrs + */ + nop + mr r31,r3 /* save device tree ptr */ + li r24,0 /* CPU number */ + +#ifdef CONFIG_RELOCATABLE +/* + * Relocate ourselves to the current runtime address. + * This is called only by the Boot CPU. + * "relocate" is called with our current runtime virutal + * address. + * r21 will be loaded with the physical runtime address of _stext + */ + bl 0f /* Get our runtime address */ +0: mflr r21 /* Make it accessible */ + addis r21,r21,(_stext - 0b)@ha + addi r21,r21,(_stext - 0b)@l /* Get our current runtime base */ + + /* + * We have the runtime (virutal) address of our base. + * We calculate our shift of offset from a 256M page. + * We could map the 256M page we belong to at PAGE_OFFSET and + * get going from there. + */ + lis r4,KERNELBASE@h + ori r4,r4,KERNELBASE@l + rlwinm r6,r21,0,4,31 /* r6 = PHYS_START % 256M */ + rlwinm r5,r4,0,4,31 /* r5 = KERNELBASE % 256M */ + subf r3,r5,r6 /* r3 = r6 - r5 */ + add r3,r4,r3 /* Required Virutal Address */ + + bl relocate +#endif + + bl init_cpu_state + + /* + * This is where the main kernel code starts. 
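The CONFIG_RELOCATABLE entry code above relies on one invariant: the kernel keeps its offset within a 256MB page. So the address handed to relocate is KERNELBASE rebased by the difference of the two intra-page offsets. In C (KERNELBASE taken as the typical 32-bit value, for illustration):

    #define KERNELBASE 0xc0000000u  /* typical value, for illustration */

    /* Sketch: the address handed to relocate() by the 44x entry code. */
    static unsigned int relocate_target(unsigned int runtime_stext)
    {
            unsigned int run_off  = runtime_stext & 0x0fffffffu; /* % 256M */
            unsigned int link_off = KERNELBASE & 0x0fffffffu;

            return KERNELBASE + (run_off - link_off);
    }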
+ */ + + /* ptr to current */ + lis r2,init_task@h + ori r2,r2,init_task@l + + /* ptr to current thread */ + addi r4,r2,THREAD /* init task's THREAD */ + mtspr SPRN_SPRG_THREAD,r4 + + /* stack */ + lis r1,init_thread_union@h + ori r1,r1,init_thread_union@l + li r0,0 + stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + + bl early_init + +#ifdef CONFIG_RELOCATABLE + /* + * Relocatable kernel support based on processing of dynamic + * relocation entries. + * + * r25 will contain RPN/ERPN for the start address of memory + * r21 will contain the current offset of _stext + */ + lis r3,kernstart_addr@ha + la r3,kernstart_addr@l(r3) + + /* + * Compute the kernstart_addr. + * kernstart_addr => (r6,r8) + * kernstart_addr & ~0xfffffff => (r6,r7) + */ + rlwinm r6,r25,0,28,31 /* ERPN. Bits 32-35 of Address */ + rlwinm r7,r25,0,0,3 /* RPN - assuming 256 MB page size */ + rlwinm r8,r21,0,4,31 /* r8 = (_stext & 0xfffffff) */ + or r8,r7,r8 /* Compute the lower 32bit of kernstart_addr */ + + /* Store kernstart_addr */ + stw r6,0(r3) /* higher 32bit */ + stw r8,4(r3) /* lower 32bit */ + + /* + * Compute the virt_phys_offset : + * virt_phys_offset = stext.run - kernstart_addr + * + * stext.run = (KERNELBASE & ~0xfffffff) + (kernstart_addr & 0xfffffff) + * When we relocate, we have : + * + * (kernstart_addr & 0xfffffff) = (stext.run & 0xfffffff) + * + * hence: + * virt_phys_offset = (KERNELBASE & ~0xfffffff) - (kernstart_addr & ~0xfffffff) + * + */ + + /* KERNELBASE&~0xfffffff => (r4,r5) */ + li r4, 0 /* higer 32bit */ + lis r5,KERNELBASE@h + rlwinm r5,r5,0,0,3 /* Align to 256M, lower 32bit */ + + /* + * 64bit subtraction. + */ + subfc r5,r7,r5 + subfe r4,r6,r4 + + /* Store virt_phys_offset */ + lis r3,virt_phys_offset@ha + la r3,virt_phys_offset@l(r3) + + stw r4,0(r3) + stw r5,4(r3) + +#elif defined(CONFIG_DYNAMIC_MEMSTART) + /* + * Mapping based, page aligned dynamic kernel loading. + * + * r25 will contain RPN/ERPN for the start address of memory + * + * Add the difference between KERNELBASE and PAGE_OFFSET to the + * start of physical memory to get kernstart_addr. + */ + lis r3,kernstart_addr@ha + la r3,kernstart_addr@l(r3) + + lis r4,KERNELBASE@h + ori r4,r4,KERNELBASE@l + lis r5,PAGE_OFFSET@h + ori r5,r5,PAGE_OFFSET@l + subf r4,r5,r4 + + rlwinm r6,r25,0,28,31 /* ERPN */ + rlwinm r7,r25,0,0,3 /* RPN - assuming 256 MB page size */ + add r7,r7,r4 + + stw r6,0(r3) + stw r7,4(r3) +#endif + +/* + * Decide what sort of machine this is and initialize the MMU. + */ + li r3,0 + mr r4,r31 + bl machine_init + bl MMU_init + + /* Setup PTE pointers for the Abatron bdiGDB */ + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + lis r4, KERNELBASE@h + ori r4, r4, KERNELBASE@l + stw r5, 0(r4) /* Save abatron_pteptrs at a fixed location */ + stw r6, 0(r5) + + /* Clear the Machine Check Syndrome Register */ + li r0,0 + mtspr SPRN_MCSR,r0 + + /* Let's move on */ + lis r4,start_kernel@h + ori r4,r4,start_kernel@l + lis r3,MSR_KERNEL@h + ori r3,r3,MSR_KERNEL@l + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + rfi /* change context and jump to start_kernel */ + +/* + * Interrupt vector entry code + * + * The Book E MMUs are always on so we don't need to handle + * interrupts in real mode as with previous PPC processors. In + * this case we handle interrupts in the kernel virtual address + * space. + * + * Interrupt vectors are dynamically placed relative to the + * interrupt prefix as determined by the address of interrupt_base. 
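The subfc/subfe pair above is simply a 64-bit evaluation of the formula stated in the comment. A C sketch, with kernstart_addr packed as ERPN:RPN into one 64-bit integer and KERNELBASE assumed to be the usual 0xc0000000:

    /* Sketch: virt_phys_offset for the relocatable 44x kernel;
     * kernstart is the 36-bit ERPN:RPN start of system memory. */
    static long long virt_phys_offset_of(unsigned long long kernstart)
    {
            unsigned long long kbase = 0xc0000000ull & ~0x0fffffffull;

            return (long long)(kbase - (kernstart & ~0x0fffffffull));
    }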
+ * The interrupt vectors offsets are programmed using the labels + * for each interrupt vector entry. + * + * Interrupt vectors must be aligned on a 16 byte boundary. + * We align on a 32 byte cache line boundary for good measure. + */ + +interrupt_base: + /* Critical Input Interrupt */ + CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) + + /* Machine Check Interrupt */ + CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) + MCHECK_EXCEPTION(0x0210, MachineCheckA, machine_check_exception) + + /* Data Storage Interrupt */ + DATA_STORAGE_EXCEPTION + + /* Instruction Storage Interrupt */ + INSTRUCTION_STORAGE_EXCEPTION + + /* External Input Interrupt */ + EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) + + /* Alignment Interrupt */ + ALIGNMENT_EXCEPTION + + /* Program Interrupt */ + PROGRAM_EXCEPTION + + /* Floating Point Unavailable Interrupt */ +#ifdef CONFIG_PPC_FPU + FP_UNAVAILABLE_EXCEPTION +#else + EXCEPTION(0x2010, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) +#endif + /* System Call Interrupt */ + START_EXCEPTION(SystemCall) + NORMAL_EXCEPTION_PROLOG + EXC_XFER_EE_LITE(0x0c00, DoSyscall) + + /* Auxiliary Processor Unavailable Interrupt */ + EXCEPTION(0x2020, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) + + /* Decrementer Interrupt */ + DECREMENTER_EXCEPTION + + /* Fixed Internal Timer Interrupt */ + /* TODO: Add FIT support */ + EXCEPTION(0x1010, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) + + /* Watchdog Timer Interrupt */ + /* TODO: Add watchdog support */ +#ifdef CONFIG_BOOKE_WDT + CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException) +#else + CRITICAL_EXCEPTION(0x1020, WatchdogTimer, unknown_exception) +#endif + + /* Data TLB Error Interrupt */ + START_EXCEPTION(DataTLBError44x) + mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ + mtspr SPRN_SPRG_WSCRATCH1, r11 + mtspr SPRN_SPRG_WSCRATCH2, r12 + mtspr SPRN_SPRG_WSCRATCH3, r13 + mfcr r11 + mtspr SPRN_SPRG_WSCRATCH4, r11 + mfspr r10, SPRN_DEAR /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, PAGE_OFFSET@h + cmplw r10, r11 + blt+ 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + + mfspr r12,SPRN_MMUCR + rlwinm r12,r12,0,0,23 /* Clear TID */ + + b 4f + + /* Get the PGD for the current thread */ +3: + mfspr r11,SPRN_SPRG_THREAD + lwz r11,PGDIR(r11) + + /* Load PID into MMUCR TID */ + mfspr r12,SPRN_MMUCR + mfspr r13,SPRN_PID /* Get PID */ + rlwimi r12,r13,0,24,31 /* Set TID */ + +4: + mtspr SPRN_MMUCR,r12 + + /* Mask of required permission bits. Note that while we + * do copy ESR:ST to _PAGE_RW position as trying to write + * to an RO page is pretty common, we don't do it with + * _PAGE_DIRTY. We could do it, but it's a fairly rare + * event so I'd rather take the overhead when it happens + * rather than adding an instruction here. We should measure + * whether the whole thing is worth it in the first place + * as we could avoid loading SPRN_ESR completely in the first + * place... + * + * TODO: Is it worth doing that mfspr & rlwimi in the first + * place or can we save a couple of instructions here ? + */ + mfspr r12,SPRN_ESR + li r13,_PAGE_PRESENT|_PAGE_ACCESSED + rlwimi r13,r12,10,30,30 + + /* Load the PTE */ + /* Compute pgdir/pmd offset */ + rlwinm r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK_BIT, 29 + lwzx r11, r12, r11 /* Get pgd/pmd entry */ + rlwinm. 
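The "mask of required permission bits" built in the handler below reads, in C: the PTE must supply present and accessed, plus RW when ESR flags a store (that is what the rlwimi copying ESR[ST] into the _PAGE_RW position does); any missing bit bails to DataStorage. Bit values here are illustrative:

    #define _PAGE_PRESENT  0x001    /* illustrative values only */
    #define _PAGE_RW       0x002
    #define _PAGE_ACCESSED 0x008
    #define ESR_ST         0x00800000u

    /* Sketch: the 44x TLB-miss permission check. Nonzero => bail. */
    static unsigned int missing_bits(unsigned int esr, unsigned int pte_lo)
    {
            unsigned int need = _PAGE_PRESENT | _PAGE_ACCESSED;

            if (esr & ESR_ST)       /* store faults also demand RW */
                    need |= _PAGE_RW;
            return need & ~pte_lo;  /* the andc. r13,r13,r12 */
    }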
r12, r11, 0, 0, 20 /* Extract pt base address */ + beq 2f /* Bail if no table */ + + /* Compute pte address */ + rlwimi r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK_BIT, 28 + lwz r11, 0(r12) /* Get high word of pte entry */ + lwz r12, 4(r12) /* Get low word of pte entry */ + + lis r10,tlb_44x_index@ha + + andc. r13,r13,r12 /* Check permission */ + + /* Load the next available TLB index */ + lwz r13,tlb_44x_index@l(r10) + + bne 2f /* Bail if permission mismach */ + + /* Increment, rollover, and store TLB index */ + addi r13,r13,1 + + /* Compare with watermark (instruction gets patched) */ + .globl tlb_44x_patch_hwater_D +tlb_44x_patch_hwater_D: + cmpwi 0,r13,1 /* reserve entries */ + ble 5f + li r13,0 +5: + /* Store the next available TLB index */ + stw r13,tlb_44x_index@l(r10) + + /* Re-load the faulting address */ + mfspr r10,SPRN_DEAR + + /* Jump to common tlb load */ + b finish_tlb_load_44x + +2: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11, SPRN_SPRG_RSCRATCH4 + mtcr r11 + mfspr r13, SPRN_SPRG_RSCRATCH3 + mfspr r12, SPRN_SPRG_RSCRATCH2 + mfspr r11, SPRN_SPRG_RSCRATCH1 + mfspr r10, SPRN_SPRG_RSCRATCH0 + b DataStorage + + /* Instruction TLB Error Interrupt */ + /* + * Nearly the same as above, except we get our + * information from different registers and bailout + * to a different point. + */ + START_EXCEPTION(InstructionTLBError44x) + mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ + mtspr SPRN_SPRG_WSCRATCH1, r11 + mtspr SPRN_SPRG_WSCRATCH2, r12 + mtspr SPRN_SPRG_WSCRATCH3, r13 + mfcr r11 + mtspr SPRN_SPRG_WSCRATCH4, r11 + mfspr r10, SPRN_SRR0 /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, PAGE_OFFSET@h + cmplw r10, r11 + blt+ 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + + mfspr r12,SPRN_MMUCR + rlwinm r12,r12,0,0,23 /* Clear TID */ + + b 4f + + /* Get the PGD for the current thread */ +3: + mfspr r11,SPRN_SPRG_THREAD + lwz r11,PGDIR(r11) + + /* Load PID into MMUCR TID */ + mfspr r12,SPRN_MMUCR + mfspr r13,SPRN_PID /* Get PID */ + rlwimi r12,r13,0,24,31 /* Set TID */ + +4: + mtspr SPRN_MMUCR,r12 + + /* Make up the required permissions */ + li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC + + /* Compute pgdir/pmd offset */ + rlwinm r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK_BIT, 29 + lwzx r11, r12, r11 /* Get pgd/pmd entry */ + rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */ + beq 2f /* Bail if no table */ + + /* Compute pte address */ + rlwimi r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK_BIT, 28 + lwz r11, 0(r12) /* Get high word of pte entry */ + lwz r12, 4(r12) /* Get low word of pte entry */ + + lis r10,tlb_44x_index@ha + + andc. r13,r13,r12 /* Check permission */ + + /* Load the next available TLB index */ + lwz r13,tlb_44x_index@l(r10) + + bne 2f /* Bail if permission mismach */ + + /* Increment, rollover, and store TLB index */ + addi r13,r13,1 + + /* Compare with watermark (instruction gets patched) */ + .globl tlb_44x_patch_hwater_I +tlb_44x_patch_hwater_I: + cmpwi 0,r13,1 /* reserve entries */ + ble 5f + li r13,0 +5: + /* Store the next available TLB index */ + stw r13,tlb_44x_index@l(r10) + + /* Re-load the faulting address */ + mfspr r10,SPRN_SRR0 + + /* Jump to common TLB load point */ + b finish_tlb_load_44x + +2: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. 
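Both 44x TLB-error paths above allocate victim slots the same way, and the cmpwi immediates at tlb_44x_patch_hwater_D/_I are patched at boot so the reserved (pinned) entries above the watermark are never recycled. The allocation as a C sketch:

    static unsigned int tlb_44x_index;

    /* Sketch: next victim slot; 'hwater' stands in for the immediate
     * patched into the cmpwi at tlb_44x_patch_hwater_D/_I. */
    static unsigned int next_44x_slot(unsigned int hwater)
    {
            if (++tlb_44x_index > hwater)   /* skip reserved entries */
                    tlb_44x_index = 0;
            return tlb_44x_index;
    }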
+ */
+ mfspr r11, SPRN_SPRG_RSCRATCH4
+ mtcr r11
+ mfspr r13, SPRN_SPRG_RSCRATCH3
+ mfspr r12, SPRN_SPRG_RSCRATCH2
+ mfspr r11, SPRN_SPRG_RSCRATCH1
+ mfspr r10, SPRN_SPRG_RSCRATCH0
+ b InstructionStorage
+
+/*
+ * Both the instruction and data TLB miss get to this
+ * point to load the TLB.
+ * r10 - EA of fault
+ * r11 - PTE high word value
+ * r12 - PTE low word value
+ * r13 - TLB index
+ * MMUCR - loaded with proper value when we get here
+ * Upon exit, we reload everything and RFI.
+ */
+finish_tlb_load_44x:
+ /* Combine RPN & ERPN and write WS 0 */
+ rlwimi r11,r12,0,0,31-PAGE_SHIFT
+ tlbwe r11,r13,PPC44x_TLB_XLAT
+
+ /*
+ * Create WS1. This is the faulting address (EPN),
+ * page size, and valid flag.
+ */
+ li r11,PPC44x_TLB_VALID | PPC44x_TLBE_SIZE
+ /* Insert valid and page size */
+ rlwimi r10,r11,0,PPC44x_PTE_ADD_MASK_BIT,31
+ tlbwe r10,r13,PPC44x_TLB_PAGEID /* Write PAGEID */
+
+ /* And WS 2 */
+ li r10,0xf85 /* Mask to apply from PTE */
+ rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */
+ and r11,r12,r10 /* Mask PTE bits to keep */
+ andi. r10,r12,_PAGE_USER /* User page ? */
+ beq 1f /* nope, leave U bits empty */
+ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
+1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */
+
+ /* Done...restore registers and get out of here.
+ */
+ mfspr r11, SPRN_SPRG_RSCRATCH4
+ mtcr r11
+ mfspr r13, SPRN_SPRG_RSCRATCH3
+ mfspr r12, SPRN_SPRG_RSCRATCH2
+ mfspr r11, SPRN_SPRG_RSCRATCH1
+ mfspr r10, SPRN_SPRG_RSCRATCH0
+ rfi /* Force context change */
+
+/* TLB error interrupts for 476
+ */
+#ifdef CONFIG_PPC_47x
+ START_EXCEPTION(DataTLBError47x)
+ mtspr SPRN_SPRG_WSCRATCH0,r10 /* Save some working registers */
+ mtspr SPRN_SPRG_WSCRATCH1,r11
+ mtspr SPRN_SPRG_WSCRATCH2,r12
+ mtspr SPRN_SPRG_WSCRATCH3,r13
+ mfcr r11
+ mtspr SPRN_SPRG_WSCRATCH4,r11
+ mfspr r10,SPRN_DEAR /* Get faulting address */
+
+ /* If we are faulting a kernel address, we have to use the
+ * kernel page tables.
+ */
+ lis r11,PAGE_OFFSET@h
+ cmplw cr0,r10,r11
+ blt+ 3f
+ lis r11,swapper_pg_dir@h
+ ori r11,r11, swapper_pg_dir@l
+ li r12,0 /* MMUCR = 0 */
+ b 4f
+
+ /* Get the PGD for the current thread and setup MMUCR */
+3: mfspr r11,SPRN_SPRG3
+ lwz r11,PGDIR(r11)
+ mfspr r12,SPRN_PID /* Get PID */
+4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */
+
+ /* Mask of required permission bits. Note that while we
+ * do copy ESR:ST to _PAGE_RW position as trying to write
+ * to an RO page is pretty common, we don't do it with
+ * _PAGE_DIRTY. We could do it, but it's a fairly rare
+ * event so I'd rather take the overhead when it happens
+ * rather than adding an instruction here. We should measure
+ * whether the whole thing is worth it in the first place
+ * as we could avoid loading SPRN_ESR completely in the first
+ * place...
+ *
+ * TODO: Is it worth doing that mfspr & rlwimi in the first
+ * place or can we save a couple of instructions here ?
+ */
+ mfspr r12,SPRN_ESR
+ li r13,_PAGE_PRESENT|_PAGE_ACCESSED
+ rlwimi r13,r12,10,30,30
+
+ /* Load the PTE */
+ /* Compute pgdir/pmd offset */
+ rlwinm r12,r10,PPC44x_PGD_OFF_SHIFT,PPC44x_PGD_OFF_MASK_BIT,29
+ lwzx r11,r12,r11 /* Get pgd/pmd entry */
+
+ /* Word 0 is EPN,V,TS,DSIZ */
+ li r12,PPC47x_TLB0_VALID | PPC47x_TLBE_SIZE
+ rlwimi r10,r12,0,32-PAGE_SHIFT,31 /* Insert valid and page size*/
+ li r12,0
+ tlbwe r10,r12,0
+
+ /* XXX can we do better ? Need to make sure tlbwe has established
+ * latch V bit in MMUCR0 before the PTE is loaded further down */
+#ifdef CONFIG_SMP
+ isync
+#endif
+
+ rlwinm.
r12,r11,0,0,20 /* Extract pt base address */ + /* Compute pte address */ + rlwimi r12,r10,PPC44x_PTE_ADD_SHIFT,PPC44x_PTE_ADD_MASK_BIT,28 + beq 2f /* Bail if no table */ + lwz r11,0(r12) /* Get high word of pte entry */ + + /* XXX can we do better ? maybe insert a known 0 bit from r11 into the + * bottom of r12 to create a data dependency... We can also use r10 + * as destination nowadays + */ +#ifdef CONFIG_SMP + lwsync +#endif + lwz r12,4(r12) /* Get low word of pte entry */ + + andc. r13,r13,r12 /* Check permission */ + + /* Jump to common tlb load */ + beq finish_tlb_load_47x + +2: /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11,SPRN_SPRG_RSCRATCH4 + mtcr r11 + mfspr r13,SPRN_SPRG_RSCRATCH3 + mfspr r12,SPRN_SPRG_RSCRATCH2 + mfspr r11,SPRN_SPRG_RSCRATCH1 + mfspr r10,SPRN_SPRG_RSCRATCH0 + b DataStorage + + /* Instruction TLB Error Interrupt */ + /* + * Nearly the same as above, except we get our + * information from different registers and bailout + * to a different point. + */ + START_EXCEPTION(InstructionTLBError47x) + mtspr SPRN_SPRG_WSCRATCH0,r10 /* Save some working registers */ + mtspr SPRN_SPRG_WSCRATCH1,r11 + mtspr SPRN_SPRG_WSCRATCH2,r12 + mtspr SPRN_SPRG_WSCRATCH3,r13 + mfcr r11 + mtspr SPRN_SPRG_WSCRATCH4,r11 + mfspr r10,SPRN_SRR0 /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11,PAGE_OFFSET@h + cmplw cr0,r10,r11 + blt+ 3f + lis r11,swapper_pg_dir@h + ori r11,r11, swapper_pg_dir@l + li r12,0 /* MMUCR = 0 */ + b 4f + + /* Get the PGD for the current thread and setup MMUCR */ +3: mfspr r11,SPRN_SPRG_THREAD + lwz r11,PGDIR(r11) + mfspr r12,SPRN_PID /* Get PID */ +4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ + + /* Make up the required permissions */ + li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC + + /* Load PTE */ + /* Compute pgdir/pmd offset */ + rlwinm r12,r10,PPC44x_PGD_OFF_SHIFT,PPC44x_PGD_OFF_MASK_BIT,29 + lwzx r11,r12,r11 /* Get pgd/pmd entry */ + + /* Word 0 is EPN,V,TS,DSIZ */ + li r12,PPC47x_TLB0_VALID | PPC47x_TLBE_SIZE + rlwimi r10,r12,0,32-PAGE_SHIFT,31 /* Insert valid and page size*/ + li r12,0 + tlbwe r10,r12,0 + + /* XXX can we do better ? Need to make sure tlbwe has established + * latch V bit in MMUCR0 before the PTE is loaded further down */ +#ifdef CONFIG_SMP + isync +#endif + + rlwinm. r12,r11,0,0,20 /* Extract pt base address */ + /* Compute pte address */ + rlwimi r12,r10,PPC44x_PTE_ADD_SHIFT,PPC44x_PTE_ADD_MASK_BIT,28 + beq 2f /* Bail if no table */ + + lwz r11,0(r12) /* Get high word of pte entry */ + /* XXX can we do better ? maybe insert a known 0 bit from r11 into the + * bottom of r12 to create a data dependency... We can also use r10 + * as destination nowadays + */ +#ifdef CONFIG_SMP + lwsync +#endif + lwz r12,4(r12) /* Get low word of pte entry */ + + andc. r13,r13,r12 /* Check permission */ + + /* Jump to common TLB load point */ + beq finish_tlb_load_47x + +2: /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11, SPRN_SPRG_RSCRATCH4 + mtcr r11 + mfspr r13, SPRN_SPRG_RSCRATCH3 + mfspr r12, SPRN_SPRG_RSCRATCH2 + mfspr r11, SPRN_SPRG_RSCRATCH1 + mfspr r10, SPRN_SPRG_RSCRATCH0 + b InstructionStorage + +/* + * Both the instruction and data TLB miss get to this + * point to load the TLB. 
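+ *
+ * Schematically, the three TLB words look like this (illustrative
+ * sketch only, not an authoritative layout):
+ *	word 0 = EPN | valid | TS | page size	(already written in the
+ *						 miss handlers above)
+ *	word 1 = RPN | ERPN
+ *	word 2 = WIMG, U0-U3 and S/U permission bits
+ *
+ * Register usage on entry: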
+ * r10 - free to use
+ * r11 - PTE high word value
+ * r12 - PTE low word value
+ * r13 - free to use
+ * MMUCR - loaded with proper value when we get here
+ * Upon exit, we reload everything and RFI.
+ */
+finish_tlb_load_47x:
+ /* Combine RPN & ERPN and write WS 1 */
+ rlwimi r11,r12,0,0,31-PAGE_SHIFT
+ tlbwe r11,r13,1
+
+ /* And make up word 2 */
+ li r10,0xf85 /* Mask to apply from PTE */
+ rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */
+ and r11,r12,r10 /* Mask PTE bits to keep */
+ andi. r10,r12,_PAGE_USER /* User page ? */
+ beq 1f /* nope, leave U bits empty */
+ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
+1: tlbwe r11,r13,2
+
+ /* Done...restore registers and get out of here.
+ */
+ mfspr r11, SPRN_SPRG_RSCRATCH4
+ mtcr r11
+ mfspr r13, SPRN_SPRG_RSCRATCH3
+ mfspr r12, SPRN_SPRG_RSCRATCH2
+ mfspr r11, SPRN_SPRG_RSCRATCH1
+ mfspr r10, SPRN_SPRG_RSCRATCH0
+ rfi
+
+#endif /* CONFIG_PPC_47x */
+
+ /* Debug Interrupt */
+ /*
+ * This statement needs to exist at the end of the IVPR
+ * definition just in case you end up taking a debug
+ * exception within another exception.
+ */
+ DEBUG_CRIT_EXCEPTION
+
+/*
+ * Global functions
+ */
+
+/*
+ * Adjust the machine check IVOR on 440A cores
+ */
+_GLOBAL(__fixup_440A_mcheck)
+ li r3,MachineCheckA@l
+ mtspr SPRN_IVOR1,r3
+ sync
+ blr
+
+/*
+ * extern void giveup_altivec(struct task_struct *prev)
+ *
+ * The 44x core does not have an AltiVec unit.
+ */
+_GLOBAL(giveup_altivec)
+ blr
+
+/*
+ * extern void giveup_fpu(struct task_struct *prev)
+ *
+ * The 44x core does not have an FPU.
+ */
+#ifndef CONFIG_PPC_FPU
+_GLOBAL(giveup_fpu)
+ blr
+#endif
+
+_GLOBAL(set_context)
+
+#ifdef CONFIG_BDI_SWITCH
+ /* Context switch the PTE pointer for the Abatron BDI2000.
+ * The PGDIR is the second parameter.
+ */
+ lis r5, abatron_pteptrs@h
+ ori r5, r5, abatron_pteptrs@l
+ stw r4, 0x4(r5)
+#endif
+ mtspr SPRN_PID,r3
+ isync /* Force context change */
+ blr
+
+/*
+ * Init CPU state. This is called at boot time or for secondary CPUs
+ * to set up initial TLB entries, set up IVORs, etc...
+ *
+ */
+_GLOBAL(init_cpu_state)
+ mflr r22
+#ifdef CONFIG_PPC_47x
+ /* We use the PVR to differentiate 44x cores from 476 */
+ mfspr r3,SPRN_PVR
+ srwi r3,r3,16
+ cmplwi cr0,r3,PVR_476FPE@h
+ beq head_start_47x
+ cmplwi cr0,r3,PVR_476@h
+ beq head_start_47x
+ cmplwi cr0,r3,PVR_476_ISS@h
+ beq head_start_47x
+#endif /* CONFIG_PPC_47x */
+
+/*
+ * In case the firmware didn't do it, we apply some workarounds
+ * that are good for all 440 core variants here
+ */
+ mfspr r3,SPRN_CCR0
+ rlwinm r3,r3,0,0,27 /* disable icache prefetch */
+ isync
+ mtspr SPRN_CCR0,r3
+ isync
+ sync
+
+/*
+ * Set up the initial MMU state for 44x
+ *
+ * We are still executing code at the virtual address
+ * mappings set by the firmware for the base of RAM.
+ *
+ * We first invalidate all TLB entries but the one
+ * we are running from. We then load the KERNELBASE
+ * mappings so we can begin to use kernel addresses
+ * natively and so the interrupt vector locations are
+ * permanently pinned (necessary since Book E
+ * implementations always have translation enabled).
+ *
+ * TODO: Use the known TLB entry we are running from to
+ * determine which physical region we are located
+ * in. This can be used to determine where in RAM
+ * (on a shared CPU system) or PCI memory space
+ * (on a DRAMless system) we are located.
+ * For now, we assume a perfect world which means
+ * we are located at the base of DRAM (physical 0).
+ */
+
+/*
+ * Search TLB for entry that we are currently using.
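+ * (Roughly, in illustrative C only:
+ *	cur = tlbsx(pc);
+ *	for (i = 0; i < 64; i++)
+ *		if (i != cur)
+ *			tlb_pageid[i] = 0;	/* i.e. invalid */
+ * which is what the tlbsx/tlbwe loop below implements.)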
+ * Invalidate all entries but the one we are using. + */ + /* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */ + mfspr r3,SPRN_PID /* Get PID */ + mfmsr r4 /* Get MSR */ + andi. r4,r4,MSR_IS@l /* TS=1? */ + beq wmmucr /* If not, leave STS=0 */ + oris r3,r3,PPC44x_MMUCR_STS@h /* Set STS=1 */ +wmmucr: mtspr SPRN_MMUCR,r3 /* Put MMUCR */ + sync + + bl invstr /* Find our address */ +invstr: mflr r5 /* Make it accessible */ + tlbsx r23,0,r5 /* Find entry we are in */ + li r4,0 /* Start at TLB entry 0 */ + li r3,0 /* Set PAGEID inval value */ +1: cmpw r23,r4 /* Is this our entry? */ + beq skpinv /* If so, skip the inval */ + tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */ +skpinv: addi r4,r4,1 /* Increment */ + cmpwi r4,64 /* Are we done? */ + bne 1b /* If not, repeat */ + isync /* If so, context change */ + +/* + * Configure and load pinned entry into TLB slot 63. + */ +#ifdef CONFIG_NONSTATIC_KERNEL + /* + * In case of a NONSTATIC_KERNEL we reuse the TLB XLAT + * entries of the initial mapping set by the boot loader. + * The XLAT entry is stored in r25 + */ + + /* Read the XLAT entry for our current mapping */ + tlbre r25,r23,PPC44x_TLB_XLAT + + lis r3,KERNELBASE@h + ori r3,r3,KERNELBASE@l + + /* Use our current RPN entry */ + mr r4,r25 +#else + + lis r3,PAGE_OFFSET@h + ori r3,r3,PAGE_OFFSET@l + + /* Kernel is at the base of RAM */ + li r4, 0 /* Load the kernel physical address */ +#endif + + /* Load the kernel PID = 0 */ + li r0,0 + mtspr SPRN_PID,r0 + sync + + /* Initialize MMUCR */ + li r5,0 + mtspr SPRN_MMUCR,r5 + sync + + /* pageid fields */ + clrrwi r3,r3,10 /* Mask off the effective page number */ + ori r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_256M + + /* xlat fields */ + clrrwi r4,r4,10 /* Mask off the real page number */ + /* ERPN is 0 for first 4GB page */ + + /* attrib fields */ + /* Added guarded bit to protect against speculative loads/stores */ + li r5,0 + ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G) + + li r0,63 /* TLB slot 63 */ + + tlbwe r3,r0,PPC44x_TLB_PAGEID /* Load the pageid fields */ + tlbwe r4,r0,PPC44x_TLB_XLAT /* Load the translation fields */ + tlbwe r5,r0,PPC44x_TLB_ATTRIB /* Load the attrib/access fields */ + + /* Force context change */ + mfmsr r0 + mtspr SPRN_SRR1, r0 + lis r0,3f@h + ori r0,r0,3f@l + mtspr SPRN_SRR0,r0 + sync + rfi + + /* If necessary, invalidate original entry we used */ +3: cmpwi r23,63 + beq 4f + li r6,0 + tlbwe r6,r23,PPC44x_TLB_PAGEID + isync + +4: +#ifdef CONFIG_PPC_EARLY_DEBUG_44x + /* Add UART mapping for early debug. 
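+ * This bolts PPC44x_EARLY_DEBUG_VIRTADDR onto the physical UART
+ * address taken from the kernel config, so early udbg output can
+ * work long before the real console is up.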
*/
+
+ /* pageid fields */
+ lis r3,PPC44x_EARLY_DEBUG_VIRTADDR@h
+ ori r3,r3,PPC44x_TLB_VALID|PPC44x_TLB_TS|PPC44x_TLB_64K
+
+ /* xlat fields */
+ lis r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h
+ ori r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH
+
+ /* attrib fields */
+ li r5,(PPC44x_TLB_SW|PPC44x_TLB_SR|PPC44x_TLB_I|PPC44x_TLB_G)
+ li r0,62 /* TLB slot 62 */
+
+ tlbwe r3,r0,PPC44x_TLB_PAGEID
+ tlbwe r4,r0,PPC44x_TLB_XLAT
+ tlbwe r5,r0,PPC44x_TLB_ATTRIB
+
+ /* Force context change */
+ isync
+#endif /* CONFIG_PPC_EARLY_DEBUG_44x */
+
+ /* Establish the interrupt vector offsets */
+ SET_IVOR(0, CriticalInput);
+ SET_IVOR(1, MachineCheck);
+ SET_IVOR(2, DataStorage);
+ SET_IVOR(3, InstructionStorage);
+ SET_IVOR(4, ExternalInput);
+ SET_IVOR(5, Alignment);
+ SET_IVOR(6, Program);
+ SET_IVOR(7, FloatingPointUnavailable);
+ SET_IVOR(8, SystemCall);
+ SET_IVOR(9, AuxillaryProcessorUnavailable);
+ SET_IVOR(10, Decrementer);
+ SET_IVOR(11, FixedIntervalTimer);
+ SET_IVOR(12, WatchdogTimer);
+ SET_IVOR(13, DataTLBError44x);
+ SET_IVOR(14, InstructionTLBError44x);
+ SET_IVOR(15, DebugCrit);
+
+ b head_start_common
+
+
+#ifdef CONFIG_PPC_47x
+
+#ifdef CONFIG_SMP
+
+/* Entry point for secondary 47x processors */
+_GLOBAL(start_secondary_47x)
+ mr r24,r3 /* CPU number */
+
+ bl init_cpu_state
+
+ /* Now we need to bolt the rest of kernel memory which
+ * is done in C code. We must be careful because our task
+ * struct or our stack can (and will probably) be out
+ * of reach of the initial 256M TLB entry, so we use a
+ * small temporary stack in .bss for that. This works
+ * because only one CPU at a time can be in this code.
+ */
+ lis r1,temp_boot_stack@h
+ ori r1,r1,temp_boot_stack@l
+ addi r1,r1,1024-STACK_FRAME_OVERHEAD
+ li r0,0
+ stw r0,0(r1)
+ bl mmu_init_secondary
+
+ /* Now we can get our task struct and real stack pointer */
+
+ /* Get current_thread_info and current */
+ lis r1,secondary_ti@ha
+ lwz r1,secondary_ti@l(r1)
+ lwz r2,TI_TASK(r1)
+
+ /* Current stack pointer */
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ li r0,0
+ stw r0,0(r1)
+
+ /* Kernel stack for exception entry in SPRG3 */
+ addi r4,r2,THREAD /* init task's THREAD */
+ mtspr SPRN_SPRG3,r4
+
+ b start_secondary
+
+#endif /* CONFIG_SMP */
+
+/*
+ * Set up the initial MMU state for 47x
+ *
+ * We are still executing code at the virtual address
+ * mappings set by the firmware for the base of RAM.
+ */
+
+head_start_47x:
+ /* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */
+ mfspr r3,SPRN_PID /* Get PID */
+ mfmsr r4 /* Get MSR */
+ andi. r4,r4,MSR_IS@l /* TS=1? */
+ beq 1f /* If not, leave STS=0 */
+ oris r3,r3,PPC47x_MMUCR_STS@h /* Set STS=1 */
+1: mtspr SPRN_MMUCR,r3 /* Put MMUCR */
+ sync
+
+ /* Find the entry we are running from */
+ bl 1f
+1: mflr r23
+ tlbsx r23,0,r23
+ tlbre r24,r23,0
+ tlbre r25,r23,1
+ tlbre r26,r23,2
+
+/*
+ * Cleanup time
+ */
+
+ /* Initialize MMUCR */
+ li r5,0
+ mtspr SPRN_MMUCR,r5
+ sync
+
+clear_all_utlb_entries:
+
+ #; Set initial values.
+
+ addis r3,0,0x8000
+ addi r4,0,0
+ addi r5,0,0
+ b clear_utlb_entry
+
+ #; Align the loop to speed things up.
+
+ .align 6
+
+clear_utlb_entry:
+
+ tlbwe r4,r3,0
+ tlbwe r5,r3,1
+ tlbwe r5,r3,2
+ addis r3,r3,0x2000
+ cmpwi r3,0
+ bne clear_utlb_entry
+ addis r3,0,0x8000
+ addis r4,r4,0x100
+ cmpwi r4,0
+ bne clear_utlb_entry
+
+ #; Restore original entry.
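+ #; (r24/r25/r26 still hold the three words of the entry we are
+ #; running from, read back with tlbre above; write them back so
+ #; our own mapping survives the wipe.)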
+ + oris r23,r23,0x8000 /* specify the way */ + tlbwe r24,r23,0 + tlbwe r25,r23,1 + tlbwe r26,r23,2 + +/* + * Configure and load pinned entry into TLB for the kernel core + */ + + lis r3,PAGE_OFFSET@h + ori r3,r3,PAGE_OFFSET@l + + /* Load the kernel PID = 0 */ + li r0,0 + mtspr SPRN_PID,r0 + sync + + /* Word 0 */ + clrrwi r3,r3,12 /* Mask off the effective page number */ + ori r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_256M + + /* Word 1 - use r25. RPN is the same as the original entry */ + + /* Word 2 */ + li r5,0 + ori r5,r5,PPC47x_TLB2_S_RWX +#ifdef CONFIG_SMP + ori r5,r5,PPC47x_TLB2_M +#endif + + /* We write to way 0 and bolted 0 */ + lis r0,0x8800 + tlbwe r3,r0,0 + tlbwe r25,r0,1 + tlbwe r5,r0,2 + +/* + * Configure SSPCR, ISPCR and USPCR for now to search everything, we can fix + * them up later + */ + LOAD_REG_IMMEDIATE(r3, 0x9abcdef0) + mtspr SPRN_SSPCR,r3 + mtspr SPRN_USPCR,r3 + LOAD_REG_IMMEDIATE(r3, 0x12345670) + mtspr SPRN_ISPCR,r3 + + /* Force context change */ + mfmsr r0 + mtspr SPRN_SRR1, r0 + lis r0,3f@h + ori r0,r0,3f@l + mtspr SPRN_SRR0,r0 + sync + rfi + + /* Invalidate original entry we used */ +3: + rlwinm r24,r24,0,21,19 /* clear the "valid" bit */ + tlbwe r24,r23,0 + addi r24,0,0 + tlbwe r24,r23,1 + tlbwe r24,r23,2 + isync /* Clear out the shadow TLB entries */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_44x + /* Add UART mapping for early debug. */ + + /* Word 0 */ + lis r3,PPC44x_EARLY_DEBUG_VIRTADDR@h + ori r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_TS | PPC47x_TLB0_1M + + /* Word 1 */ + lis r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h + ori r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH + + /* Word 2 */ + li r5,(PPC47x_TLB2_S_RW | PPC47x_TLB2_IMG) + + /* Bolted in way 0, bolt slot 5, we -hope- we don't hit the same + * congruence class as the kernel, we need to make sure of it at + * some point + */ + lis r0,0x8d00 + tlbwe r3,r0,0 + tlbwe r4,r0,1 + tlbwe r5,r0,2 + + /* Force context change */ + isync +#endif /* CONFIG_PPC_EARLY_DEBUG_44x */ + + /* Establish the interrupt vector offsets */ + SET_IVOR(0, CriticalInput); + SET_IVOR(1, MachineCheckA); + SET_IVOR(2, DataStorage); + SET_IVOR(3, InstructionStorage); + SET_IVOR(4, ExternalInput); + SET_IVOR(5, Alignment); + SET_IVOR(6, Program); + SET_IVOR(7, FloatingPointUnavailable); + SET_IVOR(8, SystemCall); + SET_IVOR(9, AuxillaryProcessorUnavailable); + SET_IVOR(10, Decrementer); + SET_IVOR(11, FixedIntervalTimer); + SET_IVOR(12, WatchdogTimer); + SET_IVOR(13, DataTLBError47x); + SET_IVOR(14, InstructionTLBError47x); + SET_IVOR(15, DebugCrit); + + /* We configure icbi to invalidate 128 bytes at a time since the + * current 32-bit kernel code isn't too happy with icache != dcache + * block size + */ + mfspr r3,SPRN_CCR0 + oris r3,r3,0x0020 + mtspr SPRN_CCR0,r3 + isync + +#endif /* CONFIG_PPC_47x */ + +/* + * Here we are back to code that is common between 44x and 47x + * + * We proceed to further kernel initialization and return to the + * main kernel entry + */ +head_start_common: + /* Establish the interrupt vector base */ + lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */ + mtspr SPRN_IVPR,r4 + + /* + * If the kernel was loaded at a non-zero 256 MB page, we need to + * mask off the most significant 4 bits to get the relative address + * from the start of physical memory + */ + rlwinm r22,r22,0,4,31 + addis r22,r22,PAGE_OFFSET@h + mtlr r22 + isync + blr + +/* + * We put a few things here that have to be page-aligned. This stuff + * goes at the beginning of the data segment, which is page-aligned. 
+ */ + .data + .align PAGE_SHIFT + .globl sdata +sdata: + .globl empty_zero_page +empty_zero_page: + .space PAGE_SIZE + +/* + * To support >32-bit physical addresses, we use an 8KB pgdir. + */ + .globl swapper_pg_dir +swapper_pg_dir: + .space PGD_TABLE_SIZE + +/* + * Room for two PTE pointers, usually the kernel and current user pointers + * to their respective root page table. + */ +abatron_pteptrs: + .space 8 + +#ifdef CONFIG_SMP + .align 12 +temp_boot_stack: + .space 1024 +#endif /* CONFIG_SMP */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S new file mode 100644 index 00000000..58bddee8 --- /dev/null +++ b/arch/powerpc/kernel/head_64.S @@ -0,0 +1,789 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * + * Adapted for 64bit PowerPC by Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikejc}@us.ibm.com + * + * This file contains the entry point for the 64-bit kernel along + * with some early initialization code common to all 64-bit powerpc + * variants. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/threads.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/bug.h> +#include <asm/cputable.h> +#include <asm/setup.h> +#include <asm/hvcall.h> +#include <asm/thread_info.h> +#include <asm/firmware.h> +#include <asm/page_64.h> +#include <asm/irqflags.h> +#include <asm/kvm_book3s_asm.h> +#include <asm/ptrace.h> +#include <asm/hw_irq.h> + +/* The physical memory is laid out such that the secondary processor + * spin code sits at 0x0000...0x00ff. On server, the vectors follow + * using the layout described in exceptions-64s.S + */ + +/* + * Entering into this code we make the following assumptions: + * + * For pSeries or server processors: + * 1. The MMU is off & open firmware is running in real mode. + * 2. The kernel is entered at __start + * -or- For OPAL entry: + * 1. The MMU is off, processor in HV mode, primary CPU enters at 0 + * with device-tree in gpr3. We also get OPAL base in r8 and + * entry in r9 for debugging purposes + * 2. Secondary processors enter at 0x60 with PIR in gpr3 + * + * For Book3E processors: + * 1. The MMU is on running in AS0 in a state defined in ePAPR + * 2. The kernel is entered at __start + */ + + .text + .globl _stext +_stext: +_GLOBAL(__start) + /* NOP this out unconditionally */ +BEGIN_FTR_SECTION + b .__start_initialization_multiplatform +END_FTR_SECTION(0, 1) + + /* Catch branch to 0 in real mode */ + trap + + /* Secondary processors spin on this value until it becomes nonzero. + * When it does it contains the real address of the descriptor + * of the function that the cpu should jump to to continue + * initialization. + */ + .globl __secondary_hold_spinloop +__secondary_hold_spinloop: + .llong 0x0 + + /* Secondary processors write this value with their cpu # */ + /* after they enter the spin loop immediately below. 
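+ * The hold protocol is, in rough illustrative C (not kernel code):
+ *
+ *	__secondary_hold_acknowledge = my_phys_cpu_id;
+ *	while (!__secondary_hold_spinloop)
+ *		;
+ *	fn = deref_function_descriptor(__secondary_hold_spinloop);
+ *	fn(my_phys_cpu_id);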
*/
+ .globl __secondary_hold_acknowledge
+__secondary_hold_acknowledge:
+ .llong 0x0
+
+#ifdef CONFIG_RELOCATABLE
+ /* This flag is set to 1 by a loader if the kernel should run
+ * at the loaded address instead of the linked address. This
+ * is used by kexec-tools to keep the kdump kernel in the
+ * crash_kernel region. The loader is responsible for
+ * observing the alignment requirement.
+ */
+ /* Do not move this variable as kexec-tools knows about it. */
+ . = 0x5c
+ .globl __run_at_load
+__run_at_load:
+ .long 0x72756e30 /* "run0" -- relocate to 0 by default */
+#endif
+
+ . = 0x60
+/*
+ * The following code is used to hold secondary processors
+ * in a spin loop after they have entered the kernel, but
+ * before the bulk of the kernel has been relocated. This code
+ * is relocated to physical address 0x60 before prom_init is run.
+ * All of it must fit below the first exception vector at 0x100.
+ * Use .globl here not _GLOBAL because we want __secondary_hold
+ * to be the actual text address, not a descriptor.
+ */
+ .globl __secondary_hold
+__secondary_hold:
+#ifndef CONFIG_PPC_BOOK3E
+ mfmsr r24
+ ori r24,r24,MSR_RI
+ mtmsrd r24 /* RI on */
+#endif
+ /* Grab our physical cpu number */
+ mr r24,r3
+
+ /* Tell the master cpu we're here */
+ /* Relocation is off & we are located at an address less */
+ /* than 0x100, so only need to grab low order offset. */
+ std r24,__secondary_hold_acknowledge-_stext(0)
+ sync
+
+ /* All secondary cpus wait here until told to start. */
+100: ld r4,__secondary_hold_spinloop-_stext(0)
+ cmpdi 0,r4,0
+ beq 100b
+
+#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
+ ld r4,0(r4) /* deref function descriptor */
+ mtctr r4
+ mr r3,r24
+ li r4,0
+ /* Make sure that patched code is visible */
+ isync
+ bctr
+#else
+ BUG_OPCODE
+#endif
+
+/* This value is used to mark exception frames on the stack. */
+ .section ".toc","aw"
+exception_marker:
+ .tc ID_72656773_68657265[TC],0x7265677368657265
+ .text
+
+/*
+ * On server, we include the exception vectors code here as it
+ * relies on absolute addressing which is only possible within
+ * this compilation unit
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#include "exceptions-64s.S"
+#endif
+
+_GLOBAL(generic_secondary_thread_init)
+ mr r24,r3
+
+ /* turn on 64-bit mode */
+ bl .enable_64b_mode
+
+ /* get a valid TOC pointer, wherever we're mapped at */
+ bl .relative_toc
+
+#ifdef CONFIG_PPC_BOOK3E
+ /* Book3E initialization */
+ mr r3,r24
+ bl .book3e_secondary_thread_init
+#endif
+ b generic_secondary_common_init
+
+/*
+ * On pSeries and most other platforms, secondary processors spin
+ * in the following code.
+ * At entry, r3 = this processor's number (physical cpu id)
+ *
+ * On Book3E, r4 = 1 to indicate that the initial TLB entry for
+ * this core already exists (setup via some other mechanism such
+ * as SCOM before entry).
+ */
+_GLOBAL(generic_secondary_smp_init)
+ mr r24,r3
+ mr r25,r4
+
+ /* turn on 64-bit mode */
+ bl .enable_64b_mode
+
+ /* get a valid TOC pointer, wherever we're mapped at */
+ bl .relative_toc
+
+#ifdef CONFIG_PPC_BOOK3E
+ /* Book3E initialization */
+ mr r3,r24
+ mr r4,r25
+ bl .book3e_secondary_core_init
+#endif
+
+generic_secondary_common_init:
+ /* Set up a paca value for this processor. Since we have the
+ * physical cpu id in r24, we need to search the pacas to find
+ * which logical id maps to our physical one.
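+ * Schematically (illustrative C only):
+ *
+ *	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+ *		if (paca[cpu].hw_cpu_id == my_hw_id)
+ *			break;
+ *	if (cpu == nr_cpu_ids)
+ *		kexec_wait();	/* not found, maybe the next kernel */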
+ */
+ LOAD_REG_ADDR(r13, paca) /* Load paca pointer */
+ ld r13,0(r13) /* Get base vaddr of paca array */
+#ifndef CONFIG_SMP
+ addi r13,r13,PACA_SIZE /* know r13 if used accidentally */
+ b .kexec_wait /* wait for next kernel if !SMP */
+#else
+ LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */
+ lwz r7,0(r7) /* also the max paca allocated */
+ li r5,0 /* logical cpu id */
+1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */
+ cmpw r6,r24 /* Compare to our id */
+ beq 2f
+ addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */
+ addi r5,r5,1
+ cmpw r5,r7 /* Check if more pacas exist */
+ blt 1b
+
+ mr r3,r24 /* not found, copy phys to r3 */
+ b .kexec_wait /* next kernel might do better */
+
+2: SET_PACA(r13)
+#ifdef CONFIG_PPC_BOOK3E
+ addi r12,r13,PACA_EXTLB /* and TLB exc frame in another */
+ mtspr SPRN_SPRG_TLB_EXFRAME,r12
+#endif
+
+ /* From now on, r24 is expected to be logical cpuid */
+ mr r24,r5
+
+ /* See if we need to call a cpu state restore handler */
+ LOAD_REG_ADDR(r23, cur_cpu_spec)
+ ld r23,0(r23)
+ ld r23,CPU_SPEC_RESTORE(r23)
+ cmpdi 0,r23,0
+ beq 3f
+ ld r23,0(r23)
+ mtctr r23
+ bctrl
+
+3: LOAD_REG_ADDR(r3, spinning_secondaries) /* Decrement spinning_secondaries */
+ lwarx r4,0,r3
+ subi r4,r4,1
+ stwcx. r4,0,r3
+ bne 3b
+ isync
+
+4: HMT_LOW
+ lbz r23,PACAPROCSTART(r13) /* Test if this processor should */
+ /* start. */
+ cmpwi 0,r23,0
+ beq 4b /* Loop until told to go */
+
+ sync /* order paca.run and cur_cpu_spec */
+ isync /* In case code patching happened */
+
+ /* Create a temp kernel stack for use before relocation is on. */
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,STACK_FRAME_OVERHEAD
+
+ b __secondary_start
+#endif /* SMP */
+
+/*
+ * Turn the MMU off.
+ * Assumes we're mapped EA == RA if the MMU is on.
+ */
+#ifdef CONFIG_PPC_BOOK3S
+_STATIC(__mmu_off)
+ mfmsr r3
+ andi. r0,r3,MSR_IR|MSR_DR
+ beqlr
+ mflr r4
+ andc r3,r3,r0
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r3
+ sync
+ rfid
+ b . /* prevent speculative execution */
+#endif
+
+
+/*
+ * Here is our main kernel entry point. We currently support 2 kinds of
+ * entries depending on the value of r5.
+ *
+ * r5 != NULL -> OF entry, we go to prom_init, "legacy" parameter content
+ * in r3...r7
+ *
+ * r5 == NULL -> kexec style entry. r3 is a physical pointer to the
+ * DT block, r4 is a physical pointer to the kernel itself
+ *
+ */
+_GLOBAL(__start_initialization_multiplatform)
+ /* Make sure we are running in 64-bit mode */
+ bl .enable_64b_mode
+
+ /* Get TOC pointer (current runtime address) */
+ bl .relative_toc
+
+ /* find out where we are now */
+ bcl 20,31,$+4
+0: mflr r26 /* r26 = runtime addr here */
+ addis r26,r26,(_stext - 0b)@ha
+ addi r26,r26,(_stext - 0b)@l /* current runtime base addr */
+
+ /*
+ * Are we booted from a PROM OF-type client interface?
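+ * (r5 != 0 means yes, per the entry conventions described above)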
+ */ + cmpldi cr0,r5,0 + beq 1f + b .__boot_from_prom /* yes -> prom */ +1: + /* Save parameters */ + mr r31,r3 + mr r30,r4 +#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL + /* Save OPAL entry */ + mr r28,r8 + mr r29,r9 +#endif + +#ifdef CONFIG_PPC_BOOK3E + bl .start_initialization_book3e + b .__after_prom_start +#else + /* Setup some critical 970 SPRs before switching MMU off */ + mfspr r0,SPRN_PVR + srwi r0,r0,16 + cmpwi r0,0x39 /* 970 */ + beq 1f + cmpwi r0,0x3c /* 970FX */ + beq 1f + cmpwi r0,0x44 /* 970MP */ + beq 1f + cmpwi r0,0x45 /* 970GX */ + bne 2f +1: bl .__cpu_preinit_ppc970 +2: + + /* Switch off MMU if not already off */ + bl .__mmu_off + b .__after_prom_start +#endif /* CONFIG_PPC_BOOK3E */ + +_INIT_STATIC(__boot_from_prom) +#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE + /* Save parameters */ + mr r31,r3 + mr r30,r4 + mr r29,r5 + mr r28,r6 + mr r27,r7 + + /* + * Align the stack to 16-byte boundary + * Depending on the size and layout of the ELF sections in the initial + * boot binary, the stack pointer may be unaligned on PowerMac + */ + rldicr r1,r1,0,59 + +#ifdef CONFIG_RELOCATABLE + /* Relocate code for where we are now */ + mr r3,r26 + bl .relocate +#endif + + /* Restore parameters */ + mr r3,r31 + mr r4,r30 + mr r5,r29 + mr r6,r28 + mr r7,r27 + + /* Do all of the interaction with OF client interface */ + mr r8,r26 + bl .prom_init +#endif /* #CONFIG_PPC_OF_BOOT_TRAMPOLINE */ + + /* We never return. We also hit that trap if trying to boot + * from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */ + trap + +_STATIC(__after_prom_start) +#ifdef CONFIG_RELOCATABLE + /* process relocations for the final address of the kernel */ + lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */ + sldi r25,r25,32 + lwz r7,__run_at_load-_stext(r26) + cmplwi cr0,r7,1 /* flagged to stay where we are ? */ + bne 1f + add r25,r25,r26 +1: mr r3,r25 + bl .relocate +#endif + +/* + * We need to run with _stext at physical address PHYSICAL_START. + * This will leave some code in the first 256B of + * real memory, which are reserved for software use. + * + * Note: This process overwrites the OF exception vectors. + */ + li r3,0 /* target addr */ +#ifdef CONFIG_PPC_BOOK3E + tovirt(r3,r3) /* on booke, we already run at PAGE_OFFSET */ +#endif + mr. r4,r26 /* In some cases the loader may */ + beq 9f /* have already put us at zero */ + li r6,0x100 /* Start offset, the first 0x100 */ + /* bytes were copied earlier. */ +#ifdef CONFIG_PPC_BOOK3E + tovirt(r6,r6) /* on booke, we already run at PAGE_OFFSET */ +#endif + +#ifdef CONFIG_CRASH_DUMP +/* + * Check if the kernel has to be running as relocatable kernel based on the + * variable __run_at_load, if it is set the kernel is treated as relocatable + * kernel, otherwise it will be moved to PHYSICAL_START + */ + lwz r7,__run_at_load-_stext(r26) + cmplwi cr0,r7,1 + bne 3f + + li r5,__end_interrupts - _stext /* just copy interrupts */ + b 5f +3: +#endif + lis r5,(copy_to_here - _stext)@ha + addi r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */ + + bl .copy_and_flush /* copy the first n bytes */ + /* this includes the code being */ + /* executed here. 
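+ * Roughly (illustrative only): copy
+ * [0x100, copy_to_here) down to the
+ * target, branch into the copy at
+ * label 4 below, then copy the rest
+ * of the kernel up to _end from there.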
*/
+ addis r8,r3,(4f - _stext)@ha /* Jump to the copy of this code */
+ addi r8,r8,(4f - _stext)@l /* that we just made */
+ mtctr r8
+ bctr
+
+p_end: .llong _end - _stext
+
+4: /* Now copy the rest of the kernel up to _end */
+ addis r5,r26,(p_end - _stext)@ha
+ ld r5,(p_end - _stext)@l(r5) /* get _end */
+5: bl .copy_and_flush /* copy the rest */
+
+9: b .start_here_multiplatform
+
+/*
+ * Copy routine used to copy the kernel to start at physical address 0
+ * and flush and invalidate the caches as needed.
+ * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset
+ * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5.
+ *
+ * Note: this routine *only* clobbers r0, r6 and lr
+ */
+_GLOBAL(copy_and_flush)
+ addi r5,r5,-8
+ addi r6,r6,-8
+4: li r0,8 /* Use the smallest common */
+ /* denominator cache line */
+ /* size. This results in */
+ /* extra cache line flushes */
+ /* but operation is correct. */
+ /* Can't get cache line size */
+ /* from NACA as it is being */
+ /* moved too. */
+
+ mtctr r0 /* put # words/line in ctr */
+3: addi r6,r6,8 /* copy a cache line */
+ ldx r0,r6,r4
+ stdx r0,r6,r3
+ bdnz 3b
+ dcbst r6,r3 /* write it to memory */
+ sync
+ icbi r6,r3 /* flush the icache line */
+ cmpld 0,r6,r5
+ blt 4b
+ sync
+ addi r5,r5,8
+ addi r6,r6,8
+ blr
+
+.align 8
+copy_to_here:
+
+#ifdef CONFIG_SMP
+#ifdef CONFIG_PPC_PMAC
+/*
+ * On PowerMac, secondary processors start from the reset vector, which
+ * is temporarily turned into a call to one of the functions below.
+ */
+ .section ".text";
+ .align 2 ;
+
+ .globl __secondary_start_pmac_0
+__secondary_start_pmac_0:
+ /* NB the entries for cpus 0, 1, 2 must each occupy 8 bytes. */
+ li r24,0
+ b 1f
+ li r24,1
+ b 1f
+ li r24,2
+ b 1f
+ li r24,3
+1:
+
+_GLOBAL(pmac_secondary_start)
+ /* turn on 64-bit mode */
+ bl .enable_64b_mode
+
+ li r0,0
+ mfspr r3,SPRN_HID4
+ rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */
+ sync
+ mtspr SPRN_HID4,r3
+ isync
+ sync
+ slbia
+
+ /* get TOC pointer (real address) */
+ bl .relative_toc
+
+ /* Copy some CPU settings from CPU 0 */
+ bl .__restore_cpu_ppc970
+
+ /* pSeries does that early, though I don't think we really need it */
+ mfmsr r3
+ ori r3,r3,MSR_RI
+ mtmsrd r3 /* RI on */
+
+ /* Set up a paca value for this processor. */
+ LOAD_REG_ADDR(r4,paca) /* Load paca pointer */
+ ld r4,0(r4) /* Get base vaddr of paca array */
+ mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */
+ add r13,r13,r4 /* for this processor. */
+ SET_PACA(r13) /* Save vaddr of paca in an SPRG*/
+
+ /* Mark interrupts soft and hard disabled (they might be enabled
+ * in the PACA when doing hotplug)
+ */
+ li r0,0
+ stb r0,PACASOFTIRQEN(r13)
+ li r0,PACA_IRQ_HARD_DIS
+ stb r0,PACAIRQHAPPENED(r13)
+
+ /* Create a temp kernel stack for use before relocation is on. */
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,STACK_FRAME_OVERHEAD
+
+ b __secondary_start
+
+#endif /* CONFIG_PPC_PMAC */
+
+/*
+ * This function is called after the master CPU has released the
+ * secondary processors. The execution environment is relocation off.
+ * The paca for this processor has the following fields initialized at
+ * this point:
+ * 1. Processor number
+ * 2.
Segment table pointer (virtual address) + * On entry the following are set: + * r1 = stack pointer (real addr of temp stack) + * r24 = cpu# (in Linux terms) + * r13 = paca virtual address + * SPRG_PACA = paca virtual address + */ + .section ".text"; + .align 2 ; + + .globl __secondary_start +__secondary_start: + /* Set thread priority to MEDIUM */ + HMT_MEDIUM + + /* Initialize the kernel stack */ + LOAD_REG_ADDR(r3, current_set) + sldi r28,r24,3 /* get current_set[cpu#] */ + ldx r14,r3,r28 + addi r14,r14,THREAD_SIZE-STACK_FRAME_OVERHEAD + std r14,PACAKSAVE(r13) + + /* Do early setup for that CPU (stab, slb, hash table pointer) */ + bl .early_setup_secondary + + /* + * setup the new stack pointer, but *don't* use this until + * translation is on. + */ + mr r1, r14 + + /* Clear backchain so we get nice backtraces */ + li r7,0 + mtlr r7 + + /* Mark interrupts soft and hard disabled (they might be enabled + * in the PACA when doing hotplug) + */ + stb r7,PACASOFTIRQEN(r13) + li r0,PACA_IRQ_HARD_DIS + stb r0,PACAIRQHAPPENED(r13) + + /* enable MMU and jump to start_secondary */ + LOAD_REG_ADDR(r3, .start_secondary_prolog) + LOAD_REG_IMMEDIATE(r4, MSR_KERNEL) + + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + RFI + b . /* prevent speculative execution */ + +/* + * Running with relocation on at this point. All we want to do is + * zero the stack back-chain pointer and get the TOC virtual address + * before going into C code. + */ +_GLOBAL(start_secondary_prolog) + ld r2,PACATOC(r13) + li r3,0 + std r3,0(r1) /* Zero the stack frame pointer */ + bl .start_secondary + b . +/* + * Reset stack pointer and call start_secondary + * to continue with online operation when woken up + * from cede in cpu offline. + */ +_GLOBAL(start_secondary_resume) + ld r1,PACAKSAVE(r13) /* Reload kernel stack pointer */ + li r3,0 + std r3,0(r1) /* Zero the stack frame pointer */ + bl .start_secondary + b . +#endif + +/* + * This subroutine clobbers r11 and r12 + */ +_GLOBAL(enable_64b_mode) + mfmsr r11 /* grab the current MSR */ +#ifdef CONFIG_PPC_BOOK3E + oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */ + mtmsr r11 +#else /* CONFIG_PPC_BOOK3E */ + li r12,(MSR_64BIT | MSR_ISF)@highest + sldi r12,r12,48 + or r11,r11,r12 + mtmsrd r11 + isync +#endif + blr + +/* + * This puts the TOC pointer into r2, offset by 0x8000 (as expected + * by the toolchain). It computes the correct value for wherever we + * are running at the moment, using position-independent code. + */ +_GLOBAL(relative_toc) + mflr r0 + bcl 20,31,$+4 +0: mflr r11 + ld r2,(p_toc - 0b)(r11) + add r2,r2,r11 + mtlr r0 + blr + +p_toc: .llong __toc_start + 0x8000 - 0b + +/* + * This is where the main kernel code starts. + */ +_INIT_STATIC(start_here_multiplatform) + /* set up the TOC (real address) */ + bl .relative_toc + + /* Clear out the BSS. It may have been done in prom_init, + * already but that's irrelevant since prom_init will soon + * be detached from the kernel completely. Besides, we need + * to clear it now for kexec-style entry. + */ + LOAD_REG_ADDR(r11,__bss_stop) + LOAD_REG_ADDR(r8,__bss_start) + sub r11,r11,r8 /* bss size */ + addi r11,r11,7 /* round up to an even double word */ + srdi. 
r11,r11,3 /* shift right by 3 */ + beq 4f + addi r8,r8,-8 + li r0,0 + mtctr r11 /* zero this many doublewords */ +3: stdu r0,8(r8) + bdnz 3b +4: + +#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL + /* Setup OPAL entry */ + std r28,0(r11); + std r29,8(r11); +#endif + +#ifndef CONFIG_PPC_BOOK3E + mfmsr r6 + ori r6,r6,MSR_RI + mtmsrd r6 /* RI on */ +#endif + +#ifdef CONFIG_RELOCATABLE + /* Save the physical address we're running at in kernstart_addr */ + LOAD_REG_ADDR(r4, kernstart_addr) + clrldi r0,r25,2 + std r0,0(r4) +#endif + + /* The following gets the stack set up with the regs */ + /* pointing to the real addr of the kernel stack. This is */ + /* all done to support the C function call below which sets */ + /* up the htab. This is done because we have relocated the */ + /* kernel but are still running in real mode. */ + + LOAD_REG_ADDR(r3,init_thread_union) + + /* set up a stack pointer */ + addi r1,r3,THREAD_SIZE + li r0,0 + stdu r0,-STACK_FRAME_OVERHEAD(r1) + + /* Do very early kernel initializations, including initial hash table, + * stab and slb setup before we turn on relocation. */ + + /* Restore parameters passed from prom_init/kexec */ + mr r3,r31 + bl .early_setup /* also sets r13 and SPRG_PACA */ + + LOAD_REG_ADDR(r3, .start_here_common) + ld r4,PACAKMSR(r13) + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + RFI + b . /* prevent speculative execution */ + + /* This is where all platforms converge execution */ +_INIT_GLOBAL(start_here_common) + /* relocation is on at this point */ + std r1,PACAKSAVE(r13) + + /* Load the TOC (virtual address) */ + ld r2,PACATOC(r13) + + /* Do more system initializations in virtual mode */ + bl .setup_system + + /* Mark interrupts soft and hard disabled (they might be enabled + * in the PACA when doing hotplug) + */ + li r0,0 + stb r0,PACASOFTIRQEN(r13) + li r0,PACA_IRQ_HARD_DIS + stb r0,PACAIRQHAPPENED(r13) + + /* Generic kernel entry */ + bl .start_kernel + + /* Not reached */ + BUG_OPCODE + +/* + * We put a few things here that have to be page-aligned. + * This stuff goes at the beginning of the bss, which is page-aligned. + */ + .section ".bss" + + .align PAGE_SHIFT + + .globl empty_zero_page +empty_zero_page: + .space PAGE_SIZE + + .globl swapper_pg_dir +swapper_pg_dir: + .space PGD_TABLE_SIZE diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S new file mode 100644 index 00000000..b2a5860a --- /dev/null +++ b/arch/powerpc/kernel/head_8xx.S @@ -0,0 +1,979 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * MPC8xx modifications by Dan Malek + * Copyright (C) 1997 Dan Malek (dmalek@jlc.net). + * + * This file contains low-level support and setup for PowerPC 8xx + * embedded processors, including trap and interrupt dispatch. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include <linux/init.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/cache.h> +#include <asm/pgtable.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ptrace.h> + +/* Macro to make the code more readable. */ +#ifdef CONFIG_8xx_CPU6 +#define DO_8xx_CPU6(val, reg) \ + li reg, val; \ + stw reg, 12(r0); \ + lwz reg, 12(r0); +#else +#define DO_8xx_CPU6(val, reg) +#endif + __HEAD +_ENTRY(_stext); +_ENTRY(_start); + +/* MPC8xx + * This port was done on an MBX board with an 860. Right now I only + * support an ELF compressed (zImage) boot from EPPC-Bug because the + * code there loads up some registers before calling us: + * r3: ptr to board info data + * r4: initrd_start or if no initrd then 0 + * r5: initrd_end - unused if r4 is 0 + * r6: Start of command line string + * r7: End of command line string + * + * I decided to use conditional compilation instead of checking PVR and + * adding more processor specific branches around code I don't need. + * Since this is an embedded processor, I also appreciate any memory + * savings I can get. + * + * The MPC8xx does not have any BATs, but it supports large page sizes. + * We first initialize the MMU to support 8M byte pages, then load one + * entry into each of the instruction and data TLBs to map the first + * 8M 1:1. I also mapped an additional I/O space 1:1 so we can get to + * the "internal" processor registers before MMU_init is called. + * + * The TLB code currently contains a major hack. Since I use the condition + * code register, I have to save and restore it. I am out of registers, so + * I just store it in memory location 0 (the TLB handlers are not reentrant). + * To avoid making any decisions, I need to use the "segment" valid bit + * in the first level table, but that would require many changes to the + * Linux page directory/table functions that I don't want to do right now. + * + * -- Dan + */ + .globl __start +__start: + mr r31,r3 /* save device tree ptr */ + + /* We have to turn on the MMU right away so we get cache modes + * set correctly. + */ + bl initial_mmu + +/* We now have the lower 8 Meg mapped into TLB entries, and the caches + * ready to work. + */ + +turn_on_mmu: + mfmsr r0 + ori r0,r0,MSR_DR|MSR_IR + mtspr SPRN_SRR1,r0 + lis r0,start_here@h + ori r0,r0,start_here@l + mtspr SPRN_SRR0,r0 + SYNC + rfi /* enables MMU */ + +/* + * Exception entry code. This code runs with address translation + * turned off, i.e. using physical addresses. + * We assume sprg3 has the physical address of the current + * task's thread_struct. + */ +#define EXCEPTION_PROLOG \ + mtspr SPRN_SPRG_SCRATCH0,r10; \ + mtspr SPRN_SPRG_SCRATCH1,r11; \ + mfcr r10; \ + EXCEPTION_PROLOG_1; \ + EXCEPTION_PROLOG_2 + +#define EXCEPTION_PROLOG_1 \ + mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ + andi. r11,r11,MSR_PR; \ + tophys(r11,r1); /* use tophys(r1) if kernel */ \ + beq 1f; \ + mfspr r11,SPRN_SPRG_THREAD; \ + lwz r11,THREAD_INFO-THREAD(r11); \ + addi r11,r11,THREAD_SIZE; \ + tophys(r11,r11); \ +1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. 
frame */ + + +#define EXCEPTION_PROLOG_2 \ + CLR_TOP32(r11); \ + stw r10,_CCR(r11); /* save registers */ \ + stw r12,GPR12(r11); \ + stw r9,GPR9(r11); \ + mfspr r10,SPRN_SPRG_SCRATCH0; \ + stw r10,GPR10(r11); \ + mfspr r12,SPRN_SPRG_SCRATCH1; \ + stw r12,GPR11(r11); \ + mflr r10; \ + stw r10,_LINK(r11); \ + mfspr r12,SPRN_SRR0; \ + mfspr r9,SPRN_SRR1; \ + stw r1,GPR1(r11); \ + stw r1,0(r11); \ + tovirt(r1,r11); /* set new kernel sp */ \ + li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \ + MTMSRD(r10); /* (except for mach check in rtas) */ \ + stw r0,GPR0(r11); \ + SAVE_4GPRS(3, r11); \ + SAVE_2GPRS(7, r11) + +/* + * Note: code which follows this uses cr0.eq (set if from kernel), + * r11, r12 (SRR0), and r9 (SRR1). + * + * Note2: once we have set r1 we are in a position to take exceptions + * again, and we could thus set MSR:RI at that point. + */ + +/* + * Exception vectors. + */ +#define EXCEPTION(n, label, hdlr, xfer) \ + . = n; \ +label: \ + EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + xfer(n, hdlr) + +#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \ + li r10,trap; \ + stw r10,_TRAP(r11); \ + li r10,MSR_KERNEL; \ + copyee(r10, r9); \ + bl tfer; \ +i##n: \ + .long hdlr; \ + .long ret + +#define COPY_EE(d, s) rlwimi d,s,0,16,16 +#define NOCOPY(d, s) + +#define EXC_XFER_STD(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ + ret_from_except) + +#define EXC_XFER_EE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_EE_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \ + ret_from_except) + +/* System reset */ + EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) + +/* Machine check */ + . = 0x200 +MachineCheck: + EXCEPTION_PROLOG + mfspr r4,SPRN_DAR + stw r4,_DAR(r11) + li r5,0x00f0 + mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */ + mfspr r5,SPRN_DSISR + stw r5,_DSISR(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_STD(0x200, machine_check_exception) + +/* Data access exception. + * This is "never generated" by the MPC8xx. We jump to it for other + * translation errors. + */ + . = 0x300 +DataAccess: + EXCEPTION_PROLOG + mfspr r10,SPRN_DSISR + stw r10,_DSISR(r11) + mr r5,r10 + mfspr r4,SPRN_DAR + li r10,0x00f0 + mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ + EXC_XFER_LITE(0x300, handle_page_fault) + +/* Instruction access exception. + * This is "never generated" by the MPC8xx. We jump to it for other + * translation errors. + */ + . = 0x400 +InstructionAccess: + EXCEPTION_PROLOG + mr r4,r12 + mr r5,r9 + EXC_XFER_LITE(0x400, handle_page_fault) + +/* External interrupt */ + EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + +/* Alignment exception */ + . = 0x600 +Alignment: + EXCEPTION_PROLOG + mfspr r4,SPRN_DAR + stw r4,_DAR(r11) + li r5,0x00f0 + mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */ + mfspr r5,SPRN_DSISR + stw r5,_DSISR(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE(0x600, alignment_exception) + +/* Program check exception */ + EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) + +/* No FPU on MPC8xx. This exception is not supposed to happen. 
+*/ + EXCEPTION(0x800, FPUnavailable, unknown_exception, EXC_XFER_STD) + +/* Decrementer */ + EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) + + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) + +/* System call */ + . = 0xc00 +SystemCall: + EXCEPTION_PROLOG + EXC_XFER_EE_LITE(0xc00, DoSyscall) + +/* Single step - not used on 601 */ + EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_EE) + +/* On the MPC8xx, this is a software emulation interrupt. It occurs + * for all unimplemented and illegal instructions. + */ + EXCEPTION(0x1000, SoftEmu, SoftwareEmulation, EXC_XFER_STD) + + . = 0x1100 +/* + * For the MPC8xx, this is a software tablewalk to load the instruction + * TLB. It is modelled after the example in the Motorola manual. The task + * switch loads the M_TWB register with the pointer to the first level table. + * If we discover there is no second level table (value is zero) or if there + * is an invalid pte, we load that into the TLB, which causes another fault + * into the TLB Error interrupt where we can handle such problems. + * We have to use the MD_xxx registers for the tablewalk because the + * equivalent MI_xxx registers only perform the attribute functions. + */ +InstructionTLBMiss: +#ifdef CONFIG_8xx_CPU6 + stw r3, 8(r0) +#endif + DO_8xx_CPU6(0x3f80, r3) + mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ + mfcr r10 +#ifdef CONFIG_8xx_CPU6 + stw r10, 0(r0) + stw r11, 4(r0) +#else + mtspr SPRN_DAR, r10 + mtspr SPRN_SPRG2, r11 +#endif + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ +#ifdef CONFIG_8xx_CPU15 + addi r11, r10, 0x1000 + tlbie r11 + addi r11, r10, -0x1000 + tlbie r11 +#endif + DO_8xx_CPU6(0x3780, r3) + mtspr SPRN_MD_EPN, r10 /* Have to use MD_EPN for walk, MI_EPN can't */ + mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ +#ifdef CONFIG_MODULES + /* Only modules will cause ITLB Misses as we always + * pin the first 8MB of kernel memory */ + andi. r11, r10, 0x0800 /* Address >= 0x80000000 */ + beq 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + rlwimi r10, r11, 0, 2, 19 +3: +#endif + lwz r11, 0(r10) /* Get the level 1 entry */ + rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ + beq 2f /* If zero, don't try to find a pte */ + + /* We have a pte table, so load the MI_TWC with the attributes + * for this "segment." + */ + ori r11,r11,1 /* Set valid bit */ + DO_8xx_CPU6(0x2b80, r3) + mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ + DO_8xx_CPU6(0x3b80, r3) + mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ + mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ + lwz r10, 0(r11) /* Get the pte */ + +#ifdef CONFIG_SWAP + andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT + cmpwi cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT + bne- cr0, 2f +#endif + /* The Linux PTE won't go exactly into the MMU TLB. + * Software indicator bits 21 and 28 must be clear. + * Software indicator bits 24, 25, 26, and 27 must be + * set. All other Linux PTE bits control the behavior + * of the MMU. 
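+ * The li/rlwimi pair below therefore computes, in effect
+ * (illustrative C):
+ *	rpn = (pte & ~0x07f8) | 0x00f0;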
+ */
+ li r11, 0x00f0
+ rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */
+ DO_8xx_CPU6(0x2d80, r3)
+ mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
+
+ /* Restore registers */
+#ifndef CONFIG_8xx_CPU6
+ mfspr r10, SPRN_DAR
+ mtcr r10
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+ mfspr r11, SPRN_SPRG2
+#else
+ lwz r11, 0(r0)
+ mtcr r11
+ lwz r11, 4(r0)
+ lwz r3, 8(r0)
+#endif
+ mfspr r10, SPRN_M_TW
+ rfi
+2:
+ mfspr r11, SPRN_SRR1
+ /* clear all error bits as TLB Miss
+ * sets a few unconditionally
+ */
+ rlwinm r11, r11, 0, 0xffff
+ mtspr SPRN_SRR1, r11
+
+ /* Restore registers */
+#ifndef CONFIG_8xx_CPU6
+ mfspr r10, SPRN_DAR
+ mtcr r10
+ li r11, 0x00f0
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+ mfspr r11, SPRN_SPRG2
+#else
+ lwz r11, 0(r0)
+ mtcr r11
+ lwz r11, 4(r0)
+ lwz r3, 8(r0)
+#endif
+ mfspr r10, SPRN_M_TW
+ b InstructionAccess
+
+ . = 0x1200
+DataStoreTLBMiss:
+#ifdef CONFIG_8xx_CPU6
+ stw r3, 8(r0)
+#endif
+ DO_8xx_CPU6(0x3f80, r3)
+ mtspr SPRN_M_TW, r10 /* Save a couple of working registers */
+ mfcr r10
+#ifdef CONFIG_8xx_CPU6
+ stw r10, 0(r0)
+ stw r11, 4(r0)
+#else
+ mtspr SPRN_DAR, r10
+ mtspr SPRN_SPRG2, r11
+#endif
+ mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */
+
+ /* If we are faulting a kernel address, we have to use the
+ * kernel page tables.
+ */
+ andi. r11, r10, 0x0800
+ beq 3f
+ lis r11, swapper_pg_dir@h
+ ori r11, r11, swapper_pg_dir@l
+ rlwimi r10, r11, 0, 2, 19
+3:
+ lwz r11, 0(r10) /* Get the level 1 entry */
+ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */
+ beq 2f /* If zero, don't try to find a pte */
+
+ /* We have a pte table, so fetch the pte from the table.
+ */
+ ori r11, r11, 1 /* Set valid bit in physical L2 page */
+ DO_8xx_CPU6(0x3b80, r3)
+ mtspr SPRN_MD_TWC, r11 /* Load pte table base address */
+ mfspr r10, SPRN_MD_TWC /* ....and get the pte address */
+ lwz r10, 0(r10) /* Get the pte */
+
+ /* Insert the Guarded flag into the TWC from the Linux PTE.
+ * It is bit 27 of both the Linux PTE and the TWC (at least
+ * I got that right :-). It will be better when we can put
+ * this into the Linux pgd/pmd and load it in the operation
+ * above.
+ */
+ rlwimi r11, r10, 0, 27, 27
+ /* Insert the WriteThru flag into the TWC from the Linux PTE.
+ * It is bit 25 in the Linux PTE and bit 30 in the TWC
+ */
+ rlwimi r11, r10, 32-5, 30, 30
+ DO_8xx_CPU6(0x3b80, r3)
+ mtspr SPRN_MD_TWC, r11
+
+ /* Both _PAGE_ACCESSED and _PAGE_PRESENT have to be set.
+ * We also need to know if the insn is a load/store, so:
+ * Clear _PAGE_PRESENT and load that, which will
+ * trap into DTLB Error with the store bit set accordingly.
+ */
+ /* PRESENT=0x1, ACCESSED=0x20
+ * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
+ * r10 = (r10 & ~PRESENT) | r11;
+ */
+#ifdef CONFIG_SWAP
+ rlwinm r11, r10, 32-5, _PAGE_PRESENT
+ and r11, r11, r10
+ rlwimi r10, r11, 0, _PAGE_PRESENT
+#endif
+ /* Honour kernel RO, User NA */
+ /* 0x200 == Extended encoding, bit 22 */
+ rlwimi r10, r10, 32-2, 0x200 /* Copy USER to bit 22, 0x200 */
+ /* r11 = (r10 & _PAGE_RW) >> 1 */
+ rlwinm r11, r10, 32-1, 0x200
+ or r10, r11, r10
+ /* invert RW and 0x200 bits */
+ xori r10, r10, _PAGE_RW | 0x200
+
+ /* The Linux PTE won't go exactly into the MMU TLB.
+ * Software indicator bits 22 and 28 must be clear.
+ * Software indicator bits 24, 25, 26, and 27 must be
+ * set. All other Linux PTE bits control the behavior
+ * of the MMU.
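+ * The fixup at 2: below is, in effect (illustrative C):
+ *	rpn = (pte & ~0x00f8) | 0x00f0;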
+ */ +2: li r11, 0x00f0 + rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ + DO_8xx_CPU6(0x3d80, r3) + mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ + + /* Restore registers */ +#ifndef CONFIG_8xx_CPU6 + mfspr r10, SPRN_DAR + mtcr r10 + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r11, SPRN_SPRG2 +#else + mtspr SPRN_DAR, r11 /* Tag DAR */ + lwz r11, 0(r0) + mtcr r11 + lwz r11, 4(r0) + lwz r3, 8(r0) +#endif + mfspr r10, SPRN_M_TW + rfi + +/* This is an instruction TLB error on the MPC8xx. This could be due + * to many reasons, such as executing guarded memory or illegal instruction + * addresses. There is nothing to do but handle a big time error fault. + */ + . = 0x1300 +InstructionTLBError: + b InstructionAccess + +/* This is the data TLB error on the MPC8xx. This could be due to + * many reasons, including a dirty update to a pte. We can catch that + * one here, but anything else is an error. First, we track down the + * Linux pte. If it is valid, write access is allowed, but the + * page dirty bit is not set, we will set it and reload the TLB. For + * any other case, we bail out to a higher level function that can + * handle it. + */ + . = 0x1400 +DataTLBError: +#ifdef CONFIG_8xx_CPU6 + stw r3, 8(r0) +#endif + DO_8xx_CPU6(0x3f80, r3) + mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ + mfcr r10 + stw r10, 0(r0) + stw r11, 4(r0) + + mfspr r10, SPRN_DAR + cmpwi cr0, r10, 0x00f0 + beq- FixupDAR /* must be a buggy dcbX, icbi insn. */ +DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR */ + mfspr r10, SPRN_M_TW /* Restore registers */ + lwz r11, 0(r0) + mtcr r11 + lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) +#endif + b DataAccess + + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) + +/* On the MPC8xx, these next four traps are used for development + * support of breakpoints and such. Someday I will get around to + * using them. + */ + EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) + + . = 0x2000 + +/* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions + * by decoding the registers used by the dcbx instruction and adding them. + * DAR is set to the calculated address and r10 also holds the EA on exit. + */ + /* define if you don't want to use self modifying code */ +#define NO_SELF_MODIFYING_CODE +FixupDAR:/* Entry point for dcbx workaround. */ + /* fetch instruction from memory. */ + mfspr r10, SPRN_SRR0 + andis. 
r11, r10, 0x8000	/* Address >= 0x80000000 */
+	DO_8xx_CPU6(0x3780, r3)
+	mtspr	SPRN_MD_EPN, r10
+	mfspr	r11, SPRN_M_TWB	/* Get level 1 table entry address */
+	beq-	3f		/* Branch if user space */
+	lis	r11, (swapper_pg_dir-PAGE_OFFSET)@h
+	ori	r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
+	rlwimi	r11, r10, 32-20, 0xffc	/* r11 = r11&~0xffc|(r10>>20)&0xffc */
+3:	lwz	r11, 0(r11)	/* Get the level 1 entry */
+	DO_8xx_CPU6(0x3b80, r3)
+	mtspr	SPRN_MD_TWC, r11	/* Load pte table base address */
+	mfspr	r11, SPRN_MD_TWC	/* ....and get the pte address */
+	lwz	r11, 0(r11)	/* Get the pte */
+	/* concat physical page address(r11) and page offset(r10) */
+	rlwimi	r11, r10, 0, 20, 31
+	lwz	r11,0(r11)
+/* Check if it really is a dcbx instruction. */
+/* dcbt and dcbtst do not generate DTLB Misses/Errors,
+ * no need to include them here */
+	srwi	r10, r11, 26	/* check if major OP code is 31 */
+	cmpwi	cr0, r10, 31
+	bne-	141f
+	rlwinm	r10, r11, 0, 21, 30
+	cmpwi	cr0, r10, 2028	/* Is dcbz? */
+	beq+	142f
+	cmpwi	cr0, r10, 940	/* Is dcbi? */
+	beq+	142f
+	cmpwi	cr0, r10, 108	/* Is dcbst? */
+	beq+	144f		/* Fix up store bit! */
+	cmpwi	cr0, r10, 172	/* Is dcbf? */
+	beq+	142f
+	cmpwi	cr0, r10, 1964	/* Is icbi? */
+	beq+	142f
+141:	mfspr	r10, SPRN_DAR	/* r10 must hold DAR at exit */
+	b	DARFixed	/* Nope, go back to normal TLB processing */
+
+144:	mfspr	r10, SPRN_DSISR
+	rlwinm	r10, r10,0,7,5	/* Clear store bit for buggy dcbst insn */
+	mtspr	SPRN_DSISR, r10
+142:	/* continue, it was a dcbx, dcbi instruction. */
+#ifdef CONFIG_8xx_CPU6
+	lwz	r3, 8(r0)	/* restore r3 from memory */
+#endif
+#ifndef NO_SELF_MODIFYING_CODE
+	andis.	r10,r11,0x1f	/* test if reg RA is r0 */
+	li	r10,modified_instr@l
+	dcbtst	r0,r10		/* touch for store */
+	rlwinm	r11,r11,0,0,20	/* Zero lower 10 bits */
+	oris	r11,r11,640	/* Transform instr. to a "add r10,RA,RB" */
+	ori	r11,r11,532
+	stw	r11,0(r10)	/* store add/and instruction */
+	dcbf	0,r10		/* flush new instr. to memory. */
+	icbi	0,r10		/* invalidate instr. cache line */
+	lwz	r11, 4(r0)	/* restore r11 from memory */
+	mfspr	r10, SPRN_M_TW	/* restore r10 from M_TW */
+	isync			/* Wait until new instr is loaded from memory */
+modified_instr:
+	.space	4		/* this is where the add instr.
is stored */ + bne+ 143f + subf r10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */ +143: mtdar r10 /* store faulting EA in DAR */ + b DARFixed /* Go back to normal TLB handling */ +#else + mfctr r10 + mtdar r10 /* save ctr reg in DAR */ + rlwinm r10, r11, 24, 24, 28 /* offset into jump table for reg RB */ + addi r10, r10, 150f@l /* add start of table */ + mtctr r10 /* load ctr with jump address */ + xor r10, r10, r10 /* sum starts at zero */ + bctr /* jump into table */ +150: + add r10, r10, r0 ;b 151f + add r10, r10, r1 ;b 151f + add r10, r10, r2 ;b 151f + add r10, r10, r3 ;b 151f + add r10, r10, r4 ;b 151f + add r10, r10, r5 ;b 151f + add r10, r10, r6 ;b 151f + add r10, r10, r7 ;b 151f + add r10, r10, r8 ;b 151f + add r10, r10, r9 ;b 151f + mtctr r11 ;b 154f /* r10 needs special handling */ + mtctr r11 ;b 153f /* r11 needs special handling */ + add r10, r10, r12 ;b 151f + add r10, r10, r13 ;b 151f + add r10, r10, r14 ;b 151f + add r10, r10, r15 ;b 151f + add r10, r10, r16 ;b 151f + add r10, r10, r17 ;b 151f + add r10, r10, r18 ;b 151f + add r10, r10, r19 ;b 151f + add r10, r10, r20 ;b 151f + add r10, r10, r21 ;b 151f + add r10, r10, r22 ;b 151f + add r10, r10, r23 ;b 151f + add r10, r10, r24 ;b 151f + add r10, r10, r25 ;b 151f + add r10, r10, r26 ;b 151f + add r10, r10, r27 ;b 151f + add r10, r10, r28 ;b 151f + add r10, r10, r29 ;b 151f + add r10, r10, r30 ;b 151f + add r10, r10, r31 +151: + rlwinm. r11,r11,19,24,28 /* offset into jump table for reg RA */ + beq 152f /* if reg RA is zero, don't add it */ + addi r11, r11, 150b@l /* add start of table */ + mtctr r11 /* load ctr with jump address */ + rlwinm r11,r11,0,16,10 /* make sure we don't execute this more than once */ + bctr /* jump into table */ +152: + mfdar r11 + mtctr r11 /* restore ctr reg from DAR */ + mtdar r10 /* save fault EA to DAR */ + b DARFixed /* Go back to normal TLB handling */ + + /* special handling for r10,r11 since these are modified already */ +153: lwz r11, 4(r0) /* load r11 from memory */ + b 155f +154: mfspr r11, SPRN_M_TW /* load r10 from M_TW */ +155: add r10, r10, r11 /* add it */ + mfctr r11 /* restore r11 */ + b 151b +#endif + + .globl giveup_fpu +giveup_fpu: + blr + +/* + * This is where the main kernel code starts. + */ +start_here: + /* ptr to current */ + lis r2,init_task@h + ori r2,r2,init_task@l + + /* ptr to phys current thread */ + tophys(r4,r2) + addi r4,r4,THREAD /* init task's THREAD */ + mtspr SPRN_SPRG_THREAD,r4 + + /* stack */ + lis r1,init_thread_union@ha + addi r1,r1,init_thread_union@l + li r0,0 + stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + + bl early_init /* We have to do this with MMU on */ + +/* + * Decide what sort of machine this is and initialize the MMU. + */ + li r3,0 + mr r4,r31 + bl machine_init + bl MMU_init + +/* + * Go back to running unmapped so we can load up new values + * and change to using our exception vectors. + * On the 8xx, all we have to do is invalidate the TLB to clear + * the old 8M byte TLB mappings and load the page table base register. + */ + /* The right way to do this would be to track it down through + * init's THREAD like the context switch code does, but this is + * easier......until someone changes init's static structures. 
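+ *
+ * (The sequence below just loads the physical address of
+ * swapper_pg_dir into M_TWB; tophys() is plain arithmetic,
+ * roughly phys = virt - PAGE_OFFSET -- an illustrative sketch,
+ * since the kernel runs at a fixed virtual offset.)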
+ */ + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l + tophys(r6,r6) +#ifdef CONFIG_8xx_CPU6 + lis r4, cpu6_errata_word@h + ori r4, r4, cpu6_errata_word@l + li r3, 0x3980 + stw r3, 12(r4) + lwz r3, 12(r4) +#endif + mtspr SPRN_M_TWB, r6 + lis r4,2f@h + ori r4,r4,2f@l + tophys(r4,r4) + li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR) + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + rfi +/* Load up the kernel context */ +2: + SYNC /* Force all PTE updates to finish */ + tlbia /* Clear all TLB entries */ + sync /* wait for tlbia/tlbie to finish */ + TLBSYNC /* ... on all CPUs */ + + /* set up the PTE pointers for the Abatron bdiGDB. + */ + tovirt(r6,r6) + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + stw r5, 0xf0(r0) /* Must match your Abatron config file */ + tophys(r5,r5) + stw r6, 0(r5) + +/* Now turn on the MMU for real! */ + li r4,MSR_KERNEL + lis r3,start_kernel@h + ori r3,r3,start_kernel@l + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + rfi /* enable MMU and jump to start_kernel */ + +/* Set up the initial MMU state so we can do the first level of + * kernel initialization. This maps the first 8 MBytes of memory 1:1 + * virtual to physical. Also, set the cache mode since that is defined + * by TLB entries and perform any additional mapping (like of the IMMR). + * If configured to pin some TLBs, we pin the first 8 Mbytes of kernel, + * 24 Mbytes of data, and the 8M IMMR space. Anything not covered by + * these mappings is mapped by page tables. + */ +initial_mmu: + tlbia /* Invalidate all TLB entries */ +/* Always pin the first 8 MB ITLB to prevent ITLB + misses while mucking around with SRR0/SRR1 in asm +*/ + lis r8, MI_RSV4I@h + ori r8, r8, 0x1c00 + + mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ + +#ifdef CONFIG_PIN_TLB + lis r10, (MD_RSV4I | MD_RESETVAL)@h + ori r10, r10, 0x1c00 + mr r8, r10 +#else + lis r10, MD_RESETVAL@h +#endif +#ifndef CONFIG_8xx_COPYBACK + oris r10, r10, MD_WTDEF@h +#endif + mtspr SPRN_MD_CTR, r10 /* Set data TLB control */ + + /* Now map the lower 8 Meg into the TLBs. For this quick hack, + * we can load the instruction and data TLB registers with the + * same values. + */ + lis r8, KERNELBASE@h /* Create vaddr for TLB */ + ori r8, r8, MI_EVALID /* Mark it valid */ + mtspr SPRN_MI_EPN, r8 + mtspr SPRN_MD_EPN, r8 + li r8, MI_PS8MEG /* Set 8M byte page */ + ori r8, r8, MI_SVALID /* Make it valid */ + mtspr SPRN_MI_TWC, r8 + mtspr SPRN_MD_TWC, r8 + li r8, MI_BOOTINIT /* Create RPN for address 0 */ + mtspr SPRN_MI_RPN, r8 /* Store TLB entry */ + mtspr SPRN_MD_RPN, r8 + lis r8, MI_Kp@h /* Set the protection mode */ + mtspr SPRN_MI_AP, r8 + mtspr SPRN_MD_AP, r8 + + /* Map another 8 MByte at the IMMR to get the processor + * internal registers (among other things). + */ +#ifdef CONFIG_PIN_TLB + addi r10, r10, 0x0100 + mtspr SPRN_MD_CTR, r10 +#endif + mfspr r9, 638 /* Get current IMMR */ + andis. r9, r9, 0xff80 /* Get 8Mbyte boundary */ + + mr r8, r9 /* Create vaddr for TLB */ + ori r8, r8, MD_EVALID /* Mark it valid */ + mtspr SPRN_MD_EPN, r8 + li r8, MD_PS8MEG /* Set 8M byte page */ + ori r8, r8, MD_SVALID /* Make it valid */ + mtspr SPRN_MD_TWC, r8 + mr r8, r9 /* Create paddr for TLB */ + ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */ + mtspr SPRN_MD_RPN, r8 + +#ifdef CONFIG_PIN_TLB + /* Map two more 8M kernel data pages. 
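+	 * Each block below advances MD_CTR to the next reserved slot
+	 * and installs another 1:1 8M mapping; the addis ...,0x0080
+	 * steps add 8M (0x00800000) to both the vaddr and the RPN.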
+ */
+	addi	r10, r10, 0x0100
+	mtspr	SPRN_MD_CTR, r10
+
+	lis	r8, KERNELBASE@h	/* Create vaddr for TLB */
+	addis	r8, r8, 0x0080		/* Add 8M */
+	ori	r8, r8, MI_EVALID	/* Mark it valid */
+	mtspr	SPRN_MD_EPN, r8
+	li	r9, MI_PS8MEG		/* Set 8M byte page */
+	ori	r9, r9, MI_SVALID	/* Make it valid */
+	mtspr	SPRN_MD_TWC, r9
+	li	r11, MI_BOOTINIT	/* Create RPN for address 0 */
+	addis	r11, r11, 0x0080	/* Add 8M */
+	mtspr	SPRN_MD_RPN, r11
+
+	addis	r8, r8, 0x0080		/* Add 8M */
+	mtspr	SPRN_MD_EPN, r8
+	mtspr	SPRN_MD_TWC, r9
+	addis	r11, r11, 0x0080	/* Add 8M */
+	mtspr	SPRN_MD_RPN, r11
+#endif
+
+	/* Since the cache is enabled according to the information we
+	 * just loaded into the TLB, invalidate and enable the caches here.
+	 * We should probably check/set other modes....later.
+	 */
+	lis	r8, IDC_INVALL@h
+	mtspr	SPRN_IC_CST, r8
+	mtspr	SPRN_DC_CST, r8
+	lis	r8, IDC_ENABLE@h
+	mtspr	SPRN_IC_CST, r8
+#ifdef CONFIG_8xx_COPYBACK
+	mtspr	SPRN_DC_CST, r8
+#else
+	/* For a debug option, I left this here to easily enable
+	 * the write through cache mode
+	 */
+	lis	r8, DC_SFWT@h
+	mtspr	SPRN_DC_CST, r8
+	lis	r8, IDC_ENABLE@h
+	mtspr	SPRN_DC_CST, r8
+#endif
+	blr
+
+
+/*
+ * Set up to use a given MMU context.
+ * r3 is context number, r4 is PGD pointer.
+ *
+ * We place the physical address of the new task page directory loaded
+ * into the MMU base register, and set the ASID compare register with
+ * the new "context."
+ */
+_GLOBAL(set_context)
+
+#ifdef CONFIG_BDI_SWITCH
+	/* Context switch the PTE pointer for the Abatron BDI2000.
+	 * The PGDIR is passed as second argument.
+	 */
+	lis	r5, KERNELBASE@h
+	lwz	r5, 0xf0(r5)
+	stw	r4, 0x4(r5)
+#endif
+
+#ifdef CONFIG_8xx_CPU6
+	lis	r6, cpu6_errata_word@h
+	ori	r6, r6, cpu6_errata_word@l
+	tophys	(r4, r4)
+	li	r7, 0x3980
+	stw	r7, 12(r6)
+	lwz	r7, 12(r6)
+	mtspr	SPRN_M_TWB, r4	/* Update MMU base address */
+	li	r7, 0x3380
+	stw	r7, 12(r6)
+	lwz	r7, 12(r6)
+	mtspr	SPRN_M_CASID, r3	/* Update context */
+#else
+	mtspr	SPRN_M_CASID,r3	/* Update context */
+	tophys	(r4, r4)
+	mtspr	SPRN_M_TWB, r4	/* and pgd */
+#endif
+	SYNC
+	blr
+
+#ifdef CONFIG_8xx_CPU6
+/* It's here because it is unique to the 8xx.
+ * It is important we get called with interrupts disabled.  I used to
+ * do that, but it appears that all code that calls this already had
+ * interrupts disabled.
+ */
+	.globl	set_dec_cpu6
+set_dec_cpu6:
+	lis	r7, cpu6_errata_word@h
+	ori	r7, r7, cpu6_errata_word@l
+	li	r4, 0x2c00
+	stw	r4, 8(r7)
+	lwz	r4, 8(r7)
+	mtspr	22, r3		/* Update Decrementer */
+	SYNC
+	blr
+#endif
+
+/*
+ * We put a few things here that have to be page-aligned.
+ * This stuff goes at the beginning of the data segment,
+ * which is page-aligned.
+ */
+	.data
+	.globl	sdata
+sdata:
+	.globl	empty_zero_page
+empty_zero_page:
+	.space	4096
+
+	.globl	swapper_pg_dir
+swapper_pg_dir:
+	.space	4096
+
+/* Room for two PTE table pointers, usually the kernel and current user
+ * pointer to their respective root page table (pgdir).
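+ * Layout sketch (byte offsets): 0 = kernel pgd, 4 = current user pgd.
+ * start_here above stores swapper_pg_dir at offset 0, and
+ * set_context() stores the incoming user pgd at offset 4.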
+ */ +abatron_pteptrs: + .space 8 + +#ifdef CONFIG_8xx_CPU6 + .globl cpu6_errata_word +cpu6_errata_word: + .space 16 +#endif + diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h new file mode 100644 index 00000000..0e417538 --- /dev/null +++ b/arch/powerpc/kernel/head_booke.h @@ -0,0 +1,427 @@ +#ifndef __HEAD_BOOKE_H__ +#define __HEAD_BOOKE_H__ + +#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */ +/* + * Macros used for common Book-e exception handling + */ + +#define SET_IVOR(vector_number, vector_label) \ + li r26,vector_label@l; \ + mtspr SPRN_IVOR##vector_number,r26; \ + sync + +#if (THREAD_SHIFT < 15) +#define ALLOC_STACK_FRAME(reg, val) \ + addi reg,reg,val +#else +#define ALLOC_STACK_FRAME(reg, val) \ + addis reg,reg,val@ha; \ + addi reg,reg,val@l +#endif + +/* + * Macro used to get to thread save registers. + * Note that entries 0-3 are used for the prolog code, and the remaining + * entries are available for specific exception use in the event a handler + * requires more than 4 scratch registers. + */ +#define THREAD_NORMSAVE(offset) (THREAD_NORMSAVES + (offset * 4)) + +#define NORMAL_EXCEPTION_PROLOG \ + mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \ + mfspr r10, SPRN_SPRG_THREAD; \ + stw r11, THREAD_NORMSAVE(0)(r10); \ + stw r13, THREAD_NORMSAVE(2)(r10); \ + mfcr r13; /* save CR in r13 for now */\ + mfspr r11,SPRN_SRR1; /* check whether user or kernel */\ + andi. r11,r11,MSR_PR; \ + mr r11, r1; \ + beq 1f; \ + /* if from user, start at top of this thread's kernel stack */ \ + lwz r11, THREAD_INFO-THREAD(r10); \ + ALLOC_STACK_FRAME(r11, THREAD_SIZE); \ +1 : subi r11, r11, INT_FRAME_SIZE; /* Allocate exception frame */ \ + stw r13, _CCR(r11); /* save various registers */ \ + stw r12,GPR12(r11); \ + stw r9,GPR9(r11); \ + mfspr r13, SPRN_SPRG_RSCRATCH0; \ + stw r13, GPR10(r11); \ + lwz r12, THREAD_NORMSAVE(0)(r10); \ + stw r12,GPR11(r11); \ + lwz r13, THREAD_NORMSAVE(2)(r10); /* restore r13 */ \ + mflr r10; \ + stw r10,_LINK(r11); \ + mfspr r12,SPRN_SRR0; \ + stw r1, GPR1(r11); \ + mfspr r9,SPRN_SRR1; \ + stw r1, 0(r11); \ + mr r1, r11; \ + rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ + stw r0,GPR0(r11); \ + lis r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \ + addi r10, r10, STACK_FRAME_REGS_MARKER@l; \ + stw r10, 8(r11); \ + SAVE_4GPRS(3, r11); \ + SAVE_2GPRS(7, r11) + +/* To handle the additional exception priority levels on 40x and Book-E + * processors we allocate a stack per additional priority level. + * + * On 40x critical is the only additional level + * On 44x/e500 we have critical and machine check + * On e200 we have critical and debug (machine check occurs via critical) + * + * Additionally we reserve a SPRG for each priority level so we can free up a + * GPR to use as the base for indirect access to the exception stacks. This + * is necessary since the MMU is always on, for Book-E parts, and the stacks + * are offset from KERNELBASE. + * + * There is some space optimization to be had here if desired. However + * to allow for a common kernel with support for debug exceptions either + * going to critical or their own debug level we aren't currently + * providing configurations that micro-optimize space usage. 
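+ *
+ * In C terms, the per-level stack lookup done by
+ * BOOKE_LOAD_EXC_LEVEL_STACK below is roughly (SMP sketch):
+ *   r8 = level_stack_base[mfspr(SPRN_PIR)] + EXC_LVL_FRAME_OVERHEAD;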
+ */ + +#define MC_STACK_BASE mcheckirq_ctx +#define CRIT_STACK_BASE critirq_ctx + +/* only on e500mc/e200 */ +#define DBG_STACK_BASE dbgirq_ctx + +#define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE) + +#ifdef CONFIG_SMP +#define BOOKE_LOAD_EXC_LEVEL_STACK(level) \ + mfspr r8,SPRN_PIR; \ + slwi r8,r8,2; \ + addis r8,r8,level##_STACK_BASE@ha; \ + lwz r8,level##_STACK_BASE@l(r8); \ + addi r8,r8,EXC_LVL_FRAME_OVERHEAD; +#else +#define BOOKE_LOAD_EXC_LEVEL_STACK(level) \ + lis r8,level##_STACK_BASE@ha; \ + lwz r8,level##_STACK_BASE@l(r8); \ + addi r8,r8,EXC_LVL_FRAME_OVERHEAD; +#endif + +/* + * Exception prolog for critical/machine check exceptions. This is a + * little different from the normal exception prolog above since a + * critical/machine check exception can potentially occur at any point + * during normal exception processing. Thus we cannot use the same SPRG + * registers as the normal prolog above. Instead we use a portion of the + * critical/machine check exception stack at low physical addresses. + */ +#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, exc_level_srr0, exc_level_srr1) \ + mtspr SPRN_SPRG_WSCRATCH_##exc_level,r8; \ + BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \ + stw r9,GPR9(r8); /* save various registers */\ + mfcr r9; /* save CR in r9 for now */\ + stw r10,GPR10(r8); \ + stw r11,GPR11(r8); \ + stw r9,_CCR(r8); /* save CR on stack */\ + mfspr r10,exc_level_srr1; /* check whether user or kernel */\ + andi. r10,r10,MSR_PR; \ + mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ + lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ + addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\ + beq 1f; \ + /* COMING FROM USER MODE */ \ + stw r9,_CCR(r11); /* save CR */\ + lwz r10,GPR10(r8); /* copy regs from exception stack */\ + lwz r9,GPR9(r8); \ + stw r10,GPR10(r11); \ + lwz r10,GPR11(r8); \ + stw r9,GPR9(r11); \ + stw r10,GPR11(r11); \ + b 2f; \ + /* COMING FROM PRIV MODE */ \ +1: lwz r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r11); \ + lwz r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r11); \ + stw r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r8); \ + stw r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r8); \ + lwz r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r11); \ + stw r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r8); \ + mr r11,r8; \ +2: mfspr r8,SPRN_SPRG_RSCRATCH_##exc_level; \ + stw r12,GPR12(r11); /* save various registers */\ + mflr r10; \ + stw r10,_LINK(r11); \ + mfspr r12,SPRN_DEAR; /* save DEAR and ESR in the frame */\ + stw r12,_DEAR(r11); /* since they may have had stuff */\ + mfspr r9,SPRN_ESR; /* in them at the point where the */\ + stw r9,_ESR(r11); /* exception was taken */\ + mfspr r12,exc_level_srr0; \ + stw r1,GPR1(r11); \ + mfspr r9,exc_level_srr1; \ + stw r1,0(r11); \ + mr r1,r11; \ + rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ + stw r0,GPR0(r11); \ + SAVE_4GPRS(3, r11); \ + SAVE_2GPRS(7, r11) + +#define CRITICAL_EXCEPTION_PROLOG \ + EXC_LEVEL_EXCEPTION_PROLOG(CRIT, SPRN_CSRR0, SPRN_CSRR1) +#define DEBUG_EXCEPTION_PROLOG \ + EXC_LEVEL_EXCEPTION_PROLOG(DBG, SPRN_DSRR0, SPRN_DSRR1) +#define MCHECK_EXCEPTION_PROLOG \ + EXC_LEVEL_EXCEPTION_PROLOG(MC, SPRN_MCSRR0, SPRN_MCSRR1) + +/* + * Exception vectors. 
+ */ +#define START_EXCEPTION(label) \ + .align 5; \ +label: + +#define FINISH_EXCEPTION(func) \ + bl transfer_to_handler_full; \ + .long func; \ + .long ret_from_except_full + +#define EXCEPTION(n, label, hdlr, xfer) \ + START_EXCEPTION(label); \ + NORMAL_EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + xfer(n, hdlr) + +#define CRITICAL_EXCEPTION(n, label, hdlr) \ + START_EXCEPTION(label); \ + CRITICAL_EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ + NOCOPY, crit_transfer_to_handler, \ + ret_from_crit_exc) + +#define MCHECK_EXCEPTION(n, label, hdlr) \ + START_EXCEPTION(label); \ + MCHECK_EXCEPTION_PROLOG; \ + mfspr r5,SPRN_ESR; \ + stw r5,_ESR(r11); \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ + NOCOPY, mcheck_transfer_to_handler, \ + ret_from_mcheck_exc) + +#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \ + li r10,trap; \ + stw r10,_TRAP(r11); \ + lis r10,msr@h; \ + ori r10,r10,msr@l; \ + copyee(r10, r9); \ + bl tfer; \ + .long hdlr; \ + .long ret + +#define COPY_EE(d, s) rlwimi d,s,0,16,16 +#define NOCOPY(d, s) + +#define EXC_XFER_STD(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ + ret_from_except) + +#define EXC_XFER_EE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_EE_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \ + ret_from_except) + +/* Check for a single step debug exception while in an exception + * handler before state has been saved. This is to catch the case + * where an instruction that we are trying to single step causes + * an exception (eg ITLB/DTLB miss) and thus the first instruction of + * the exception handler generates a single step debug exception. + * + * If we get a debug trap on the first instruction of an exception handler, + * we reset the MSR_DE in the _exception handler's_ MSR (the debug trap is + * a critical exception, so we are using SPRN_CSRR1 to manipulate the MSR). + * The exception handler was handling a non-critical interrupt, so it will + * save (and later restore) the MSR via SPRN_CSRR1, which will still have + * the MSR_DE bit set. + */ +#define DEBUG_DEBUG_EXCEPTION \ + START_EXCEPTION(DebugDebug); \ + DEBUG_EXCEPTION_PROLOG; \ + \ + /* \ + * If there is a single step or branch-taken exception in an \ + * exception entry sequence, it was probably meant to apply to \ + * the code where the exception occurred (since exception entry \ + * doesn't turn off DE automatically). We simulate the effect \ + * of turning off DE on entry to an exception handler by turning \ + * off DE in the DSRR1 value and clearing the debug status. \ + */ \ + mfspr r10,SPRN_DBSR; /* check single-step/branch taken */ \ + andis. r10,r10,(DBSR_IC|DBSR_BT)@h; \ + beq+ 2f; \ + \ + lis r10,KERNELBASE@h; /* check if exception in vectors */ \ + ori r10,r10,KERNELBASE@l; \ + cmplw r12,r10; \ + blt+ 2f; /* addr below exception vectors */ \ + \ + lis r10,DebugDebug@h; \ + ori r10,r10,DebugDebug@l; \ + cmplw r12,r10; \ + bgt+ 2f; /* addr above exception vectors */ \ + \ + /* here it looks like we got an inappropriate debug exception. 
*/	\
+1:	rlwinm	r9,r9,0,~MSR_DE;	/* clear DE in the DSRR1 value */ \
+	lis	r10,(DBSR_IC|DBSR_BT)@h;	/* clear the IC event */ \
+	mtspr	SPRN_DBSR,r10;			\
+	/* restore state and get out */		\
+	lwz	r10,_CCR(r11);			\
+	lwz	r0,GPR0(r11);			\
+	lwz	r1,GPR1(r11);			\
+	mtcrf	0x80,r10;			\
+	mtspr	SPRN_DSRR0,r12;			\
+	mtspr	SPRN_DSRR1,r9;			\
+	lwz	r9,GPR9(r11);			\
+	lwz	r12,GPR12(r11);			\
+	mtspr	SPRN_SPRG_WSCRATCH_DBG,r8;	\
+	BOOKE_LOAD_EXC_LEVEL_STACK(DBG);	/* r8 points to the debug stack */ \
+	lwz	r10,GPR10(r8);			\
+	lwz	r11,GPR11(r8);			\
+	mfspr	r8,SPRN_SPRG_RSCRATCH_DBG;	\
+						\
+	PPC_RFDI;				\
+	b	.;				\
+						\
+	/* continue normal handling for a debug exception... */	\
+2:	mfspr	r4,SPRN_DBSR;			\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;	\
+	EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc)
+
+#define DEBUG_CRIT_EXCEPTION						\
+	START_EXCEPTION(DebugCrit);					\
+	CRITICAL_EXCEPTION_PROLOG;					\
+									\
+	/*								\
+	 * If there is a single step or branch-taken exception in an	\
+	 * exception entry sequence, it was probably meant to apply to	\
+	 * the code where the exception occurred (since exception entry \
+	 * doesn't turn off DE automatically).  We simulate the effect	\
+	 * of turning off DE on entry to an exception handler by turning \
+	 * off DE in the CSRR1 value and clearing the debug status.	\
+	 */								\
+	mfspr	r10,SPRN_DBSR;		/* check single-step/branch taken */ \
+	andis.	r10,r10,(DBSR_IC|DBSR_BT)@h;				\
+	beq+	2f;							\
+									\
+	lis	r10,KERNELBASE@h;	/* check if exception in vectors */ \
+	ori	r10,r10,KERNELBASE@l;					\
+	cmplw	r12,r10;						\
+	blt+	2f;			/* addr below exception vectors */ \
+									\
+	lis	r10,DebugCrit@h;					\
+	ori	r10,r10,DebugCrit@l;					\
+	cmplw	r12,r10;						\
+	bgt+	2f;			/* addr above exception vectors */ \
+									\
+	/* here it looks like we got an inappropriate debug exception. */ \
+1:	rlwinm	r9,r9,0,~MSR_DE;	/* clear DE in the CSRR1 value */ \
+	lis	r10,(DBSR_IC|DBSR_BT)@h;	/* clear the IC event */ \
+	mtspr	SPRN_DBSR,r10;						\
+	/* restore state and get out */					\
+	lwz	r10,_CCR(r11);						\
+	lwz	r0,GPR0(r11);						\
+	lwz	r1,GPR1(r11);						\
+	mtcrf	0x80,r10;						\
+	mtspr	SPRN_CSRR0,r12;						\
+	mtspr	SPRN_CSRR1,r9;						\
+	lwz	r9,GPR9(r11);						\
+	lwz	r12,GPR12(r11);						\
+	mtspr	SPRN_SPRG_WSCRATCH_CRIT,r8;				\
+	BOOKE_LOAD_EXC_LEVEL_STACK(CRIT); /* r8 points to the crit stack */ \
+	lwz	r10,GPR10(r8);						\
+	lwz	r11,GPR11(r8);						\
+	mfspr	r8,SPRN_SPRG_RSCRATCH_CRIT;				\
+									\
+	rfci;								\
+	b	.;							\
+									\
+	/* continue normal handling for a critical exception...
*/	\
+2:	mfspr	r4,SPRN_DBSR;			\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;	\
+	EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
+
+#define DATA_STORAGE_EXCEPTION						\
+	START_EXCEPTION(DataStorage)					\
+	NORMAL_EXCEPTION_PROLOG;					\
+	mfspr	r5,SPRN_ESR;		/* Grab the ESR and save it */	\
+	stw	r5,_ESR(r11);						\
+	mfspr	r4,SPRN_DEAR;		/* Grab the DEAR */		\
+	EXC_XFER_LITE(0x0300, handle_page_fault)
+
+#define INSTRUCTION_STORAGE_EXCEPTION					\
+	START_EXCEPTION(InstructionStorage)				\
+	NORMAL_EXCEPTION_PROLOG;					\
+	mfspr	r5,SPRN_ESR;		/* Grab the ESR and save it */	\
+	stw	r5,_ESR(r11);						\
+	mr	r4,r12;			/* Pass SRR0 as arg2 */		\
+	li	r5,0;			/* Pass zero as arg3 */		\
+	EXC_XFER_LITE(0x0400, handle_page_fault)
+
+#define ALIGNMENT_EXCEPTION						\
+	START_EXCEPTION(Alignment)					\
+	NORMAL_EXCEPTION_PROLOG;					\
+	mfspr	r4,SPRN_DEAR;		/* Grab the DEAR and save it */	\
+	stw	r4,_DEAR(r11);						\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;				\
+	EXC_XFER_EE(0x0600, alignment_exception)
+
+#define PROGRAM_EXCEPTION						\
+	START_EXCEPTION(Program)					\
+	NORMAL_EXCEPTION_PROLOG;					\
+	mfspr	r4,SPRN_ESR;		/* Grab the ESR and save it */	\
+	stw	r4,_ESR(r11);						\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;				\
+	EXC_XFER_STD(0x0700, program_check_exception)
+
+#define DECREMENTER_EXCEPTION						\
+	START_EXCEPTION(Decrementer)					\
+	NORMAL_EXCEPTION_PROLOG;					\
+	lis	r0,TSR_DIS@h;		/* Setup the DEC interrupt mask */ \
+	mtspr	SPRN_TSR,r0;		/* Clear the DEC interrupt */	\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;				\
+	EXC_XFER_LITE(0x0900, timer_interrupt)
+
+#define FP_UNAVAILABLE_EXCEPTION					\
+	START_EXCEPTION(FloatingPointUnavailable)			\
+	NORMAL_EXCEPTION_PROLOG;					\
+	beq	1f;							\
+	bl	load_up_fpu;		/* if from user, just load it up */ \
+	b	fast_exception_return;					\
+1:	addi	r3,r1,STACK_FRAME_OVERHEAD;				\
+	EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception)
+
+#ifndef __ASSEMBLY__
+struct exception_regs {
+	unsigned long mas0;
+	unsigned long mas1;
+	unsigned long mas2;
+	unsigned long mas3;
+	unsigned long mas6;
+	unsigned long mas7;
+	unsigned long srr0;
+	unsigned long srr1;
+	unsigned long csrr0;
+	unsigned long csrr1;
+	unsigned long dsrr0;
+	unsigned long dsrr1;
+	unsigned long saved_ksp_limit;
+};
+
+/* ensure this structure is always sized to a multiple of the stack alignment */
+#define STACK_EXC_LVL_FRAME_SIZE	_ALIGN_UP(sizeof (struct exception_regs), 16)
+
+#endif /* __ASSEMBLY__ */
+#endif /* __HEAD_BOOKE_H__ */
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
new file mode 100644
index 00000000..28e62598
--- /dev/null
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -0,0 +1,1084 @@
+/*
+ * Kernel execution entry point code.
+ *
+ * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
+ *      Initial PowerPC version.
+ * Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *      Rewritten for PReP
+ * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ *      Low-level exception handlers, MMU support, and rewrite.
+ * Copyright (c) 1997 Dan Malek <dmalek@jlc.net>
+ *      PowerPC 8xx modifications.
+ * Copyright (c) 1998-1999 TiVo, Inc.
+ *      PowerPC 403GCX modifications.
+ * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ *      PowerPC 403GCX/405GP modifications.
+ * Copyright 2000 MontaVista Software Inc.
+ *	PPC405 modifications
+ *      PowerPC 403GCX/405GP modifications.
+ * 	Author: MontaVista Software, Inc.
+ *         	frank_rowand@mvista.com or source@mvista.com
+ * 	   	debbie_chu@mvista.com
+ *    Copyright 2002-2004 MontaVista Software, Inc.
+ * 	PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
+ * Copyright 2004 Freescale Semiconductor, Inc
+ * 	PowerPC e500 modifications, Kumar Gala <galak@kernel.crashing.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+#include <asm/ptrace.h>
+#include "head_booke.h"
+
+/* As with the other PowerPC ports, it is expected that when code
+ * execution begins here, the following registers contain valid, yet
+ * optional, information:
+ *
+ *   r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.)
+ *   r4 - Starting address of the init RAM disk
+ *   r5 - Ending address of the init RAM disk
+ *   r6 - Start of kernel command line string (e.g. "mem=128")
+ *   r7 - End of kernel command line string
+ *
+ */
+	__HEAD
+_ENTRY(_stext);
+_ENTRY(_start);
+	/*
+	 * Reserve a word at a fixed location to store the address
+	 * of abatron_pteptrs
+	 */
+	nop
+
+	/* Translate device tree address to physical, save in r30/r31 */
+	mfmsr	r16
+	mfspr	r17,SPRN_PID
+	rlwinm	r17,r17,16,0x3fff0000	/* turn PID into MAS6[SPID] */
+	rlwimi	r17,r16,28,0x00000001	/* turn MSR[DS] into MAS6[SAS] */
+	mtspr	SPRN_MAS6,r17
+
+	tlbsx	0,r3			/* must succeed */
+
+	mfspr	r16,SPRN_MAS1
+	mfspr	r20,SPRN_MAS3
+	rlwinm	r17,r16,25,0x1f		/* r17 = log2(page size) */
+	li	r18,1024
+	slw	r18,r18,r17		/* r18 = page size */
+	addi	r18,r18,-1
+	and	r19,r3,r18		/* r19 = page offset */
+	andc	r31,r20,r18		/* r31 = page base */
+	or	r31,r31,r19		/* r31 = devtree phys addr */
+	mfspr	r30,SPRN_MAS7
+
+	li	r25,0			/* phys kernel start (low) */
+	li	r24,0			/* CPU number */
+	li	r23,0			/* phys kernel start (high) */
+
+/* We try to not make any assumptions about how the boot loader
+ * set up or used the TLBs.  We invalidate all mappings from the
+ * boot loader and load a single entry in TLB1[0] to map the
+ * first 64M of kernel memory.  Any boot info passed from the
+ * bootloader needs to live in this first 64M.
+ *
+ * Requirement on bootloader:
+ *  - The page we're executing in needs to reside in TLB1 and
+ *    have IPROT=1.  If not, an invalidate broadcast could
+ *    evict the entry we're currently executing in.
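+ *    (IPROT is the MAS1 bit that makes a TLB1 entry immune to
+ *    invalidate-all operations such as a broadcast tlbivax --
+ *    assumption: standard e500 MMU behaviour.)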
+ *
+ * r3 = Index of TLB1 we're executing in
+ * r4 = Current MSR[IS]
+ * r5 = Index of TLB1 temp mapping
+ *
+ * Later in mapin_ram we will correctly map lowmem, and resize TLB1[0]
+ * if needed
+ */
+
+_ENTRY(__early_start)
+
+#define ENTRY_MAPPING_BOOT_SETUP
+#include "fsl_booke_entry_mapping.S"
+#undef ENTRY_MAPPING_BOOT_SETUP
+
+	/* Establish the interrupt vector offsets */
+	SET_IVOR(0,  CriticalInput);
+	SET_IVOR(1,  MachineCheck);
+	SET_IVOR(2,  DataStorage);
+	SET_IVOR(3,  InstructionStorage);
+	SET_IVOR(4,  ExternalInput);
+	SET_IVOR(5,  Alignment);
+	SET_IVOR(6,  Program);
+	SET_IVOR(7,  FloatingPointUnavailable);
+	SET_IVOR(8,  SystemCall);
+	SET_IVOR(9,  AuxillaryProcessorUnavailable);
+	SET_IVOR(10, Decrementer);
+	SET_IVOR(11, FixedIntervalTimer);
+	SET_IVOR(12, WatchdogTimer);
+	SET_IVOR(13, DataTLBError);
+	SET_IVOR(14, InstructionTLBError);
+	SET_IVOR(15, DebugCrit);
+
+	/* Establish the interrupt vector base */
+	lis	r4,interrupt_base@h	/* IVPR only uses the high 16-bits */
+	mtspr	SPRN_IVPR,r4
+
+	/* Setup the defaults for TLB entries */
+	li	r2,(MAS4_TSIZED(BOOK3E_PAGESZ_4K))@l
+#ifdef CONFIG_E200
+	oris	r2,r2,MAS4_TLBSELD(1)@h
+#endif
+	mtspr	SPRN_MAS4, r2
+
+#if 0
+	/* Enable DOZE */
+	mfspr	r2,SPRN_HID0
+	oris	r2,r2,HID0_DOZE@h
+	mtspr	SPRN_HID0, r2
+#endif
+
+#if !defined(CONFIG_BDI_SWITCH)
+	/*
+	 * The Abatron BDI JTAG debugger does not tolerate others
+	 * mucking with the debug registers.
+	 */
+	lis	r2,DBCR0_IDM@h
+	mtspr	SPRN_DBCR0,r2
+	isync
+	/* clear any residual debug events */
+	li	r2,-1
+	mtspr	SPRN_DBSR,r2
+#endif
+
+#ifdef CONFIG_SMP
+	/* Check to see if we're the second processor, and jump
+	 * to the secondary_start code if so
+	 */
+	lis	r24, boot_cpuid@h
+	ori	r24, r24, boot_cpuid@l
+	lwz	r24, 0(r24)
+	cmpwi	r24, -1
+	mfspr	r24,SPRN_PIR
+	bne	__secondary_start
+#endif
+
+	/*
+	 * This is where the main kernel code starts.
+	 */
+
+	/* ptr to current */
+	lis	r2,init_task@h
+	ori	r2,r2,init_task@l
+
+	/* ptr to current thread */
+	addi	r4,r2,THREAD	/* init task's THREAD */
+	mtspr	SPRN_SPRG_THREAD,r4
+
+	/* stack */
+	lis	r1,init_thread_union@h
+	ori	r1,r1,init_thread_union@l
+	li	r0,0
+	stwu	r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+
+	rlwinm	r22,r1,0,0,31-THREAD_SHIFT	/* current thread_info */
+	stw	r24, TI_CPU(r22)
+
+	bl	early_init
+
+#ifdef CONFIG_DYNAMIC_MEMSTART
+	lis	r3,kernstart_addr@ha
+	la	r3,kernstart_addr@l(r3)
+#ifdef CONFIG_PHYS_64BIT
+	stw	r23,0(r3)
+	stw	r25,4(r3)
+#else
+	stw	r25,0(r3)
+#endif
+#endif
+
+/*
+ * Decide what sort of machine this is and initialize the MMU.
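+ * The r3/r4 pair passed below is the 64-bit physical device tree
+ * address captured by the tlbsx sequence at _start, so this is in
+ * effect machine_init(dt_phys) -- a sketch, assuming machine_init()
+ * takes the address as a 64-bit value on 32-bit FSL Book-E.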
+ */ + mr r3,r30 + mr r4,r31 + bl machine_init + bl MMU_init + + /* Setup PTE pointers for the Abatron bdiGDB */ + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + lis r4, KERNELBASE@h + ori r4, r4, KERNELBASE@l + stw r5, 0(r4) /* Save abatron_pteptrs at a fixed location */ + stw r6, 0(r5) + + /* Let's move on */ + lis r4,start_kernel@h + ori r4,r4,start_kernel@l + lis r3,MSR_KERNEL@h + ori r3,r3,MSR_KERNEL@l + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + rfi /* change context and jump to start_kernel */ + +/* Macros to hide the PTE size differences + * + * FIND_PTE -- walks the page tables given EA & pgdir pointer + * r10 -- EA of fault + * r11 -- PGDIR pointer + * r12 -- free + * label 2: is the bailout case + * + * if we find the pte (fall through): + * r11 is low pte word + * r12 is pointer to the pte + * r10 is the pshift from the PGD, if we're a hugepage + */ +#ifdef CONFIG_PTE_64BIT +#ifdef CONFIG_HUGETLB_PAGE +#define FIND_PTE \ + rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ + lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ + rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \ + blt 1000f; /* Normal non-huge page */ \ + beq 2f; /* Bail if no table */ \ + oris r11, r11, PD_HUGE@h; /* Put back address bit */ \ + andi. r10, r11, HUGEPD_SHIFT_MASK@l; /* extract size field */ \ + xor r12, r10, r11; /* drop size bits from pointer */ \ + b 1001f; \ +1000: rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ + li r10, 0; /* clear r10 */ \ +1001: lwz r11, 4(r12); /* Get pte entry */ +#else +#define FIND_PTE \ + rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ + lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ + rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \ + beq 2f; /* Bail if no table */ \ + rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ + lwz r11, 4(r12); /* Get pte entry */ +#endif /* HUGEPAGE */ +#else /* !PTE_64BIT */ +#define FIND_PTE \ + rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \ + lwz r11, 0(r11); /* Get L1 entry */ \ + rlwinm. r12, r11, 0, 0, 19; /* Extract L2 (pte) base address */ \ + beq 2f; /* Bail if no table */ \ + rlwimi r12, r10, 22, 20, 29; /* Compute PTE address */ \ + lwz r11, 0(r12); /* Get Linux PTE */ +#endif + +/* + * Interrupt vector entry code + * + * The Book E MMUs are always on so we don't need to handle + * interrupts in real mode as with previous PPC processors. In + * this case we handle interrupts in the kernel virtual address + * space. + * + * Interrupt vectors are dynamically placed relative to the + * interrupt prefix as determined by the address of interrupt_base. + * The interrupt vectors offsets are programmed using the labels + * for each interrupt vector entry. + * + * Interrupt vectors must be aligned on a 16 byte boundary. + * We align on a 32 byte cache line boundary for good measure. + */ + +interrupt_base: + /* Critical Input Interrupt */ + CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) + + /* Machine Check Interrupt */ +#ifdef CONFIG_E200 + /* no RFMCI, MCSRRs on E200 */ + CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) +#else + MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) +#endif + + /* Data Storage Interrupt */ + START_EXCEPTION(DataStorage) + NORMAL_EXCEPTION_PROLOG + mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + stw r5,_ESR(r11) + mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + andis. 
r10,r5,(ESR_ILK|ESR_DLK)@h
+	bne	1f
+	EXC_XFER_LITE(0x0300, handle_page_fault)
+1:
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	EXC_XFER_EE_LITE(0x0300, CacheLockingException)
+
+	/* Instruction Storage Interrupt */
+	INSTRUCTION_STORAGE_EXCEPTION
+
+	/* External Input Interrupt */
+	EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE)
+
+	/* Alignment Interrupt */
+	ALIGNMENT_EXCEPTION
+
+	/* Program Interrupt */
+	PROGRAM_EXCEPTION
+
+	/* Floating Point Unavailable Interrupt */
+#ifdef CONFIG_PPC_FPU
+	FP_UNAVAILABLE_EXCEPTION
+#else
+#ifdef CONFIG_E200
+	/* E200 treats 'normal' floating point instructions as FP Unavail exception */
+	EXCEPTION(0x0800, FloatingPointUnavailable, program_check_exception, EXC_XFER_EE)
+#else
+	EXCEPTION(0x0800, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
+#endif
+#endif
+
+	/* System Call Interrupt */
+	START_EXCEPTION(SystemCall)
+	NORMAL_EXCEPTION_PROLOG
+	EXC_XFER_EE_LITE(0x0c00, DoSyscall)
+
+	/* Auxiliary Processor Unavailable Interrupt */
+	EXCEPTION(0x2900, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
+
+	/* Decrementer Interrupt */
+	DECREMENTER_EXCEPTION
+
+	/* Fixed Internal Timer Interrupt */
+	/* TODO: Add FIT support */
+	EXCEPTION(0x3100, FixedIntervalTimer, unknown_exception, EXC_XFER_EE)
+
+	/* Watchdog Timer Interrupt */
+#ifdef CONFIG_BOOKE_WDT
+	CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException)
+#else
+	CRITICAL_EXCEPTION(0x3200, WatchdogTimer, unknown_exception)
+#endif
+
+	/* Data TLB Error Interrupt */
+	START_EXCEPTION(DataTLBError)
+	mtspr	SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
+	mfspr	r10, SPRN_SPRG_THREAD
+	stw	r11, THREAD_NORMSAVE(0)(r10)
+	stw	r12, THREAD_NORMSAVE(1)(r10)
+	stw	r13, THREAD_NORMSAVE(2)(r10)
+	mfcr	r13
+	stw	r13, THREAD_NORMSAVE(3)(r10)
+	mfspr	r10, SPRN_DEAR		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11, PAGE_OFFSET@h
+	cmplw	5, r10, r11
+	blt	5, 3f
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+
+	mfspr	r12,SPRN_MAS1		/* Set TID to 0 */
+	rlwinm	r12,r12,0,16,1
+	mtspr	SPRN_MAS1,r12
+
+	b	4f
+
+	/* Get the PGD for the current thread */
+3:
+	mfspr	r11,SPRN_SPRG_THREAD
+	lwz	r11,PGDIR(r11)
+
+4:
+	/* Mask of required permission bits. Note that while we
+	 * do copy ESR:ST to _PAGE_RW position as trying to write
+	 * to an RO page is pretty common, we don't do it with
+	 * _PAGE_DIRTY. We could do it, but it's a fairly rare
+	 * event so I'd rather take the overhead when it happens
+	 * rather than adding an instruction here. We should measure
+	 * whether the whole thing is worth it in the first place
+	 * as we could avoid loading SPRN_ESR completely in the first
+	 * place...
+	 *
+	 * TODO: Is it worth doing that mfspr & rlwimi in the first
+	 * place or can we save a couple of instructions here ?
+	 */
+	mfspr	r12,SPRN_ESR
+#ifdef CONFIG_PTE_64BIT
+	li	r13,_PAGE_PRESENT
+	oris	r13,r13,_PAGE_ACCESSED@h
+#else
+	li	r13,_PAGE_PRESENT|_PAGE_ACCESSED
+#endif
+	rlwimi	r13,r12,11,29,29
+
+	FIND_PTE
+	andc.	r13,r13,r11		/* Check permission */
+
+#ifdef CONFIG_PTE_64BIT
+#ifdef CONFIG_SMP
+	subf	r13,r11,r12		/* create false data dep */
+	lwzx	r13,r11,r13		/* Get upper pte bits */
+#else
+	lwz	r13,0(r12)		/* Get upper pte bits */
+#endif
+#endif
+
+	bne	2f			/* Bail if permission/valid mismatch */
+
+	/* Jump to common tlb load */
+	b	finish_tlb_load
+2:
+	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
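+	 *
+	 * For reference, the permission test above was, in C terms
+	 * (illustrative sketch):
+	 *   needed = _PAGE_PRESENT | _PAGE_ACCESSED
+	 *            | ((esr & ESR_ST) ? _PAGE_RW : 0);
+	 *   if (needed & ~pte)
+	 *           bail;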
+ */
+	mfspr	r10, SPRN_SPRG_THREAD
+	lwz	r11, THREAD_NORMSAVE(3)(r10)
+	mtcr	r11
+	lwz	r13, THREAD_NORMSAVE(2)(r10)
+	lwz	r12, THREAD_NORMSAVE(1)(r10)
+	lwz	r11, THREAD_NORMSAVE(0)(r10)
+	mfspr	r10, SPRN_SPRG_RSCRATCH0
+	b	DataStorage
+
+	/* Instruction TLB Error Interrupt */
+	/*
+	 * Nearly the same as above, except we get our
+	 * information from different registers and bailout
+	 * to a different point.
+	 */
+	START_EXCEPTION(InstructionTLBError)
+	mtspr	SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
+	mfspr	r10, SPRN_SPRG_THREAD
+	stw	r11, THREAD_NORMSAVE(0)(r10)
+	stw	r12, THREAD_NORMSAVE(1)(r10)
+	stw	r13, THREAD_NORMSAVE(2)(r10)
+	mfcr	r13
+	stw	r13, THREAD_NORMSAVE(3)(r10)
+	mfspr	r10, SPRN_SRR0		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11, PAGE_OFFSET@h
+	cmplw	5, r10, r11
+	blt	5, 3f
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+
+	mfspr	r12,SPRN_MAS1		/* Set TID to 0 */
+	rlwinm	r12,r12,0,16,1
+	mtspr	SPRN_MAS1,r12
+
+	/* Make up the required permissions for kernel code */
+#ifdef CONFIG_PTE_64BIT
+	li	r13,_PAGE_PRESENT | _PAGE_BAP_SX
+	oris	r13,r13,_PAGE_ACCESSED@h
+#else
+	li	r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+#endif
+	b	4f
+
+	/* Get the PGD for the current thread */
+3:
+	mfspr	r11,SPRN_SPRG_THREAD
+	lwz	r11,PGDIR(r11)
+
+	/* Make up the required permissions for user code */
+#ifdef CONFIG_PTE_64BIT
+	li	r13,_PAGE_PRESENT | _PAGE_BAP_UX
+	oris	r13,r13,_PAGE_ACCESSED@h
+#else
+	li	r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+#endif
+
+4:
+	FIND_PTE
+	andc.	r13,r13,r11		/* Check permission */
+
+#ifdef CONFIG_PTE_64BIT
+#ifdef CONFIG_SMP
+	subf	r13,r11,r12		/* create false data dep */
+	lwzx	r13,r11,r13		/* Get upper pte bits */
+#else
+	lwz	r13,0(r12)		/* Get upper pte bits */
+#endif
+#endif
+
+	bne	2f			/* Bail if permission mismatch */
+
+	/* Jump to common TLB load point */
+	b	finish_tlb_load
+
+2:
+	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mfspr	r10, SPRN_SPRG_THREAD
+	lwz	r11, THREAD_NORMSAVE(3)(r10)
+	mtcr	r11
+	lwz	r13, THREAD_NORMSAVE(2)(r10)
+	lwz	r12, THREAD_NORMSAVE(1)(r10)
+	lwz	r11, THREAD_NORMSAVE(0)(r10)
+	mfspr	r10, SPRN_SPRG_RSCRATCH0
+	b	InstructionStorage
+
+#ifdef CONFIG_SPE
+	/* SPE Unavailable */
+	START_EXCEPTION(SPEUnavailable)
+	NORMAL_EXCEPTION_PROLOG
+	bne	load_up_spe
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	EXC_XFER_EE_LITE(0x2010, KernelSPE)
+#else
+	EXCEPTION(0x2020, SPEUnavailable, unknown_exception, EXC_XFER_EE)
+#endif /* CONFIG_SPE */
+
+	/* SPE Floating Point Data */
+#ifdef CONFIG_SPE
+	EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE);
+
+	/* SPE Floating Point Round */
+	EXCEPTION(0x2050, SPEFloatingPointRound, SPEFloatingPointRoundException, EXC_XFER_EE)
+#else
+	EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception, EXC_XFER_EE)
+	EXCEPTION(0x2050, SPEFloatingPointRound, unknown_exception, EXC_XFER_EE)
+#endif /* CONFIG_SPE */
+
+	/* Performance Monitor */
+	EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD)
+
+	EXCEPTION(0x2070, Doorbell, doorbell_exception, EXC_XFER_STD)
+
+	CRITICAL_EXCEPTION(0x2080, CriticalDoorbell, unknown_exception)
+
+	/* Debug Interrupt */
+	DEBUG_DEBUG_EXCEPTION
+	DEBUG_CRIT_EXCEPTION
+
+/*
+ * Local functions
+ */
+
+/*
+ * Both the instruction and data TLB miss get to this
+ * point to load the TLB.
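+ * Register state on entry (established by the miss handlers above):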
+ * r10 - tsize encoding (if HUGETLB_PAGE) or available to use + * r11 - TLB (info from Linux PTE) + * r12 - available to use + * r13 - upper bits of PTE (if PTE_64BIT) or available to use + * CR5 - results of addr >= PAGE_OFFSET + * MAS0, MAS1 - loaded with proper value when we get here + * MAS2, MAS3 - will need additional info from Linux PTE + * Upon exit, we reload everything and RFI. + */ +finish_tlb_load: +#ifdef CONFIG_HUGETLB_PAGE + cmpwi 6, r10, 0 /* check for huge page */ + beq 6, finish_tlb_load_cont /* !huge */ + + /* Alas, we need more scratch registers for hugepages */ + mfspr r12, SPRN_SPRG_THREAD + stw r14, THREAD_NORMSAVE(4)(r12) + stw r15, THREAD_NORMSAVE(5)(r12) + stw r16, THREAD_NORMSAVE(6)(r12) + stw r17, THREAD_NORMSAVE(7)(r12) + + /* Get the next_tlbcam_idx percpu var */ +#ifdef CONFIG_SMP + lwz r12, THREAD_INFO-THREAD(r12) + lwz r15, TI_CPU(r12) + lis r14, __per_cpu_offset@h + ori r14, r14, __per_cpu_offset@l + rlwinm r15, r15, 2, 0, 29 + lwzx r16, r14, r15 +#else + li r16, 0 +#endif + lis r17, next_tlbcam_idx@h + ori r17, r17, next_tlbcam_idx@l + add r17, r17, r16 /* r17 = *next_tlbcam_idx */ + lwz r15, 0(r17) /* r15 = next_tlbcam_idx */ + + lis r14, MAS0_TLBSEL(1)@h /* select TLB1 (TLBCAM) */ + rlwimi r14, r15, 16, 4, 15 /* next_tlbcam_idx entry */ + mtspr SPRN_MAS0, r14 + + /* Extract TLB1CFG(NENTRY) */ + mfspr r16, SPRN_TLB1CFG + andi. r16, r16, 0xfff + + /* Update next_tlbcam_idx, wrapping when necessary */ + addi r15, r15, 1 + cmpw r15, r16 + blt 100f + lis r14, tlbcam_index@h + ori r14, r14, tlbcam_index@l + lwz r15, 0(r14) +100: stw r15, 0(r17) + + /* + * Calc MAS1_TSIZE from r10 (which has pshift encoded) + * tlb_enc = (pshift - 10). + */ + subi r15, r10, 10 + mfspr r16, SPRN_MAS1 + rlwimi r16, r15, 7, 20, 24 + mtspr SPRN_MAS1, r16 + + /* copy the pshift for use later */ + mr r14, r10 + + /* fall through */ + +#endif /* CONFIG_HUGETLB_PAGE */ + + /* + * We set execute, because we don't have the granularity to + * properly set this at the page level (Linux problem). + * Many of these bits are software only. Bits we don't set + * here we (properly should) assume have the appropriate value. + */ +finish_tlb_load_cont: +#ifdef CONFIG_PTE_64BIT + rlwinm r12, r11, 32-2, 26, 31 /* Move in perm bits */ + andi. r10, r11, _PAGE_DIRTY + bne 1f + li r10, MAS3_SW | MAS3_UW + andc r12, r12, r10 +1: rlwimi r12, r13, 20, 0, 11 /* grab RPN[32:43] */ + rlwimi r12, r11, 20, 12, 19 /* grab RPN[44:51] */ +2: mtspr SPRN_MAS3, r12 +BEGIN_MMU_FTR_SECTION + srwi r10, r13, 12 /* grab RPN[12:31] */ + mtspr SPRN_MAS7, r10 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) +#else + li r10, (_PAGE_EXEC | _PAGE_PRESENT) + mr r13, r11 + rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */ + and r12, r11, r10 + andi. 
r10, r11, _PAGE_USER /* Test for _PAGE_USER */ + slwi r10, r12, 1 + or r10, r10, r12 + iseleq r12, r12, r10 + rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */ + mtspr SPRN_MAS3, r13 +#endif + + mfspr r12, SPRN_MAS2 +#ifdef CONFIG_PTE_64BIT + rlwimi r12, r11, 32-19, 27, 31 /* extract WIMGE from pte */ +#else + rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */ +#endif +#ifdef CONFIG_HUGETLB_PAGE + beq 6, 3f /* don't mask if page isn't huge */ + li r13, 1 + slw r13, r13, r14 + subi r13, r13, 1 + rlwinm r13, r13, 0, 0, 19 /* bottom bits used for WIMGE/etc */ + andc r12, r12, r13 /* mask off ea bits within the page */ +#endif +3: mtspr SPRN_MAS2, r12 + +#ifdef CONFIG_E200 + /* Round robin TLB1 entries assignment */ + mfspr r12, SPRN_MAS0 + + /* Extract TLB1CFG(NENTRY) */ + mfspr r11, SPRN_TLB1CFG + andi. r11, r11, 0xfff + + /* Extract MAS0(NV) */ + andi. r13, r12, 0xfff + addi r13, r13, 1 + cmpw 0, r13, r11 + addi r12, r12, 1 + + /* check if we need to wrap */ + blt 7f + + /* wrap back to first free tlbcam entry */ + lis r13, tlbcam_index@ha + lwz r13, tlbcam_index@l(r13) + rlwimi r12, r13, 0, 20, 31 +7: + mtspr SPRN_MAS0,r12 +#endif /* CONFIG_E200 */ + +tlb_write_entry: + tlbwe + + /* Done...restore registers and get out of here. */ + mfspr r10, SPRN_SPRG_THREAD +#ifdef CONFIG_HUGETLB_PAGE + beq 6, 8f /* skip restore for 4k page faults */ + lwz r14, THREAD_NORMSAVE(4)(r10) + lwz r15, THREAD_NORMSAVE(5)(r10) + lwz r16, THREAD_NORMSAVE(6)(r10) + lwz r17, THREAD_NORMSAVE(7)(r10) +#endif +8: lwz r11, THREAD_NORMSAVE(3)(r10) + mtcr r11 + lwz r13, THREAD_NORMSAVE(2)(r10) + lwz r12, THREAD_NORMSAVE(1)(r10) + lwz r11, THREAD_NORMSAVE(0)(r10) + mfspr r10, SPRN_SPRG_RSCRATCH0 + rfi /* Force context change */ + +#ifdef CONFIG_SPE +/* Note that the SPE support is closely modeled after the AltiVec + * support. Changes to one are likely to be applicable to the + * other! */ +load_up_spe: +/* + * Disable SPE for the task which had SPE previously, + * and save its SPE registers in its thread_struct. + * Enables SPE for use in the kernel on return. + * On SMP we know the SPE units are free, since we give it up every + * switch. -- Kumar + */ + mfmsr r5 + oris r5,r5,MSR_SPE@h + mtmsr r5 /* enable use of SPE now */ + isync +/* + * For SMP, we don't do lazy SPE switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_spe in switch_to. 
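+ *
+ * UP lazy-switch sketch: last_task_used_spe caches the current SPE
+ * owner.  The code below saves that task's 32 EVRs plus the
+ * accumulator into its thread_struct and clears MSR_SPE in its saved
+ * MSR before this task takes ownership.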
+ */ +#ifndef CONFIG_SMP + lis r3,last_task_used_spe@ha + lwz r4,last_task_used_spe@l(r3) + cmpi 0,r4,0 + beq 1f + addi r4,r4,THREAD /* want THREAD of last_task_used_spe */ + SAVE_32EVRS(0,r10,r4,THREAD_EVR0) + evxor evr10, evr10, evr10 /* clear out evr10 */ + evmwumiaa evr10, evr10, evr10 /* evr10 <- ACC = 0 * 0 + ACC */ + li r5,THREAD_ACC + evstddx evr10, r4, r5 /* save off accumulator */ + lwz r5,PT_REGS(r4) + lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r10,MSR_SPE@h + andc r4,r4,r10 /* disable SPE for previous task */ + stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* !CONFIG_SMP */ + /* enable use of SPE after return */ + oris r9,r9,MSR_SPE@h + mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ + li r4,1 + li r10,THREAD_ACC + stw r4,THREAD_USED_SPE(r5) + evlddx evr4,r10,r5 + evmra evr4,evr4 + REST_32EVRS(0,r10,r5,THREAD_EVR0) +#ifndef CONFIG_SMP + subi r4,r5,THREAD + stw r4,last_task_used_spe@l(r3) +#endif /* !CONFIG_SMP */ + /* restore registers and return */ +2: REST_4GPRS(3, r11) + lwz r10,_CCR(r11) + REST_GPR(1, r11) + mtcr r10 + lwz r10,_LINK(r11) + mtlr r10 + REST_GPR(10, r11) + mtspr SPRN_SRR1,r9 + mtspr SPRN_SRR0,r12 + REST_GPR(9, r11) + REST_GPR(12, r11) + lwz r11,GPR11(r11) + rfi + +/* + * SPE unavailable trap from kernel - print a message, but let + * the task use SPE in the kernel until it returns to user mode. + */ +KernelSPE: + lwz r3,_MSR(r1) + oris r3,r3,MSR_SPE@h + stw r3,_MSR(r1) /* enable use of SPE after return */ +#ifdef CONFIG_PRINTK + lis r3,87f@h + ori r3,r3,87f@l + mr r4,r2 /* current */ + lwz r5,_NIP(r1) + bl printk +#endif + b ret_from_except +#ifdef CONFIG_PRINTK +87: .string "SPE used in kernel (task=%p, pc=%x) \n" +#endif + .align 4,0 + +#endif /* CONFIG_SPE */ + +/* + * Global functions + */ + +/* Adjust or setup IVORs for e200 */ +_GLOBAL(__setup_e200_ivors) + li r3,DebugDebug@l + mtspr SPRN_IVOR15,r3 + li r3,SPEUnavailable@l + mtspr SPRN_IVOR32,r3 + li r3,SPEFloatingPointData@l + mtspr SPRN_IVOR33,r3 + li r3,SPEFloatingPointRound@l + mtspr SPRN_IVOR34,r3 + sync + blr + +/* Adjust or setup IVORs for e500v1/v2 */ +_GLOBAL(__setup_e500_ivors) + li r3,DebugCrit@l + mtspr SPRN_IVOR15,r3 + li r3,SPEUnavailable@l + mtspr SPRN_IVOR32,r3 + li r3,SPEFloatingPointData@l + mtspr SPRN_IVOR33,r3 + li r3,SPEFloatingPointRound@l + mtspr SPRN_IVOR34,r3 + li r3,PerformanceMonitor@l + mtspr SPRN_IVOR35,r3 + sync + blr + +/* Adjust or setup IVORs for e500mc */ +_GLOBAL(__setup_e500mc_ivors) + li r3,DebugDebug@l + mtspr SPRN_IVOR15,r3 + li r3,PerformanceMonitor@l + mtspr SPRN_IVOR35,r3 + li r3,Doorbell@l + mtspr SPRN_IVOR36,r3 + li r3,CriticalDoorbell@l + mtspr SPRN_IVOR37,r3 + sync + blr + +/* + * extern void giveup_altivec(struct task_struct *prev) + * + * The e500 core does not have an AltiVec unit. 
+ */ +_GLOBAL(giveup_altivec) + blr + +#ifdef CONFIG_SPE +/* + * extern void giveup_spe(struct task_struct *prev) + * + */ +_GLOBAL(giveup_spe) + mfmsr r5 + oris r5,r5,MSR_SPE@h + mtmsr r5 /* enable use of SPE now */ + isync + cmpi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + lwz r5,PT_REGS(r3) + cmpi 0,r5,0 + SAVE_32EVRS(0, r4, r3, THREAD_EVR0) + evxor evr6, evr6, evr6 /* clear out evr6 */ + evmwumiaa evr6, evr6, evr6 /* evr6 <- ACC = 0 * 0 + ACC */ + li r4,THREAD_ACC + evstddx evr6, r4, r3 /* save off accumulator */ + beq 1f + lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r3,MSR_SPE@h + andc r4,r4,r3 /* disable SPE for previous task */ + stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + lis r4,last_task_used_spe@ha + stw r5,last_task_used_spe@l(r4) +#endif /* !CONFIG_SMP */ + blr +#endif /* CONFIG_SPE */ + +/* + * extern void giveup_fpu(struct task_struct *prev) + * + * Not all FSL Book-E cores have an FPU + */ +#ifndef CONFIG_PPC_FPU +_GLOBAL(giveup_fpu) + blr +#endif + +/* + * extern void abort(void) + * + * At present, this routine just applies a system reset. + */ +_GLOBAL(abort) + li r13,0 + mtspr SPRN_DBCR0,r13 /* disable all debug events */ + isync + mfmsr r13 + ori r13,r13,MSR_DE@l /* Enable Debug Events */ + mtmsr r13 + isync + mfspr r13,SPRN_DBCR0 + lis r13,(DBCR0_IDM|DBCR0_RST_CHIP)@h + mtspr SPRN_DBCR0,r13 + isync + +_GLOBAL(set_context) + +#ifdef CONFIG_BDI_SWITCH + /* Context switch the PTE pointer for the Abatron BDI2000. + * The PGDIR is the second parameter. + */ + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + stw r4, 0x4(r5) +#endif + mtspr SPRN_PID,r3 + isync /* Force context change */ + blr + +_GLOBAL(flush_dcache_L1) + mfspr r3,SPRN_L1CFG0 + + rlwinm r5,r3,9,3 /* Extract cache block size */ + twlgti r5,1 /* Only 32 and 64 byte cache blocks + * are currently defined. + */ + li r4,32 + subfic r6,r5,2 /* r6 = log2(1KiB / cache block size) - + * log2(number of ways) + */ + slw r5,r4,r5 /* r5 = cache block size */ + + rlwinm r7,r3,0,0xff /* Extract number of KiB in the cache */ + mulli r7,r7,13 /* An 8-way cache will require 13 + * loads per set. + */ + slw r7,r7,r6 + + /* save off HID0 and set DCFA */ + mfspr r8,SPRN_HID0 + ori r9,r8,HID0_DCFA@l + mtspr SPRN_HID0,r9 + isync + + lis r4,KERNELBASE@h + mtctr r7 + +1: lwz r3,0(r4) /* Load... */ + add r4,r4,r5 + bdnz 1b + + msync + lis r4,KERNELBASE@h + mtctr r7 + +1: dcbf 0,r4 /* ...and flush. */ + add r4,r4,r5 + bdnz 1b + + /* restore HID0 */ + mtspr SPRN_HID0,r8 + isync + + blr + +#ifdef CONFIG_SMP +/* When we get here, r24 needs to hold the CPU # */ + .globl __secondary_start +__secondary_start: + lis r3,__secondary_hold_acknowledge@h + ori r3,r3,__secondary_hold_acknowledge@l + stw r24,0(r3) + + li r3,0 + mr r4,r24 /* Why? */ + bl call_setup_cpu + + lis r3,tlbcam_index@ha + lwz r3,tlbcam_index@l(r3) + mtctr r3 + li r26,0 /* r26 safe? 
*/ + + /* Load each CAM entry */ +1: mr r3,r26 + bl loadcam_entry + addi r26,r26,1 + bdnz 1b + + /* get current_thread_info and current */ + lis r1,secondary_ti@ha + lwz r1,secondary_ti@l(r1) + lwz r2,TI_TASK(r1) + + /* stack */ + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + li r0,0 + stw r0,0(r1) + + /* ptr to current thread */ + addi r4,r2,THREAD /* address of our thread_struct */ + mtspr SPRN_SPRG_THREAD,r4 + + /* Setup the defaults for TLB entries */ + li r4,(MAS4_TSIZED(BOOK3E_PAGESZ_4K))@l + mtspr SPRN_MAS4,r4 + + /* Jump to start_secondary */ + lis r4,MSR_KERNEL@h + ori r4,r4,MSR_KERNEL@l + lis r3,start_secondary@h + ori r3,r3,start_secondary@l + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + sync + rfi + sync + + .globl __secondary_hold_acknowledge +__secondary_hold_acknowledge: + .long -1 +#endif + +/* + * We put a few things here that have to be page-aligned. This stuff + * goes at the beginning of the data segment, which is page-aligned. + */ + .data + .align 12 + .globl sdata +sdata: + .globl empty_zero_page +empty_zero_page: + .space 4096 + .globl swapper_pg_dir +swapper_pg_dir: + .space PGD_TABLE_SIZE + +/* + * Room for two PTE pointers, usually the kernel and current user pointers + * to their respective root page table. + */ +abatron_pteptrs: + .space 8 diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c new file mode 100644 index 00000000..2bc0584b --- /dev/null +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -0,0 +1,363 @@ +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. Derived from + * "arch/x86/kernel/hw_breakpoint.c" + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright 2010 IBM Corporation + * Author: K.Prasad <prasad@linux.vnet.ibm.com> + * + */ + +#include <linux/hw_breakpoint.h> +#include <linux/notifier.h> +#include <linux/kprobes.h> +#include <linux/percpu.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/smp.h> + +#include <asm/hw_breakpoint.h> +#include <asm/processor.h> +#include <asm/sstep.h> +#include <asm/uaccess.h> + +/* + * Stores the breakpoints currently in use on each breakpoint address + * register for every cpu + */ +static DEFINE_PER_CPU(struct perf_event *, bp_per_reg); + +/* + * Returns total number of data or instruction breakpoints available. + */ +int hw_breakpoint_slots(int type) +{ + if (type == TYPE_DATA) + return HBP_NUM; + return 0; /* no instruction breakpoints available */ +} + +/* + * Install a perf counter breakpoint. + * + * We seek a free debug address register and use it for this + * breakpoint. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. 
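+ *
+ * Sketch of the DABR value programmed below (inferred from the OR
+ * construction, not from a register spec): dabr = address |
+ * info->type | DABR_TRANSLATION -- the double-word-aligned address
+ * with the read/write match enables and the translation bit in the
+ * low-order bits.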
+ */ +int arch_install_hw_breakpoint(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + struct perf_event **slot = &__get_cpu_var(bp_per_reg); + + *slot = bp; + + /* + * Do not install DABR values if the instruction must be single-stepped. + * If so, DABR will be populated in single_step_dabr_instruction(). + */ + if (current->thread.last_hit_ubp != bp) + set_dabr(info->address | info->type | DABR_TRANSLATION); + + return 0; +} + +/* + * Uninstall the breakpoint contained in the given counter. + * + * First we search the debug address register it uses and then we disable + * it. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. + */ +void arch_uninstall_hw_breakpoint(struct perf_event *bp) +{ + struct perf_event **slot = &__get_cpu_var(bp_per_reg); + + if (*slot != bp) { + WARN_ONCE(1, "Can't find the breakpoint"); + return; + } + + *slot = NULL; + set_dabr(0); +} + +/* + * Perform cleanup of arch-specific counters during unregistration + * of the perf-event + */ +void arch_unregister_hw_breakpoint(struct perf_event *bp) +{ + /* + * If the breakpoint is unregistered between a hw_breakpoint_handler() + * and the single_step_dabr_instruction(), then cleanup the breakpoint + * restoration variables to prevent dangling pointers. + */ + if (bp->ctx->task) + bp->ctx->task->thread.last_hit_ubp = NULL; +} + +/* + * Check for virtual address in kernel space. + */ +int arch_check_bp_in_kernelspace(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + + return is_kernel_addr(info->address); +} + +int arch_bp_generic_fields(int type, int *gen_bp_type) +{ + switch (type) { + case DABR_DATA_READ: + *gen_bp_type = HW_BREAKPOINT_R; + break; + case DABR_DATA_WRITE: + *gen_bp_type = HW_BREAKPOINT_W; + break; + case (DABR_DATA_WRITE | DABR_DATA_READ): + *gen_bp_type = (HW_BREAKPOINT_W | HW_BREAKPOINT_R); + break; + default: + return -EINVAL; + } + return 0; +} + +/* + * Validate the arch-specific HW Breakpoint register settings + */ +int arch_validate_hwbkpt_settings(struct perf_event *bp) +{ + int ret = -EINVAL; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + + if (!bp) + return ret; + + switch (bp->attr.bp_type) { + case HW_BREAKPOINT_R: + info->type = DABR_DATA_READ; + break; + case HW_BREAKPOINT_W: + info->type = DABR_DATA_WRITE; + break; + case HW_BREAKPOINT_R | HW_BREAKPOINT_W: + info->type = (DABR_DATA_READ | DABR_DATA_WRITE); + break; + default: + return ret; + } + + info->address = bp->attr.bp_addr; + info->len = bp->attr.bp_len; + + /* + * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8) + * and breakpoint addresses are aligned to nearest double-word + * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the + * 'symbolsize' should satisfy the check below. + */ + if (info->len > + (HW_BREAKPOINT_LEN - (info->address & HW_BREAKPOINT_ALIGN))) + return -EINVAL; + return 0; +} + +/* + * Restores the breakpoint on the debug registers. + * Invoke this function if it is known that the execution context is + * about to change to cause loss of MSR_SE settings. + */ +void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) +{ + struct arch_hw_breakpoint *info; + + if (likely(!tsk->thread.last_hit_ubp)) + return; + + info = counter_arch_bp(tsk->thread.last_hit_ubp); + regs->msr &= ~MSR_SE; + set_dabr(info->address | info->type | DABR_TRANSLATION); + tsk->thread.last_hit_ubp = NULL; +} + +/* + * Handle debug exception notifications. 
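+ *
+ * In outline (a summary of the code below, not extra behaviour):
+ *	1. clear DABR so the exception cannot immediately recurse;
+ *	2. look up the per-cpu breakpoint under rcu_read_lock();
+ *	3. ptrace breakpoints are one-shot: report and return early;
+ *	4. mark hits outside [bp_addr, bp_addr + bp_len) as extraneous;
+ *	5. single-step user-mode instructions, emulate kernel ones with
+ *	   emulate_step(), then re-arm DABR.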
+ */ +int __kprobes hw_breakpoint_handler(struct die_args *args) +{ + int rc = NOTIFY_STOP; + struct perf_event *bp; + struct pt_regs *regs = args->regs; + int stepped = 1; + struct arch_hw_breakpoint *info; + unsigned int instr; + unsigned long dar = regs->dar; + + /* Disable breakpoints during exception handling */ + set_dabr(0); + + /* + * The counter may be concurrently released but that can only + * occur from a call_rcu() path. We can then safely fetch + * the breakpoint, use its callback, touch its counter + * while we are in an rcu_read_lock() path. + */ + rcu_read_lock(); + + bp = __get_cpu_var(bp_per_reg); + if (!bp) + goto out; + info = counter_arch_bp(bp); + + /* + * Return early after invoking user-callback function without restoring + * DABR if the breakpoint is from ptrace which always operates in + * one-shot mode. The ptrace-ed process will receive the SIGTRAP signal + * generated in do_dabr(). + */ + if (bp->overflow_handler == ptrace_triggered) { + perf_bp_event(bp, regs); + rc = NOTIFY_DONE; + goto out; + } + + /* + * Verify if dar lies within the address range occupied by the symbol + * being watched to filter extraneous exceptions. If it doesn't, + * we still need to single-step the instruction, but we don't + * generate an event. + */ + info->extraneous_interrupt = !((bp->attr.bp_addr <= dar) && + (dar - bp->attr.bp_addr < bp->attr.bp_len)); + + /* Do not emulate user-space instructions, instead single-step them */ + if (user_mode(regs)) { + bp->ctx->task->thread.last_hit_ubp = bp; + regs->msr |= MSR_SE; + goto out; + } + + stepped = 0; + instr = 0; + if (!__get_user_inatomic(instr, (unsigned int *) regs->nip)) + stepped = emulate_step(regs, instr); + + /* + * emulate_step() could not execute it. We've failed in reliably + * handling the hw-breakpoint. Unregister it and throw a warning + * message to let the user know about it. + */ + if (!stepped) { + WARN(1, "Unable to handle hardware breakpoint. Breakpoint at " + "0x%lx will be disabled.", info->address); + perf_event_disable(bp); + goto out; + } + /* + * As a policy, the callback is invoked in a 'trigger-after-execute' + * fashion + */ + if (!info->extraneous_interrupt) + perf_bp_event(bp, regs); + + set_dabr(info->address | info->type | DABR_TRANSLATION); +out: + rcu_read_unlock(); + return rc; +} + +/* + * Handle single-step exceptions following a DABR hit. + */ +int __kprobes single_step_dabr_instruction(struct die_args *args) +{ + struct pt_regs *regs = args->regs; + struct perf_event *bp = NULL; + struct arch_hw_breakpoint *bp_info; + + bp = current->thread.last_hit_ubp; + /* + * Check if we are single-stepping as a result of a + * previous HW Breakpoint exception + */ + if (!bp) + return NOTIFY_DONE; + + bp_info = counter_arch_bp(bp); + + /* + * We shall invoke the user-defined callback function in the single + * stepping handler to confirm to 'trigger-after-execute' semantics + */ + if (!bp_info->extraneous_interrupt) + perf_bp_event(bp, regs); + + set_dabr(bp_info->address | bp_info->type | DABR_TRANSLATION); + current->thread.last_hit_ubp = NULL; + + /* + * If the process was being single-stepped by ptrace, let the + * other single-step actions occur (e.g. generate SIGTRAP). + */ + if (test_thread_flag(TIF_SINGLESTEP)) + return NOTIFY_DONE; + + return NOTIFY_STOP; +} + +/* + * Handle debug exception notifications. 
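+ *
+ * This is the die-notifier entry point: DIE_DABR_MATCH is routed to
+ * hw_breakpoint_handler() above and DIE_SSTEP to
+ * single_step_dabr_instruction().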
+ */ +int __kprobes hw_breakpoint_exceptions_notify( + struct notifier_block *unused, unsigned long val, void *data) +{ + int ret = NOTIFY_DONE; + + switch (val) { + case DIE_DABR_MATCH: + ret = hw_breakpoint_handler(data); + break; + case DIE_SSTEP: + ret = single_step_dabr_instruction(data); + break; + } + + return ret; +} + +/* + * Release the user breakpoints used by ptrace + */ +void flush_ptrace_hw_breakpoint(struct task_struct *tsk) +{ + struct thread_struct *t = &tsk->thread; + + unregister_hw_breakpoint(t->ptrace_bps[0]); + t->ptrace_bps[0] = NULL; +} + +void hw_breakpoint_pmu_read(struct perf_event *bp) +{ + /* TODO */ +} diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c new file mode 100644 index 00000000..b01d14ee --- /dev/null +++ b/arch/powerpc/kernel/ibmebus.c @@ -0,0 +1,760 @@ +/* + * IBM PowerPC IBM eBus Infrastructure Support. + * + * Copyright (c) 2005 IBM Corporation + * Joachim Fenkes <fenkes@de.ibm.com> + * Heiko J Schick <schickhj@de.ibm.com> + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
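+ *
+ * Implementation note (summarising the DMA ops below): eBus devices are
+ * assumed to DMA straight to kernel virtual addresses, so the
+ * dma_map_ops here are pass-throughs: "coherent" allocation is a plain
+ * kmalloc(), map_page simply returns page_address() + offset, and only
+ * the full 64-bit DMA mask is supported.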
+ */ + +#include <linux/init.h> +#include <linux/export.h> +#include <linux/console.h> +#include <linux/kobject.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/stat.h> +#include <linux/of_platform.h> +#include <asm/ibmebus.h> +#include <asm/abs_addr.h> + +static struct device ibmebus_bus_device = { /* fake "parent" device */ + .init_name = "ibmebus", +}; + +struct bus_type ibmebus_bus_type; + +/* These devices will automatically be added to the bus during init */ +static struct of_device_id __initdata ibmebus_matches[] = { + { .compatible = "IBM,lhca" }, + { .compatible = "IBM,lhea" }, + {}, +}; + +static void *ibmebus_alloc_coherent(struct device *dev, + size_t size, + dma_addr_t *dma_handle, + gfp_t flag, + struct dma_attrs *attrs) +{ + void *mem; + + mem = kmalloc(size, flag); + *dma_handle = (dma_addr_t)mem; + + return mem; +} + +static void ibmebus_free_coherent(struct device *dev, + size_t size, void *vaddr, + dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + kfree(vaddr); +} + +static dma_addr_t ibmebus_map_page(struct device *dev, + struct page *page, + unsigned long offset, + size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + return (dma_addr_t)(page_address(page) + offset); +} + +static void ibmebus_unmap_page(struct device *dev, + dma_addr_t dma_addr, + size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + return; +} + +static int ibmebus_map_sg(struct device *dev, + struct scatterlist *sgl, + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) { + sg->dma_address = (dma_addr_t) sg_virt(sg); + sg->dma_length = sg->length; + } + + return nents; +} + +static void ibmebus_unmap_sg(struct device *dev, + struct scatterlist *sg, + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + return; +} + +static int ibmebus_dma_supported(struct device *dev, u64 mask) +{ + return mask == DMA_BIT_MASK(64); +} + +static u64 ibmebus_dma_get_required_mask(struct device *dev) +{ + return DMA_BIT_MASK(64); +} + +static struct dma_map_ops ibmebus_dma_ops = { + .alloc = ibmebus_alloc_coherent, + .free = ibmebus_free_coherent, + .map_sg = ibmebus_map_sg, + .unmap_sg = ibmebus_unmap_sg, + .dma_supported = ibmebus_dma_supported, + .get_required_mask = ibmebus_dma_get_required_mask, + .map_page = ibmebus_map_page, + .unmap_page = ibmebus_unmap_page, +}; + +static int ibmebus_match_path(struct device *dev, void *data) +{ + struct device_node *dn = to_platform_device(dev)->dev.of_node; + return (dn->full_name && + (strcasecmp((char *)data, dn->full_name) == 0)); +} + +static int ibmebus_match_node(struct device *dev, void *data) +{ + return to_platform_device(dev)->dev.of_node == data; +} + +static int ibmebus_create_device(struct device_node *dn) +{ + struct platform_device *dev; + int ret; + + dev = of_device_alloc(dn, NULL, &ibmebus_bus_device); + if (!dev) + return -ENOMEM; + + dev->dev.bus = &ibmebus_bus_type; + dev->dev.archdata.dma_ops = &ibmebus_dma_ops; + + ret = of_device_add(dev); + if (ret) + platform_device_put(dev); + return ret; +} + +static int ibmebus_create_devices(const struct of_device_id *matches) +{ + struct device_node *root, *child; + int ret = 0; + + root = of_find_node_by_path("/"); + + for_each_child_of_node(root, child) { + if (!of_match_node(matches, child)) + continue; + + if (bus_find_device(&ibmebus_bus_type, NULL, 
child, + ibmebus_match_node)) + continue; + + ret = ibmebus_create_device(child); + if (ret) { + printk(KERN_ERR "%s: failed to create device (%i)", + __func__, ret); + of_node_put(child); + break; + } + } + + of_node_put(root); + return ret; +} + +int ibmebus_register_driver(struct of_platform_driver *drv) +{ + /* If the driver uses devices that ibmebus doesn't know, add them */ + ibmebus_create_devices(drv->driver.of_match_table); + + drv->driver.bus = &ibmebus_bus_type; + return driver_register(&drv->driver); +} +EXPORT_SYMBOL(ibmebus_register_driver); + +void ibmebus_unregister_driver(struct of_platform_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL(ibmebus_unregister_driver); + +int ibmebus_request_irq(u32 ist, irq_handler_t handler, + unsigned long irq_flags, const char *devname, + void *dev_id) +{ + unsigned int irq = irq_create_mapping(NULL, ist); + + if (irq == NO_IRQ) + return -EINVAL; + + return request_irq(irq, handler, irq_flags, devname, dev_id); +} +EXPORT_SYMBOL(ibmebus_request_irq); + +void ibmebus_free_irq(u32 ist, void *dev_id) +{ + unsigned int irq = irq_find_mapping(NULL, ist); + + free_irq(irq, dev_id); + irq_dispose_mapping(irq); +} +EXPORT_SYMBOL(ibmebus_free_irq); + +static char *ibmebus_chomp(const char *in, size_t count) +{ + char *out = kmalloc(count + 1, GFP_KERNEL); + + if (!out) + return NULL; + + memcpy(out, in, count); + out[count] = '\0'; + if (out[count - 1] == '\n') + out[count - 1] = '\0'; + + return out; +} + +static ssize_t ibmebus_store_probe(struct bus_type *bus, + const char *buf, size_t count) +{ + struct device_node *dn = NULL; + char *path; + ssize_t rc = 0; + + path = ibmebus_chomp(buf, count); + if (!path) + return -ENOMEM; + + if (bus_find_device(&ibmebus_bus_type, NULL, path, + ibmebus_match_path)) { + printk(KERN_WARNING "%s: %s has already been probed\n", + __func__, path); + rc = -EEXIST; + goto out; + } + + if ((dn = of_find_node_by_path(path))) { + rc = ibmebus_create_device(dn); + of_node_put(dn); + } else { + printk(KERN_WARNING "%s: no such device node: %s\n", + __func__, path); + rc = -ENODEV; + } + +out: + kfree(path); + if (rc) + return rc; + return count; +} + +static ssize_t ibmebus_store_remove(struct bus_type *bus, + const char *buf, size_t count) +{ + struct device *dev; + char *path; + + path = ibmebus_chomp(buf, count); + if (!path) + return -ENOMEM; + + if ((dev = bus_find_device(&ibmebus_bus_type, NULL, path, + ibmebus_match_path))) { + of_device_unregister(to_platform_device(dev)); + + kfree(path); + return count; + } else { + printk(KERN_WARNING "%s: %s not on the bus\n", + __func__, path); + + kfree(path); + return -ENODEV; + } +} + + +static struct bus_attribute ibmebus_bus_attrs[] = { + __ATTR(probe, S_IWUSR, NULL, ibmebus_store_probe), + __ATTR(remove, S_IWUSR, NULL, ibmebus_store_remove), + __ATTR_NULL +}; + +static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv) +{ + const struct of_device_id *matches = drv->of_match_table; + + if (!matches) + return 0; + + return of_match_device(matches, dev) != NULL; +} + +static int ibmebus_bus_device_probe(struct device *dev) +{ + int error = -ENODEV; + struct of_platform_driver *drv; + struct platform_device *of_dev; + const struct of_device_id *match; + + drv = to_of_platform_driver(dev->driver); + of_dev = to_platform_device(dev); + + if (!drv->probe) + return error; + + of_dev_get(of_dev); + + match = of_match_device(drv->driver.of_match_table, dev); + if (match) + error = drv->probe(of_dev, match); + if (error) + 
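+		/* no match or probe failure: drop the reference taken above */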
of_dev_put(of_dev); + + return error; +} + +static int ibmebus_bus_device_remove(struct device *dev) +{ + struct platform_device *of_dev = to_platform_device(dev); + struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + + if (dev->driver && drv->remove) + drv->remove(of_dev); + return 0; +} + +static void ibmebus_bus_device_shutdown(struct device *dev) +{ + struct platform_device *of_dev = to_platform_device(dev); + struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + + if (dev->driver && drv->shutdown) + drv->shutdown(of_dev); +} + +/* + * ibmebus_bus_device_attrs + */ +static ssize_t devspec_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct platform_device *ofdev; + + ofdev = to_platform_device(dev); + return sprintf(buf, "%s\n", ofdev->dev.of_node->full_name); +} + +static ssize_t name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct platform_device *ofdev; + + ofdev = to_platform_device(dev); + return sprintf(buf, "%s\n", ofdev->dev.of_node->name); +} + +static ssize_t modalias_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t len = of_device_get_modalias(dev, buf, PAGE_SIZE - 2); + buf[len] = '\n'; + buf[len+1] = 0; + return len+1; +} + +struct device_attribute ibmebus_bus_device_attrs[] = { + __ATTR_RO(devspec), + __ATTR_RO(name), + __ATTR_RO(modalias), + __ATTR_NULL +}; + +#ifdef CONFIG_PM_SLEEP +static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg) +{ + struct platform_device *of_dev = to_platform_device(dev); + struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + int ret = 0; + + if (dev->driver && drv->suspend) + ret = drv->suspend(of_dev, mesg); + return ret; +} + +static int ibmebus_bus_legacy_resume(struct device *dev) +{ + struct platform_device *of_dev = to_platform_device(dev); + struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + int ret = 0; + + if (dev->driver && drv->resume) + ret = drv->resume(of_dev); + return ret; +} + +static int ibmebus_bus_pm_prepare(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (drv && drv->pm && drv->pm->prepare) + ret = drv->pm->prepare(dev); + + return ret; +} + +static void ibmebus_bus_pm_complete(struct device *dev) +{ + struct device_driver *drv = dev->driver; + + if (drv && drv->pm && drv->pm->complete) + drv->pm->complete(dev); +} + +#ifdef CONFIG_SUSPEND + +static int ibmebus_bus_pm_suspend(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->suspend) + ret = drv->pm->suspend(dev); + } else { + ret = ibmebus_bus_legacy_suspend(dev, PMSG_SUSPEND); + } + + return ret; +} + +static int ibmebus_bus_pm_suspend_noirq(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->suspend_noirq) + ret = drv->pm->suspend_noirq(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_resume(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->resume) + ret = drv->pm->resume(dev); + } else { + ret = ibmebus_bus_legacy_resume(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_resume_noirq(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->resume_noirq) + ret = 
drv->pm->resume_noirq(dev); + } + + return ret; +} + +#else /* !CONFIG_SUSPEND */ + +#define ibmebus_bus_pm_suspend NULL +#define ibmebus_bus_pm_resume NULL +#define ibmebus_bus_pm_suspend_noirq NULL +#define ibmebus_bus_pm_resume_noirq NULL + +#endif /* !CONFIG_SUSPEND */ + +#ifdef CONFIG_HIBERNATE_CALLBACKS + +static int ibmebus_bus_pm_freeze(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->freeze) + ret = drv->pm->freeze(dev); + } else { + ret = ibmebus_bus_legacy_suspend(dev, PMSG_FREEZE); + } + + return ret; +} + +static int ibmebus_bus_pm_freeze_noirq(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->freeze_noirq) + ret = drv->pm->freeze_noirq(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_thaw(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->thaw) + ret = drv->pm->thaw(dev); + } else { + ret = ibmebus_bus_legacy_resume(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_thaw_noirq(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->thaw_noirq) + ret = drv->pm->thaw_noirq(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_poweroff(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->poweroff) + ret = drv->pm->poweroff(dev); + } else { + ret = ibmebus_bus_legacy_suspend(dev, PMSG_HIBERNATE); + } + + return ret; +} + +static int ibmebus_bus_pm_poweroff_noirq(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->poweroff_noirq) + ret = drv->pm->poweroff_noirq(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_restore(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->restore) + ret = drv->pm->restore(dev); + } else { + ret = ibmebus_bus_legacy_resume(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_restore_noirq(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->restore_noirq) + ret = drv->pm->restore_noirq(dev); + } + + return ret; +} + +#else /* !CONFIG_HIBERNATE_CALLBACKS */ + +#define ibmebus_bus_pm_freeze NULL +#define ibmebus_bus_pm_thaw NULL +#define ibmebus_bus_pm_poweroff NULL +#define ibmebus_bus_pm_restore NULL +#define ibmebus_bus_pm_freeze_noirq NULL +#define ibmebus_bus_pm_thaw_noirq NULL +#define ibmebus_bus_pm_poweroff_noirq NULL +#define ibmebus_bus_pm_restore_noirq NULL + +#endif /* !CONFIG_HIBERNATE_CALLBACKS */ + +static struct dev_pm_ops ibmebus_bus_dev_pm_ops = { + .prepare = ibmebus_bus_pm_prepare, + .complete = ibmebus_bus_pm_complete, + .suspend = ibmebus_bus_pm_suspend, + .resume = ibmebus_bus_pm_resume, + .freeze = ibmebus_bus_pm_freeze, + .thaw = ibmebus_bus_pm_thaw, + .poweroff = ibmebus_bus_pm_poweroff, + .restore = ibmebus_bus_pm_restore, + .suspend_noirq = ibmebus_bus_pm_suspend_noirq, + .resume_noirq = ibmebus_bus_pm_resume_noirq, + .freeze_noirq = ibmebus_bus_pm_freeze_noirq, + .thaw_noirq = ibmebus_bus_pm_thaw_noirq, + .poweroff_noirq = ibmebus_bus_pm_poweroff_noirq, + .restore_noirq = 
ibmebus_bus_pm_restore_noirq, +}; + +#define IBMEBUS_BUS_PM_OPS_PTR (&ibmebus_bus_dev_pm_ops) + +#else /* !CONFIG_PM_SLEEP */ + +#define IBMEBUS_BUS_PM_OPS_PTR NULL + +#endif /* !CONFIG_PM_SLEEP */ + +struct bus_type ibmebus_bus_type = { + .name = "ibmebus", + .uevent = of_device_uevent_modalias, + .bus_attrs = ibmebus_bus_attrs, + .match = ibmebus_bus_bus_match, + .probe = ibmebus_bus_device_probe, + .remove = ibmebus_bus_device_remove, + .shutdown = ibmebus_bus_device_shutdown, + .dev_attrs = ibmebus_bus_device_attrs, + .pm = IBMEBUS_BUS_PM_OPS_PTR, +}; +EXPORT_SYMBOL(ibmebus_bus_type); + +static int __init ibmebus_bus_init(void) +{ + int err; + + printk(KERN_INFO "IBM eBus Device Driver\n"); + + err = bus_register(&ibmebus_bus_type); + if (err) { + printk(KERN_ERR "%s: failed to register IBM eBus.\n", + __func__); + return err; + } + + err = device_register(&ibmebus_bus_device); + if (err) { + printk(KERN_WARNING "%s: device_register returned %i\n", + __func__, err); + bus_unregister(&ibmebus_bus_type); + + return err; + } + + err = ibmebus_create_devices(ibmebus_matches); + if (err) { + device_unregister(&ibmebus_bus_device); + bus_unregister(&ibmebus_bus_type); + return err; + } + + return 0; +} +postcore_initcall(ibmebus_bus_init); diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c new file mode 100644 index 00000000..04d79093 --- /dev/null +++ b/arch/powerpc/kernel/idle.c @@ -0,0 +1,168 @@ +/* + * Idle daemon for PowerPC. Idle daemon will handle any action + * that needs to be taken when the system becomes idle. + * + * Originally written by Cort Dougan (cort@cs.nmt.edu). + * Subsequent 32-bit hacking by Tom Rini, Armin Kuster, + * Paul Mackerras and others. + * + * iSeries supported added by Mike Corrigan <mikejc@us.ibm.com> + * + * Additional shared processor, SMT, and firmware support + * Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com> + * + * 32-bit and 64-bit versions merged by Paul Mackerras <paulus@samba.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/cpu.h> +#include <linux/sysctl.h> +#include <linux/tick.h> + +#include <asm/processor.h> +#include <asm/cputable.h> +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/runlatch.h> +#include <asm/smp.h> + +#ifdef CONFIG_HOTPLUG_CPU +#define cpu_should_die() cpu_is_offline(smp_processor_id()) +#else +#define cpu_should_die() 0 +#endif + +unsigned long cpuidle_disable = IDLE_NO_OVERRIDE; +EXPORT_SYMBOL(cpuidle_disable); + +static int __init powersave_off(char *arg) +{ + ppc_md.power_save = NULL; + cpuidle_disable = IDLE_POWERSAVE_OFF; + return 0; +} +__setup("powersave=off", powersave_off); + +/* + * The body of the idle task. + */ +void cpu_idle(void) +{ + if (ppc_md.idle_loop) + ppc_md.idle_loop(); /* doesn't return */ + + set_thread_flag(TIF_POLLING_NRFLAG); + while (1) { + tick_nohz_idle_enter(); + rcu_idle_enter(); + + while (!need_resched() && !cpu_should_die()) { + ppc64_runlatch_off(); + + if (ppc_md.power_save) { + clear_thread_flag(TIF_POLLING_NRFLAG); + /* + * smp_mb is so clearing of TIF_POLLING_NRFLAG + * is ordered w.r.t. need_resched() test. 
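+				 * Without the barrier the store clearing
+				 * the flag could be reordered past the
+				 * need_resched() load; a waker that still
+				 * saw TIF_POLLING_NRFLAG set would skip
+				 * its wakeup IPI and we could nap with a
+				 * reschedule pending.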
+ */ + smp_mb(); + local_irq_disable(); + + /* Don't trace irqs off for idle */ + stop_critical_timings(); + + /* check again after disabling irqs */ + if (!need_resched() && !cpu_should_die()) + ppc_md.power_save(); + + start_critical_timings(); + + /* Some power_save functions return with + * interrupts enabled, some don't. + */ + if (irqs_disabled()) + local_irq_enable(); + set_thread_flag(TIF_POLLING_NRFLAG); + + } else { + /* + * Go into low thread priority and possibly + * low power mode. + */ + HMT_low(); + HMT_very_low(); + } + } + + HMT_medium(); + ppc64_runlatch_on(); + rcu_idle_exit(); + tick_nohz_idle_exit(); + if (cpu_should_die()) { + sched_preempt_enable_no_resched(); + cpu_die(); + } + schedule_preempt_disabled(); + } +} + +static void do_nothing(void *unused) +{ +} + +/* + * cpu_idle_wait - Used to ensure that all the CPUs come out of the old + * idle loop and start using the new idle loop. + * Required while changing idle handler on SMP systems. + * Caller must have changed idle handler to the new value before the call. + * This window may be larger on shared systems. + */ +void cpu_idle_wait(void) +{ + smp_mb(); + /* kick all the CPUs so that they exit out of pm_idle */ + smp_call_function(do_nothing, NULL, 1); +} +EXPORT_SYMBOL_GPL(cpu_idle_wait); + +int powersave_nap; + +#ifdef CONFIG_SYSCTL +/* + * Register the sysctl to set/clear powersave_nap. + */ +static ctl_table powersave_nap_ctl_table[]={ + { + .procname = "powersave-nap", + .data = &powersave_nap, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + {} +}; +static ctl_table powersave_nap_sysctl_root[] = { + { + .procname = "kernel", + .mode = 0555, + .child = powersave_nap_ctl_table, + }, + {} +}; + +static int __init +register_powersave_nap_sysctl(void) +{ + register_sysctl_table(powersave_nap_sysctl_root); + + return 0; +} +__initcall(register_powersave_nap_sysctl); +#endif diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S new file mode 100644 index 00000000..15c611de --- /dev/null +++ b/arch/powerpc/kernel/idle_6xx.S @@ -0,0 +1,197 @@ +/* + * This file contains the power_save function for 6xx & 7xxx CPUs + * rewritten in assembler + * + * Warning ! This code assumes that if your machine has a 750fx + * it will have PLL 1 set to low speed mode (used during NAP/DOZE). + * if this is not the case some additional changes will have to + * be done to check a runtime var (a bit like powersave-nap) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/threads.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + + .text + +/* + * Init idle, called at early CPU setup time from head.S for each CPU + * Make sure no rest of NAP mode remains in HID0, save default + * values for some CPU specific registers. 
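+ * (The save slots nap_save_msscr0/nap_save_hid1 at the bottom of this
+ * file hold one word per cpu, hence the "slwi r5,r24,2" indexing below.)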
Called with r24 + * containing CPU number and r3 reloc offset + */ +_GLOBAL(init_idle_6xx) +BEGIN_FTR_SECTION + mfspr r4,SPRN_HID0 + rlwinm r4,r4,0,10,8 /* Clear NAP */ + mtspr SPRN_HID0, r4 + b 1f +END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) + blr +1: + slwi r5,r24,2 + add r5,r5,r3 +BEGIN_FTR_SECTION + mfspr r4,SPRN_MSSCR0 + addis r6,r5, nap_save_msscr0@ha + stw r4,nap_save_msscr0@l(r6) +END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR) +BEGIN_FTR_SECTION + mfspr r4,SPRN_HID1 + addis r6,r5,nap_save_hid1@ha + stw r4,nap_save_hid1@l(r6) +END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) + blr + +/* + * Here is the power_save_6xx function. This could eventually be + * split into several functions & changing the function pointer + * depending on the various features. + */ +_GLOBAL(ppc6xx_idle) + /* Check if we can nap or doze, put HID0 mask in r3 + */ + lis r3, 0 +BEGIN_FTR_SECTION + lis r3,HID0_DOZE@h +END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE) +BEGIN_FTR_SECTION + /* We must dynamically check for the NAP feature as it + * can be cleared by CPU init after the fixups are done + */ + lis r4,cur_cpu_spec@ha + lwz r4,cur_cpu_spec@l(r4) + lwz r4,CPU_SPEC_FEATURES(r4) + andi. r0,r4,CPU_FTR_CAN_NAP + beq 1f + /* Now check if user or arch enabled NAP mode */ + lis r4,powersave_nap@ha + lwz r4,powersave_nap@l(r4) + cmpwi 0,r4,0 + beq 1f + lis r3,HID0_NAP@h +1: +END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) + cmpwi 0,r3,0 + beqlr + + /* Some pre-nap cleanups needed on some CPUs */ + andis. r0,r3,HID0_NAP@h + beq 2f +BEGIN_FTR_SECTION + /* Disable L2 prefetch on some 745x and try to ensure + * L2 prefetch engines are idle. As explained by errata + * text, we can't be sure they are, we just hope very hard + * that well be enough (sic !). At least I noticed Apple + * doesn't even bother doing the dcbf's here... + */ + mfspr r4,SPRN_MSSCR0 + rlwinm r4,r4,0,0,29 + sync + mtspr SPRN_MSSCR0,r4 + sync + isync + lis r4,KERNELBASE@h + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 +END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR) +2: +BEGIN_FTR_SECTION + /* Go to low speed mode on some 750FX */ + lis r4,powersave_lowspeed@ha + lwz r4,powersave_lowspeed@l(r4) + cmpwi 0,r4,0 + beq 1f + mfspr r4,SPRN_HID1 + oris r4,r4,0x0001 + mtspr SPRN_HID1,r4 +1: +END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) + + /* Go to NAP or DOZE now */ + mfspr r4,SPRN_HID0 + lis r5,(HID0_NAP|HID0_SLEEP)@h +BEGIN_FTR_SECTION + oris r5,r5,HID0_DOZE@h +END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE) + andc r4,r4,r5 + or r4,r4,r3 +BEGIN_FTR_SECTION + oris r4,r4,HID0_DPM@h /* that should be done once for all */ +END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM) + mtspr SPRN_HID0,r4 +BEGIN_FTR_SECTION + DSSALL + sync +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + rlwinm r9,r1,0,0,31-THREAD_SHIFT /* current thread_info */ + lwz r8,TI_LOCAL_FLAGS(r9) /* set napping bit */ + ori r8,r8,_TLF_NAPPING /* so when we take an exception */ + stw r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */ + mfmsr r7 + ori r7,r7,MSR_EE + oris r7,r7,MSR_POW@h +1: sync + mtmsr r7 + isync + b 1b + +/* + * Return from NAP/DOZE mode, restore some CPU specific registers, + * we are called with DR/IR still off and r2 containing physical + * address of current. R11 points to the exception frame (physical + * address). We have to preserve r10. 
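+ *
+ * The first two instructions below implement the wakeup trick: the
+ * interrupted NIP points into the idle loop, so we overwrite it with
+ * the saved LR, making the exception return behave as if ppc6xx_idle
+ * had simply done a blr to its caller.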
+ */ +_GLOBAL(power_save_ppc32_restore) + lwz r9,_LINK(r11) /* interrupted in ppc6xx_idle: */ + stw r9,_NIP(r11) /* make it do a blr */ + +#ifdef CONFIG_SMP + rlwinm r12,r11,0,0,31-THREAD_SHIFT + lwz r11,TI_CPU(r12) /* get cpu number * 4 */ + slwi r11,r11,2 +#else + li r11,0 +#endif + /* Todo make sure all these are in the same page + * and load r11 (@ha part + CPU offset) only once + */ +BEGIN_FTR_SECTION + mfspr r9,SPRN_HID0 + andis. r9,r9,HID0_NAP@h + beq 1f + addis r9,r11,(nap_save_msscr0-KERNELBASE)@ha + lwz r9,nap_save_msscr0@l(r9) + mtspr SPRN_MSSCR0, r9 + sync + isync +1: +END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR) +BEGIN_FTR_SECTION + addis r9,r11,(nap_save_hid1-KERNELBASE)@ha + lwz r9,nap_save_hid1@l(r9) + mtspr SPRN_HID1, r9 +END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) + b transfer_to_handler_cont + + .data + +_GLOBAL(nap_save_msscr0) + .space 4*NR_CPUS + +_GLOBAL(nap_save_hid1) + .space 4*NR_CPUS + +_GLOBAL(powersave_lowspeed) + .long 0 diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S new file mode 100644 index 00000000..ff007b59 --- /dev/null +++ b/arch/powerpc/kernel/idle_book3e.S @@ -0,0 +1,73 @@ +/* + * Copyright 2010 IBM Corp, Benjamin Herrenschmidt <benh@kernel.crashing.org> + * + * Generic idle routine for Book3E processors + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/threads.h> +#include <asm/reg.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ppc-opcode.h> +#include <asm/processor.h> +#include <asm/thread_info.h> + +/* 64-bit version only for now */ +#ifdef CONFIG_PPC64 + +_GLOBAL(book3e_idle) + /* Save LR for later */ + mflr r0 + std r0,16(r1) + + /* Hard disable interrupts */ + wrteei 0 + + /* Now check if an interrupt came in while we were soft disabled + * since we may otherwise lose it (doorbells etc...). + */ + lbz r3,PACAIRQHAPPENED(r13) + cmpwi cr0,r3,0 + bnelr + + /* Now we are going to mark ourselves as soft and hard enabled in + * order to be able to take interrupts while asleep. We inform lockdep + * of that. We don't actually turn interrupts on just yet tho. + */ +#ifdef CONFIG_TRACE_IRQFLAGS + stdu r1,-128(r1) + bl .trace_hardirqs_on + addi r1,r1,128 +#endif + li r0,1 + stb r0,PACASOFTIRQEN(r13) + + /* Interrupts will make use return to LR, so get something we want + * in there + */ + bl 1f + + /* And return (interrupts are on) */ + ld r0,16(r1) + mtlr r0 + blr + +1: /* Let's set the _TLF_NAPPING flag so interrupts make us return + * to the right spot + */ + clrrdi r11,r1,THREAD_SHIFT + ld r10,TI_LOCAL_FLAGS(r11) + ori r10,r10,_TLF_NAPPING + std r10,TI_LOCAL_FLAGS(r11) + + /* We can now re-enable hard interrupts and go to sleep */ + wrteei 1 +1: PPC_WAIT(0) + b 1b + +#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S new file mode 100644 index 00000000..4f0ab85f --- /dev/null +++ b/arch/powerpc/kernel/idle_e500.S @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * Dave Liu <daveliu@freescale.com> + * copy from idle_6xx.S and modify for e500 based processor, + * implement the power_save function in idle. 
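+ *
+ * e500mc cores idle with the "wait" instruction; classic e500 cores
+ * flush the L1 (and, where present, the L2) and then doze/nap via
+ * HID0, as implemented below.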
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/threads.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + + .text + +_GLOBAL(e500_idle) + rlwinm r3,r1,0,0,31-THREAD_SHIFT /* current thread_info */ + lwz r4,TI_LOCAL_FLAGS(r3) /* set napping bit */ + ori r4,r4,_TLF_NAPPING /* so when we take an exception */ + stw r4,TI_LOCAL_FLAGS(r3) /* it will return to our caller */ + +#ifdef CONFIG_PPC_E500MC + wrteei 1 +1: wait + + /* + * Guard against spurious wakeups (e.g. from a hypervisor) -- + * any real interrupt will cause us to return to LR due to + * _TLF_NAPPING. + */ + b 1b +#else + /* Check if we can nap or doze, put HID0 mask in r3 */ + lis r3,0 +BEGIN_FTR_SECTION + lis r3,HID0_DOZE@h +END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE) + +BEGIN_FTR_SECTION + /* Now check if user enabled NAP mode */ + lis r4,powersave_nap@ha + lwz r4,powersave_nap@l(r4) + cmpwi 0,r4,0 + beq 1f + stwu r1,-16(r1) + mflr r0 + stw r0,20(r1) + bl flush_dcache_L1 + lwz r0,20(r1) + addi r1,r1,16 + mtlr r0 + lis r3,HID0_NAP@h +END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) +BEGIN_FTR_SECTION + msync + li r7,L2CSR0_L2FL@l + mtspr SPRN_L2CSR0,r7 +2: + mfspr r7,SPRN_L2CSR0 + andi. r4,r7,L2CSR0_L2FL@l + bne 2b +END_FTR_SECTION_IFSET(CPU_FTR_L2CSR|CPU_FTR_CAN_NAP) +1: + /* Go to NAP or DOZE now */ + mfspr r4,SPRN_HID0 + rlwinm r4,r4,0,~(HID0_DOZE|HID0_NAP|HID0_SLEEP) + or r4,r4,r3 + isync + mtspr SPRN_HID0,r4 + isync + + mfmsr r7 + oris r7,r7,MSR_WE@h + ori r7,r7,MSR_EE + msync + mtmsr r7 + isync +2: b 2b +#endif /* !E500MC */ + +/* + * Return from NAP/DOZE mode, restore some CPU specific registers, + * r2 containing physical address of current. + * r11 points to the exception frame (physical address). + * We have to preserve r10. + */ +_GLOBAL(power_save_ppc32_restore) + lwz r9,_LINK(r11) /* interrupted in e500_idle */ + stw r9,_NIP(r11) /* make it do a blr */ + +#ifdef CONFIG_SMP + rlwinm r12,r1,0,0,31-THREAD_SHIFT + lwz r11,TI_CPU(r12) /* get cpu number * 4 */ + slwi r11,r11,2 +#else + li r11,0 +#endif + + b transfer_to_handler_cont diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S new file mode 100644 index 00000000..2c71b0fc --- /dev/null +++ b/arch/powerpc/kernel/idle_power4.S @@ -0,0 +1,73 @@ +/* + * This file contains the power_save function for 970-family CPUs. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
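+ *
+ * Flow (summarising the code below): hard-disable interrupts, bail out
+ * if PACAIRQHAPPENED shows a pending event, flag the thread with
+ * _TLF_NAPPING, then set MSR[EE|POW] to enter nap; any interrupt
+ * returns control to our caller.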
+ */ + +#include <linux/threads.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/irqflags.h> + +#undef DEBUG + + .text + +_GLOBAL(power4_idle) +BEGIN_FTR_SECTION + blr +END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) + /* Now check if user or arch enabled NAP mode */ + LOAD_REG_ADDRBASE(r3,powersave_nap) + lwz r4,ADDROFF(powersave_nap)(r3) + cmpwi 0,r4,0 + beqlr + + /* Hard disable interrupts */ + mfmsr r7 + rldicl r0,r7,48,1 + rotldi r0,r0,16 + mtmsrd r0,1 + + /* Check if something happened while soft-disabled */ + lbz r0,PACAIRQHAPPENED(r13) + cmpwi cr0,r0,0 + bnelr + + /* Soft-enable interrupts */ +#ifdef CONFIG_TRACE_IRQFLAGS + mflr r0 + std r0,16(r1) + stdu r1,-128(r1) + bl .trace_hardirqs_on + addi r1,r1,128 + ld r0,16(r1) + mtlr r0 + mfmsr r7 +#endif /* CONFIG_TRACE_IRQFLAGS */ + + li r0,1 + stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ +BEGIN_FTR_SECTION + DSSALL + sync +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + clrrdi r9,r1,THREAD_SHIFT /* current thread_info */ + ld r8,TI_LOCAL_FLAGS(r9) /* set napping bit */ + ori r8,r8,_TLF_NAPPING /* so when we take an exception */ + std r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */ + ori r7,r7,MSR_EE + oris r7,r7,MSR_POW@h +1: sync + isync + mtmsrd r7 + isync + b 1b + diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S new file mode 100644 index 00000000..0cdc9a39 --- /dev/null +++ b/arch/powerpc/kernel/idle_power7.S @@ -0,0 +1,116 @@ +/* + * This file contains the power_save function for Power7 CPUs. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/threads.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ppc-opcode.h> +#include <asm/hw_irq.h> + +#undef DEBUG + + .text + +_GLOBAL(power7_idle) + /* Now check if user or arch enabled NAP mode */ + LOAD_REG_ADDRBASE(r3,powersave_nap) + lwz r4,ADDROFF(powersave_nap)(r3) + cmpwi 0,r4,0 + beqlr + + /* NAP is a state loss, we create a regs frame on the + * stack, fill it up with the state we care about and + * stick a pointer to it in PACAR1. We really only + * need to save PC, some CR bits and the NV GPRs, + * but for now an interrupt frame will do. + */ + mflr r0 + std r0,16(r1) + stdu r1,-INT_FRAME_SIZE(r1) + std r0,_LINK(r1) + std r0,_NIP(r1) + +#ifndef CONFIG_SMP + /* Make sure FPU, VSX etc... are flushed as we may lose + * state when going to nap mode + */ + bl .discard_lazy_cpu_state +#endif /* CONFIG_SMP */ + + /* Hard disable interrupts */ + mfmsr r9 + rldicl r9,r9,48,1 + rotldi r9,r9,16 + mtmsrd r9,1 /* hard-disable interrupts */ + + /* Check if something happened while soft-disabled */ + lbz r0,PACAIRQHAPPENED(r13) + cmpwi cr0,r0,0 + beq 1f + addi r1,r1,INT_FRAME_SIZE + ld r0,16(r1) + mtlr r0 + blr + +1: /* We mark irqs hard disabled as this is the state we'll + * be in when returning and we need to tell arch_local_irq_restore() + * about it + */ + li r0,PACA_IRQ_HARD_DIS + stb r0,PACAIRQHAPPENED(r13) + + /* We haven't lost state ... 
yet */ + li r0,0 + stb r0,PACA_NAPSTATELOST(r13) + + /* Continue saving state */ + SAVE_GPR(2, r1) + SAVE_NVGPRS(r1) + mfcr r3 + std r3,_CCR(r1) + std r9,_MSR(r1) + std r1,PACAR1(r13) + + /* Magic NAP mode enter sequence */ + std r0,0(r1) + ptesync + ld r0,0(r1) +1: cmp cr0,r0,r0 + bne 1b + PPC_NAP + b . + +_GLOBAL(power7_wakeup_loss) + ld r1,PACAR1(r13) + REST_NVGPRS(r1) + REST_GPR(2, r1) + ld r3,_CCR(r1) + ld r4,_MSR(r1) + ld r5,_NIP(r1) + addi r1,r1,INT_FRAME_SIZE + mtcr r3 + mtspr SPRN_SRR1,r4 + mtspr SPRN_SRR0,r5 + rfid + +_GLOBAL(power7_wakeup_noloss) + lbz r0,PACA_NAPSTATELOST(r13) + cmpwi r0,0 + bne .power7_wakeup_loss + ld r1,PACAR1(r13) + ld r4,_MSR(r1) + ld r5,_NIP(r1) + addi r1,r1,INT_FRAME_SIZE + mtspr SPRN_SRR1,r4 + mtspr SPRN_SRR0,r5 + rfid diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c new file mode 100644 index 00000000..d076d465 --- /dev/null +++ b/arch/powerpc/kernel/init_task.c @@ -0,0 +1,29 @@ +#include <linux/mm.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/init_task.h> +#include <linux/fs.h> +#include <linux/mqueue.h> +#include <asm/uaccess.h> + +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); +/* + * Initial thread structure. + * + * We need to make sure that this is 16384-byte aligned due to the + * way process stacks are handled. This is done by having a special + * "init_task" linker map entry.. + */ +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; + +/* + * Initial task structure. + * + * All other task structs will be allocated on slabs in fork.c + */ +struct task_struct init_task = INIT_TASK(init_task); + +EXPORT_SYMBOL(init_task); diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c new file mode 100644 index 00000000..12d329bc --- /dev/null +++ b/arch/powerpc/kernel/io-workarounds.c @@ -0,0 +1,189 @@ +/* + * Support PCI IO workaround + * + * Copyright (C) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org> + * IBM, Corp. + * (C) Copyright 2007-2008 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
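+ *
+ * Overview: each workaround bus occupies a slot in iowa_busses[];
+ * iowa_ioremap() encodes slot+1 as a token in the cookie it returns,
+ * and the DEF_PCI_AC_* wrappers below route an accessor through
+ * bus->ops when the address resolves to a workaround bus, falling back
+ * to the stock __do_* accessors otherwise.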
+ */ +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/sched.h> /* for init_mm */ + +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/pgtable.h> +#include <asm/ppc-pci.h> +#include <asm/io-workarounds.h> + +#define IOWA_MAX_BUS 8 + +static struct iowa_bus iowa_busses[IOWA_MAX_BUS]; +static unsigned int iowa_bus_count; + +static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr) +{ + int i, j; + struct resource *res; + unsigned long vstart, vend; + + for (i = 0; i < iowa_bus_count; i++) { + struct iowa_bus *bus = &iowa_busses[i]; + struct pci_controller *phb = bus->phb; + + if (vaddr) { + vstart = (unsigned long)phb->io_base_virt; + vend = vstart + phb->pci_io_size - 1; + if ((vaddr >= vstart) && (vaddr <= vend)) + return bus; + } + + if (paddr) + for (j = 0; j < 3; j++) { + res = &phb->mem_resources[j]; + if (paddr >= res->start && paddr <= res->end) + return bus; + } + } + + return NULL; +} + +struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) +{ + struct iowa_bus *bus; + int token; + + token = PCI_GET_ADDR_TOKEN(addr); + + if (token && token <= iowa_bus_count) + bus = &iowa_busses[token - 1]; + else { + unsigned long vaddr, paddr; + pte_t *ptep; + + vaddr = (unsigned long)PCI_FIX_ADDR(addr); + if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) + return NULL; + + ptep = find_linux_pte(init_mm.pgd, vaddr); + if (ptep == NULL) + paddr = 0; + else + paddr = pte_pfn(*ptep) << PAGE_SHIFT; + bus = iowa_pci_find(vaddr, paddr); + + if (bus == NULL) + return NULL; + } + + return bus; +} + +struct iowa_bus *iowa_pio_find_bus(unsigned long port) +{ + unsigned long vaddr = (unsigned long)pci_io_base + port; + return iowa_pci_find(vaddr, 0); +} + + +#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \ +static ret iowa_##name at \ +{ \ + struct iowa_bus *bus; \ + bus = iowa_##space##_find_bus(aa); \ + if (bus && bus->ops && bus->ops->name) \ + return bus->ops->name al; \ + return __do_##name al; \ +} + +#define DEF_PCI_AC_NORET(name, at, al, space, aa) \ +static void iowa_##name at \ +{ \ + struct iowa_bus *bus; \ + bus = iowa_##space##_find_bus(aa); \ + if (bus && bus->ops && bus->ops->name) { \ + bus->ops->name al; \ + return; \ + } \ + __do_##name al; \ +} + +#include <asm/io-defs.h> + +#undef DEF_PCI_AC_RET +#undef DEF_PCI_AC_NORET + +static const struct ppc_pci_io __devinitconst iowa_pci_io = { + +#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) .name = iowa_##name, +#define DEF_PCI_AC_NORET(name, at, al, space, aa) .name = iowa_##name, + +#include <asm/io-defs.h> + +#undef DEF_PCI_AC_RET +#undef DEF_PCI_AC_NORET + +}; + +static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size, + unsigned long flags, void *caller) +{ + struct iowa_bus *bus; + void __iomem *res = __ioremap_caller(addr, size, flags, caller); + int busno; + + bus = iowa_pci_find(0, (unsigned long)addr); + if (bus != NULL) { + busno = bus - iowa_busses; + PCI_SET_ADDR_TOKEN(res, busno + 1); + } + return res; +} + +/* Enable IO workaround */ +static void __devinit io_workaround_init(void) +{ + static int io_workaround_inited; + + if (io_workaround_inited) + return; + ppc_pci_io = iowa_pci_io; + ppc_md.ioremap = iowa_ioremap; + io_workaround_inited = 1; +} + +/* Register new bus to support workaround */ +void __devinit iowa_register_bus(struct pci_controller *phb, + struct ppc_pci_io *ops, + int (*initfunc)(struct iowa_bus *, void *), void *data) +{ + struct iowa_bus *bus; + struct device_node *np = phb->dn; + + io_workaround_init(); + + if (iowa_bus_count >= 
IOWA_MAX_BUS) { + pr_err("IOWA:Too many pci bridges, " + "workarounds disabled for %s\n", np->full_name); + return; + } + + bus = &iowa_busses[iowa_bus_count]; + bus->phb = phb; + bus->ops = ops; + bus->private = data; + + if (initfunc) + if ((*initfunc)(bus, data)) + return; + + iowa_bus_count++; + + pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name); +} + diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c new file mode 100644 index 00000000..886381f3 --- /dev/null +++ b/arch/powerpc/kernel/io.c @@ -0,0 +1,207 @@ +/* + * I/O string operations + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Copyright (C) 2006 IBM Corporation + * + * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras. + * + * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com) + * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) + * + * Rewritten in C by Stephen Rothwell. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/compiler.h> +#include <linux/export.h> + +#include <asm/io.h> +#include <asm/firmware.h> +#include <asm/bug.h> + +void _insb(const volatile u8 __iomem *port, void *buf, long count) +{ + u8 *tbuf = buf; + u8 tmp; + + if (unlikely(count <= 0)) + return; + asm volatile("sync"); + do { + tmp = *port; + eieio(); + *tbuf++ = tmp; + } while (--count != 0); + asm volatile("twi 0,%0,0; isync" : : "r" (tmp)); +} +EXPORT_SYMBOL(_insb); + +void _outsb(volatile u8 __iomem *port, const void *buf, long count) +{ + const u8 *tbuf = buf; + + if (unlikely(count <= 0)) + return; + asm volatile("sync"); + do { + *port = *tbuf++; + } while (--count != 0); + asm volatile("sync"); +} +EXPORT_SYMBOL(_outsb); + +void _insw_ns(const volatile u16 __iomem *port, void *buf, long count) +{ + u16 *tbuf = buf; + u16 tmp; + + if (unlikely(count <= 0)) + return; + asm volatile("sync"); + do { + tmp = *port; + eieio(); + *tbuf++ = tmp; + } while (--count != 0); + asm volatile("twi 0,%0,0; isync" : : "r" (tmp)); +} +EXPORT_SYMBOL(_insw_ns); + +void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count) +{ + const u16 *tbuf = buf; + + if (unlikely(count <= 0)) + return; + asm volatile("sync"); + do { + *port = *tbuf++; + } while (--count != 0); + asm volatile("sync"); +} +EXPORT_SYMBOL(_outsw_ns); + +void _insl_ns(const volatile u32 __iomem *port, void *buf, long count) +{ + u32 *tbuf = buf; + u32 tmp; + + if (unlikely(count <= 0)) + return; + asm volatile("sync"); + do { + tmp = *port; + eieio(); + *tbuf++ = tmp; + } while (--count != 0); + asm volatile("twi 0,%0,0; isync" : : "r" (tmp)); +} +EXPORT_SYMBOL(_insl_ns); + +void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count) +{ + const u32 *tbuf = buf; + + if (unlikely(count <= 0)) + return; + asm volatile("sync"); + do { + *port = *tbuf++; + } while (--count != 0); + asm volatile("sync"); +} +EXPORT_SYMBOL(_outsl_ns); + +#define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0) + +notrace void +_memset_io(volatile void __iomem *addr, int c, unsigned long n) +{ + void *p = (void __force *)addr; + u32 lc = c; + lc |= lc << 8; + lc |= lc << 16; + + __asm__ __volatile__ ("sync" : : : "memory"); + while(n && !IO_CHECK_ALIGN(p, 4)) { + *((volatile u8 *)p) = c; + p++; + n--; + } + while(n >= 4) { + 
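+		/* aligned fast path: store the replicated byte one word at a time */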
*((volatile u32 *)p) = lc; + p += 4; + n -= 4; + } + while(n) { + *((volatile u8 *)p) = c; + p++; + n--; + } + __asm__ __volatile__ ("sync" : : : "memory"); +} +EXPORT_SYMBOL(_memset_io); + +void _memcpy_fromio(void *dest, const volatile void __iomem *src, + unsigned long n) +{ + void *vsrc = (void __force *) src; + + __asm__ __volatile__ ("sync" : : : "memory"); + while(n && (!IO_CHECK_ALIGN(vsrc, 4) || !IO_CHECK_ALIGN(dest, 4))) { + *((u8 *)dest) = *((volatile u8 *)vsrc); + eieio(); + vsrc++; + dest++; + n--; + } + while(n >= 4) { + *((u32 *)dest) = *((volatile u32 *)vsrc); + eieio(); + vsrc += 4; + dest += 4; + n -= 4; + } + while(n) { + *((u8 *)dest) = *((volatile u8 *)vsrc); + eieio(); + vsrc++; + dest++; + n--; + } + __asm__ __volatile__ ("sync" : : : "memory"); +} +EXPORT_SYMBOL(_memcpy_fromio); + +void _memcpy_toio(volatile void __iomem *dest, const void *src, unsigned long n) +{ + void *vdest = (void __force *) dest; + + __asm__ __volatile__ ("sync" : : : "memory"); + while(n && (!IO_CHECK_ALIGN(vdest, 4) || !IO_CHECK_ALIGN(src, 4))) { + *((volatile u8 *)vdest) = *((u8 *)src); + src++; + vdest++; + n--; + } + while(n >= 4) { + *((volatile u32 *)vdest) = *((volatile u32 *)src); + src += 4; + vdest += 4; + n-=4; + } + while(n) { + *((volatile u8 *)vdest) = *((u8 *)src); + src++; + vdest++; + n--; + } + __asm__ __volatile__ ("sync" : : : "memory"); +} +EXPORT_SYMBOL(_memcpy_toio); diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c new file mode 100644 index 00000000..97a3715a --- /dev/null +++ b/arch/powerpc/kernel/iomap.c @@ -0,0 +1,132 @@ +/* + * ppc64 "iomap" interface implementation. + * + * (C) Copyright 2004 Linus Torvalds + */ +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/mm.h> +#include <linux/export.h> +#include <asm/io.h> +#include <asm/pci-bridge.h> + +/* + * Here comes the ppc64 implementation of the IOMAP + * interfaces. + */ +unsigned int ioread8(void __iomem *addr) +{ + return readb(addr); +} +unsigned int ioread16(void __iomem *addr) +{ + return readw(addr); +} +unsigned int ioread16be(void __iomem *addr) +{ + return in_be16(addr); +} +unsigned int ioread32(void __iomem *addr) +{ + return readl(addr); +} +unsigned int ioread32be(void __iomem *addr) +{ + return in_be32(addr); +} +EXPORT_SYMBOL(ioread8); +EXPORT_SYMBOL(ioread16); +EXPORT_SYMBOL(ioread16be); +EXPORT_SYMBOL(ioread32); +EXPORT_SYMBOL(ioread32be); + +void iowrite8(u8 val, void __iomem *addr) +{ + writeb(val, addr); +} +void iowrite16(u16 val, void __iomem *addr) +{ + writew(val, addr); +} +void iowrite16be(u16 val, void __iomem *addr) +{ + out_be16(addr, val); +} +void iowrite32(u32 val, void __iomem *addr) +{ + writel(val, addr); +} +void iowrite32be(u32 val, void __iomem *addr) +{ + out_be32(addr, val); +} +EXPORT_SYMBOL(iowrite8); +EXPORT_SYMBOL(iowrite16); +EXPORT_SYMBOL(iowrite16be); +EXPORT_SYMBOL(iowrite32); +EXPORT_SYMBOL(iowrite32be); + +/* + * These are the "repeat read/write" functions. Note the + * non-CPU byte order. We do things in "IO byteorder" + * here. + * + * FIXME! We could make these do EEH handling if we really + * wanted. Not clear if we do. 
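+ *
+ * The _ns ("no swap") string primitives used below skip the usual
+ * big-endian byteswap, which is exactly what keeps the data in IO
+ * byte order.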
+ */ +void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) +{ + _insb((u8 __iomem *) addr, dst, count); +} +void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) +{ + _insw_ns((u16 __iomem *) addr, dst, count); +} +void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) +{ + _insl_ns((u32 __iomem *) addr, dst, count); +} +EXPORT_SYMBOL(ioread8_rep); +EXPORT_SYMBOL(ioread16_rep); +EXPORT_SYMBOL(ioread32_rep); + +void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count) +{ + _outsb((u8 __iomem *) addr, src, count); +} +void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count) +{ + _outsw_ns((u16 __iomem *) addr, src, count); +} +void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count) +{ + _outsl_ns((u32 __iomem *) addr, src, count); +} +EXPORT_SYMBOL(iowrite8_rep); +EXPORT_SYMBOL(iowrite16_rep); +EXPORT_SYMBOL(iowrite32_rep); + +void __iomem *ioport_map(unsigned long port, unsigned int len) +{ + return (void __iomem *) (port + _IO_BASE); +} + +void ioport_unmap(void __iomem *addr) +{ + /* Nothing to do */ +} +EXPORT_SYMBOL(ioport_map); +EXPORT_SYMBOL(ioport_unmap); + +#ifdef CONFIG_PCI +void pci_iounmap(struct pci_dev *dev, void __iomem *addr) +{ + if (isa_vaddr_is_ioport(addr)) + return; + if (pcibios_vaddr_is_ioport(addr)) + return; + iounmap(addr); +} + +EXPORT_SYMBOL(pci_iounmap); +#endif /* CONFIG_PCI */ diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c new file mode 100644 index 00000000..359f0785 --- /dev/null +++ b/arch/powerpc/kernel/iommu.c @@ -0,0 +1,685 @@ +/* + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation + * + * Rewrite, cleanup, new allocation schemes, virtual merging: + * Copyright (C) 2004 Olof Johansson, IBM Corporation + * and Ben. Herrenschmidt, IBM Corporation + * + * Dynamic DMA mapping support, bus-independent parts. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/dma-mapping.h> +#include <linux/bitmap.h> +#include <linux/iommu-helper.h> +#include <linux/crash_dump.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/iommu.h> +#include <asm/pci-bridge.h> +#include <asm/machdep.h> +#include <asm/kdump.h> +#include <asm/fadump.h> + +#define DBG(...) 
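+
+/*
+ * Boot-time control: "iommu=novmerge" on the kernel command line
+ * disables virtual merging of scatterlist entries, "iommu=vmerge"
+ * restores the default (novmerge = 0, merging enabled).
+ */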
+ +static int novmerge; + +static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int); + +static int __init setup_iommu(char *str) +{ + if (!strcmp(str, "novmerge")) + novmerge = 1; + else if (!strcmp(str, "vmerge")) + novmerge = 0; + return 1; +} + +__setup("iommu=", setup_iommu); + +static unsigned long iommu_range_alloc(struct device *dev, + struct iommu_table *tbl, + unsigned long npages, + unsigned long *handle, + unsigned long mask, + unsigned int align_order) +{ + unsigned long n, end, start; + unsigned long limit; + int largealloc = npages > 15; + int pass = 0; + unsigned long align_mask; + unsigned long boundary_size; + + align_mask = 0xffffffffffffffffl >> (64 - align_order); + + /* This allocator was derived from x86_64's bit string search */ + + /* Sanity check */ + if (unlikely(npages == 0)) { + if (printk_ratelimit()) + WARN_ON(1); + return DMA_ERROR_CODE; + } + + if (handle && *handle) + start = *handle; + else + start = largealloc ? tbl->it_largehint : tbl->it_hint; + + /* Use only half of the table for small allocs (15 pages or less) */ + limit = largealloc ? tbl->it_size : tbl->it_halfpoint; + + if (largealloc && start < tbl->it_halfpoint) + start = tbl->it_halfpoint; + + /* The case below can happen if we have a small segment appended + * to a large, or when the previous alloc was at the very end of + * the available space. If so, go back to the initial start. + */ + if (start >= limit) + start = largealloc ? tbl->it_largehint : tbl->it_hint; + + again: + + if (limit + tbl->it_offset > mask) { + limit = mask - tbl->it_offset + 1; + /* If we're constrained on address range, first try + * at the masked hint to avoid O(n) search complexity, + * but on second pass, start at 0. + */ + if ((start & mask) >= limit || pass > 0) + start = 0; + else + start &= mask; + } + + if (dev) + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + 1 << IOMMU_PAGE_SHIFT); + else + boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT); + /* 4GB boundary for iseries_hv_alloc and iseries_hv_map */ + + n = iommu_area_alloc(tbl->it_map, limit, start, npages, + tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT, + align_mask); + if (n == -1) { + if (likely(pass < 2)) { + /* First failure, just rescan the half of the table. + * Second failure, rescan the other half of the table. + */ + start = (largealloc ^ pass) ? tbl->it_halfpoint : 0; + limit = pass ? tbl->it_size : limit; + pass++; + goto again; + } else { + /* Third failure, give up */ + return DMA_ERROR_CODE; + } + } + + end = n + npages; + + /* Bump the hint to a new block for small allocs. 
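+	 * The rounding below aligns it_hint up to the next it_blocksize
+	 * boundary; e.g. with it_blocksize = 16 and end = 37 the new hint
+	 * is (37 + 15) & ~15 = 48.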
*/ + if (largealloc) { + /* Don't bump to new block to avoid fragmentation */ + tbl->it_largehint = end; + } else { + /* Overflow will be taken care of at the next allocation */ + tbl->it_hint = (end + tbl->it_blocksize - 1) & + ~(tbl->it_blocksize - 1); + } + + /* Update handle for SG allocations */ + if (handle) + *handle = end; + + return n; +} + +static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, + void *page, unsigned int npages, + enum dma_data_direction direction, + unsigned long mask, unsigned int align_order, + struct dma_attrs *attrs) +{ + unsigned long entry, flags; + dma_addr_t ret = DMA_ERROR_CODE; + int build_fail; + + spin_lock_irqsave(&(tbl->it_lock), flags); + + entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order); + + if (unlikely(entry == DMA_ERROR_CODE)) { + spin_unlock_irqrestore(&(tbl->it_lock), flags); + return DMA_ERROR_CODE; + } + + entry += tbl->it_offset; /* Offset into real TCE table */ + ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */ + + /* Put the TCEs in the HW table */ + build_fail = ppc_md.tce_build(tbl, entry, npages, + (unsigned long)page & IOMMU_PAGE_MASK, + direction, attrs); + + /* ppc_md.tce_build() only returns non-zero for transient errors. + * Clean up the table bitmap in this case and return + * DMA_ERROR_CODE. For all other errors the functionality is + * not altered. + */ + if (unlikely(build_fail)) { + __iommu_free(tbl, ret, npages); + + spin_unlock_irqrestore(&(tbl->it_lock), flags); + return DMA_ERROR_CODE; + } + + /* Flush/invalidate TLB caches if necessary */ + if (ppc_md.tce_flush) + ppc_md.tce_flush(tbl); + + spin_unlock_irqrestore(&(tbl->it_lock), flags); + + /* Make sure updates are seen by hardware */ + mb(); + + return ret; +} + +static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, + unsigned int npages) +{ + unsigned long entry, free_entry; + + entry = dma_addr >> IOMMU_PAGE_SHIFT; + free_entry = entry - tbl->it_offset; + + if (((free_entry + npages) > tbl->it_size) || + (entry < tbl->it_offset)) { + if (printk_ratelimit()) { + printk(KERN_INFO "iommu_free: invalid entry\n"); + printk(KERN_INFO "\tentry = 0x%lx\n", entry); + printk(KERN_INFO "\tdma_addr = 0x%llx\n", (u64)dma_addr); + printk(KERN_INFO "\tTable = 0x%llx\n", (u64)tbl); + printk(KERN_INFO "\tbus# = 0x%llx\n", (u64)tbl->it_busno); + printk(KERN_INFO "\tsize = 0x%llx\n", (u64)tbl->it_size); + printk(KERN_INFO "\tstartOff = 0x%llx\n", (u64)tbl->it_offset); + printk(KERN_INFO "\tindex = 0x%llx\n", (u64)tbl->it_index); + WARN_ON(1); + } + return; + } + + ppc_md.tce_free(tbl, entry, npages); + bitmap_clear(tbl->it_map, free_entry, npages); +} + +static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, + unsigned int npages) +{ + unsigned long flags; + + spin_lock_irqsave(&(tbl->it_lock), flags); + + __iommu_free(tbl, dma_addr, npages); + + /* Make sure TLB cache is flushed if the HW needs it. We do + * not do an mb() here on purpose, it is not needed on any of + * the current platforms. 
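+	 * (If the entries are reused, the mb() at the end of iommu_alloc()
+	 * still orders the new TCEs before the mapping is handed back to
+	 * the caller, which is part of why skipping it here is tolerable.)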
+ */ + if (ppc_md.tce_flush) + ppc_md.tce_flush(tbl); + + spin_unlock_irqrestore(&(tbl->it_lock), flags); +} + +int iommu_map_sg(struct device *dev, struct iommu_table *tbl, + struct scatterlist *sglist, int nelems, + unsigned long mask, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + dma_addr_t dma_next = 0, dma_addr; + unsigned long flags; + struct scatterlist *s, *outs, *segstart; + int outcount, incount, i, build_fail = 0; + unsigned int align; + unsigned long handle; + unsigned int max_seg_size; + + BUG_ON(direction == DMA_NONE); + + if ((nelems == 0) || !tbl) + return 0; + + outs = s = segstart = &sglist[0]; + outcount = 1; + incount = nelems; + handle = 0; + + /* Init first segment length for backout at failure */ + outs->dma_length = 0; + + DBG("sg mapping %d elements:\n", nelems); + + spin_lock_irqsave(&(tbl->it_lock), flags); + + max_seg_size = dma_get_max_seg_size(dev); + for_each_sg(sglist, s, nelems, i) { + unsigned long vaddr, npages, entry, slen; + + slen = s->length; + /* Sanity check */ + if (slen == 0) { + dma_next = 0; + continue; + } + /* Allocate iommu entries for that segment */ + vaddr = (unsigned long) sg_virt(s); + npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE); + align = 0; + if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && slen >= PAGE_SIZE && + (vaddr & ~PAGE_MASK) == 0) + align = PAGE_SHIFT - IOMMU_PAGE_SHIFT; + entry = iommu_range_alloc(dev, tbl, npages, &handle, + mask >> IOMMU_PAGE_SHIFT, align); + + DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen); + + /* Handle failure */ + if (unlikely(entry == DMA_ERROR_CODE)) { + if (printk_ratelimit()) + dev_info(dev, "iommu_alloc failed, tbl %p " + "vaddr %lx npages %lu\n", tbl, vaddr, + npages); + goto failure; + } + + /* Convert entry to a dma_addr_t */ + entry += tbl->it_offset; + dma_addr = entry << IOMMU_PAGE_SHIFT; + dma_addr |= (s->offset & ~IOMMU_PAGE_MASK); + + DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n", + npages, entry, dma_addr); + + /* Insert into HW table */ + build_fail = ppc_md.tce_build(tbl, entry, npages, + vaddr & IOMMU_PAGE_MASK, + direction, attrs); + if(unlikely(build_fail)) + goto failure; + + /* If we are in an open segment, try merging */ + if (segstart != s) { + DBG(" - trying merge...\n"); + /* We cannot merge if: + * - allocated dma_addr isn't contiguous to previous allocation + */ + if (novmerge || (dma_addr != dma_next) || + (outs->dma_length + s->length > max_seg_size)) { + /* Can't merge: create a new segment */ + segstart = s; + outcount++; + outs = sg_next(outs); + DBG(" can't merge, new segment.\n"); + } else { + outs->dma_length += s->length; + DBG(" merged, new len: %ux\n", outs->dma_length); + } + } + + if (segstart == s) { + /* This is a new segment, fill entries */ + DBG(" - filling new segment.\n"); + outs->dma_address = dma_addr; + outs->dma_length = slen; + } + + /* Calculate next page pointer for contiguous check */ + dma_next = dma_addr + slen; + + DBG(" - dma next is: %lx\n", dma_next); + } + + /* Flush/invalidate TLB caches if necessary */ + if (ppc_md.tce_flush) + ppc_md.tce_flush(tbl); + + spin_unlock_irqrestore(&(tbl->it_lock), flags); + + DBG("mapped %d elements:\n", outcount); + + /* For the sake of iommu_unmap_sg, we clear out the length in the + * next entry of the sglist if we didn't fill the list completely + */ + if (outcount < incount) { + outs = sg_next(outs); + outs->dma_address = DMA_ERROR_CODE; + outs->dma_length = 0; + } + + /* Make sure updates are seen by hardware */ + mb(); + + return outcount; + + failure: + for_each_sg(sglist, s, 
nelems, i) {
+		if (s->dma_length != 0) {
+			unsigned long vaddr, npages;
+
+			vaddr = s->dma_address & IOMMU_PAGE_MASK;
+			npages = iommu_num_pages(s->dma_address, s->dma_length,
+						 IOMMU_PAGE_SIZE);
+			__iommu_free(tbl, vaddr, npages);
+			s->dma_address = DMA_ERROR_CODE;
+			s->dma_length = 0;
+		}
+		if (s == outs)
+			break;
+	}
+	spin_unlock_irqrestore(&(tbl->it_lock), flags);
+	return 0;
+}
+
+
+void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
+		int nelems, enum dma_data_direction direction,
+		struct dma_attrs *attrs)
+{
+	struct scatterlist *sg;
+	unsigned long flags;
+
+	BUG_ON(direction == DMA_NONE);
+
+	if (!tbl)
+		return;
+
+	spin_lock_irqsave(&(tbl->it_lock), flags);
+
+	sg = sglist;
+	while (nelems--) {
+		unsigned int npages;
+		dma_addr_t dma_handle = sg->dma_address;
+
+		if (sg->dma_length == 0)
+			break;
+		npages = iommu_num_pages(dma_handle, sg->dma_length,
+					 IOMMU_PAGE_SIZE);
+		__iommu_free(tbl, dma_handle, npages);
+		sg = sg_next(sg);
+	}
+
+	/* Flush/invalidate TLBs if necessary. As for iommu_free(), we
+	 * do not do an mb() here, the affected platforms do not need it
+	 * when freeing.
+	 */
+	if (ppc_md.tce_flush)
+		ppc_md.tce_flush(tbl);
+
+	spin_unlock_irqrestore(&(tbl->it_lock), flags);
+}
+
+static void iommu_table_clear(struct iommu_table *tbl)
+{
+	/*
+	 * In the case of firmware-assisted dump, the system goes through
+	 * a clean reboot at the time of the crash. Hence it's safe to
+	 * clear the TCE entries if firmware-assisted dump is active.
+	 */
+	if (!is_kdump_kernel() || is_fadump_active()) {
+		/* Clear the table in case firmware left allocations in it */
+		ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
+		return;
+	}
+
+#ifdef CONFIG_CRASH_DUMP
+	if (ppc_md.tce_get) {
+		unsigned long index, tceval, tcecount = 0;
+
+		/* Reserve the existing mappings left by the first kernel. */
+		for (index = 0; index < tbl->it_size; index++) {
+			tceval = ppc_md.tce_get(tbl, index + tbl->it_offset);
+			/*
+			 * Freed TCE entry contains 0x7fffffffffffffff on JS20
+			 */
+			if (tceval && (tceval != 0x7fffffffffffffffUL)) {
+				__set_bit(index, tbl->it_map);
+				tcecount++;
+			}
+		}
+
+		if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) {
+			printk(KERN_WARNING "TCE table is full; freeing ");
+			printk(KERN_WARNING "%d entries for the kdump boot\n",
+			       KDUMP_MIN_TCE_ENTRIES);
+			for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES;
+			     index < tbl->it_size; index++)
+				__clear_bit(index, tbl->it_map);
+		}
+	}
+#endif
+}
+
+/*
+ * Build an iommu_table structure. This contains a bitmap which
+ * is used to manage allocation of the tce space.
+ */
+struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
+{
+	unsigned long sz;
+	static int welcomed = 0;
+	struct page *page;
+
+	/* Set aside 1/4 of the table for large allocations. */
+	tbl->it_halfpoint = tbl->it_size * 3 / 4;
+
+	/* number of bytes needed for the bitmap */
+	sz = (tbl->it_size + 7) >> 3;
+
+	page = alloc_pages_node(nid, GFP_ATOMIC, get_order(sz));
+	if (!page)
+		panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
+	tbl->it_map = page_address(page);
+	memset(tbl->it_map, 0, sz);
+
+	/*
+	 * Reserve page 0 so it will not be used for any mappings.
+	 * This keeps buggy drivers that consider page 0 to be invalid
+	 * from crashing the machine or even losing data.
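+	 * With it_offset == 0, entry 0 would map bus address 0, and a
+	 * driver comparing a dma_addr_t against 0/NULL would mistake
+	 * that valid mapping for a failed one.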
+ */ + if (tbl->it_offset == 0) + set_bit(0, tbl->it_map); + + tbl->it_hint = 0; + tbl->it_largehint = tbl->it_halfpoint; + spin_lock_init(&tbl->it_lock); + + iommu_table_clear(tbl); + + if (!welcomed) { + printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n", + novmerge ? "disabled" : "enabled"); + welcomed = 1; + } + + return tbl; +} + +void iommu_free_table(struct iommu_table *tbl, const char *node_name) +{ + unsigned long bitmap_sz, i; + unsigned int order; + + if (!tbl || !tbl->it_map) { + printk(KERN_ERR "%s: expected TCE map for %s\n", __func__, + node_name); + return; + } + + /* verify that table contains no entries */ + /* it_size is in entries, and we're examining 64 at a time */ + for (i = 0; i < (tbl->it_size/64); i++) { + if (tbl->it_map[i] != 0) { + printk(KERN_WARNING "%s: Unexpected TCEs for %s\n", + __func__, node_name); + break; + } + } + + /* calculate bitmap size in bytes */ + bitmap_sz = (tbl->it_size + 7) / 8; + + /* free bitmap */ + order = get_order(bitmap_sz); + free_pages((unsigned long) tbl->it_map, order); + + /* free table */ + kfree(tbl); +} + +/* Creates TCEs for a user provided buffer. The user buffer must be + * contiguous real kernel storage (not vmalloc). The address passed here + * comprises a page address and offset into that page. The dma_addr_t + * returned will point to the same byte within the page as was passed in. + */ +dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, + struct page *page, unsigned long offset, size_t size, + unsigned long mask, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + dma_addr_t dma_handle = DMA_ERROR_CODE; + void *vaddr; + unsigned long uaddr; + unsigned int npages, align; + + BUG_ON(direction == DMA_NONE); + + vaddr = page_address(page) + offset; + uaddr = (unsigned long)vaddr; + npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE); + + if (tbl) { + align = 0; + if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && size >= PAGE_SIZE && + ((unsigned long)vaddr & ~PAGE_MASK) == 0) + align = PAGE_SHIFT - IOMMU_PAGE_SHIFT; + + dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction, + mask >> IOMMU_PAGE_SHIFT, align, + attrs); + if (dma_handle == DMA_ERROR_CODE) { + if (printk_ratelimit()) { + dev_info(dev, "iommu_alloc failed, tbl %p " + "vaddr %p npages %d\n", tbl, vaddr, + npages); + } + } else + dma_handle |= (uaddr & ~IOMMU_PAGE_MASK); + } + + return dma_handle; +} + +void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + unsigned int npages; + + BUG_ON(direction == DMA_NONE); + + if (tbl) { + npages = iommu_num_pages(dma_handle, size, IOMMU_PAGE_SIZE); + iommu_free(tbl, dma_handle, npages); + } +} + +/* Allocates a contiguous real buffer and creates mappings over it. + * Returns the virtual address of the buffer and sets dma_handle + * to the dma address (mapping) of the first page. + */ +void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, + size_t size, dma_addr_t *dma_handle, + unsigned long mask, gfp_t flag, int node) +{ + void *ret = NULL; + dma_addr_t mapping; + unsigned int order; + unsigned int nio_pages, io_order; + struct page *page; + + size = PAGE_ALIGN(size); + order = get_order(size); + + /* + * Client asked for way too much space. This is checked later + * anyway. It is easier to debug here for the drivers than in + * the tce tables. 
+ */ + if (order >= IOMAP_MAX_ORDER) { + dev_info(dev, "iommu_alloc_consistent size too large: 0x%lx\n", + size); + return NULL; + } + + if (!tbl) + return NULL; + + /* Alloc enough pages (and possibly more) */ + page = alloc_pages_node(node, flag, order); + if (!page) + return NULL; + ret = page_address(page); + memset(ret, 0, size); + + /* Set up tces to cover the allocated range */ + nio_pages = size >> IOMMU_PAGE_SHIFT; + io_order = get_iommu_order(size); + mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, + mask >> IOMMU_PAGE_SHIFT, io_order, NULL); + if (mapping == DMA_ERROR_CODE) { + free_pages((unsigned long)ret, order); + return NULL; + } + *dma_handle = mapping; + return ret; +} + +void iommu_free_coherent(struct iommu_table *tbl, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + if (tbl) { + unsigned int nio_pages; + + size = PAGE_ALIGN(size); + nio_pages = size >> IOMMU_PAGE_SHIFT; + iommu_free(tbl, dma_handle, nio_pages); + size = PAGE_ALIGN(size); + free_pages((unsigned long)vaddr, get_order(size)); + } +} diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c new file mode 100644 index 00000000..d7ebc58c --- /dev/null +++ b/arch/powerpc/kernel/irq.c @@ -0,0 +1,680 @@ +/* + * Derived from arch/i386/kernel/irq.c + * Copyright (C) 1992 Linus Torvalds + * Adapted from arch/i386 by Gary Thomas + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Updated and modified by Cort Dougan <cort@fsmlabs.com> + * Copyright (C) 1996-2001 Cort Dougan + * Adapted for Power Macintosh by Paul Mackerras + * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + * + * The MPC8xx has an interrupt mask in the SIU. If a bit is set, the + * interrupt is _enabled_. As expected, IRQ0 is bit 0 in the 32-bit + * mask register (of which only 16 are defined), hence the weird shifting + * and complement of the cached_irq_mask. I want to be able to stuff + * this right into the SIU SMASK register. + * Many of the prep/chrp functions are conditional compiled on CONFIG_8xx + * to reduce code space and undefined function references. 
+ */ + +#undef DEBUG + +#include <linux/export.h> +#include <linux/threads.h> +#include <linux/kernel_stat.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/ptrace.h> +#include <linux/ioport.h> +#include <linux/interrupt.h> +#include <linux/timex.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/seq_file.h> +#include <linux/cpumask.h> +#include <linux/profile.h> +#include <linux/bitops.h> +#include <linux/list.h> +#include <linux/radix-tree.h> +#include <linux/mutex.h> +#include <linux/bootmem.h> +#include <linux/pci.h> +#include <linux/debugfs.h> +#include <linux/of.h> +#include <linux/of_irq.h> + +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/irq.h> +#include <asm/cache.h> +#include <asm/prom.h> +#include <asm/ptrace.h> +#include <asm/machdep.h> +#include <asm/udbg.h> +#include <asm/smp.h> +#include <asm/debug.h> + +#ifdef CONFIG_PPC64 +#include <asm/paca.h> +#include <asm/firmware.h> +#include <asm/lv1call.h> +#endif +#define CREATE_TRACE_POINTS +#include <asm/trace.h> + +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +EXPORT_PER_CPU_SYMBOL(irq_stat); + +int __irq_offset_value; + +#ifdef CONFIG_PPC32 +EXPORT_SYMBOL(__irq_offset_value); +atomic_t ppc_n_lost_interrupts; + +#ifdef CONFIG_TAU_INT +extern int tau_initialized; +extern int tau_interrupts(int); +#endif +#endif /* CONFIG_PPC32 */ + +#ifdef CONFIG_PPC64 + +int distribute_irqs = 1; + +static inline notrace unsigned long get_irq_happened(void) +{ + unsigned long happened; + + __asm__ __volatile__("lbz %0,%1(13)" + : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened))); + + return happened; +} + +static inline notrace void set_soft_enabled(unsigned long enable) +{ + __asm__ __volatile__("stb %0,%1(13)" + : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); +} + +static inline notrace int decrementer_check_overflow(void) +{ + u64 now = get_tb_or_rtc(); + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + + if (now >= *next_tb) + set_dec(1); + return now >= *next_tb; +} + +/* This is called whenever we are re-enabling interrupts + * and returns either 0 (nothing to do) or 500/900 if there's + * either an EE or a DEC to generate. + * + * This is called in two contexts: From arch_local_irq_restore() + * before soft-enabling interrupts, and from the exception exit + * path when returning from an interrupt from a soft-disabled to + * a soft enabled context. In both case we have interrupts hard + * disabled. + * + * We take care of only clearing the bits we handled in the + * PACA irq_happened field since we can only re-emit one at a + * time and we don't want to "lose" one. + */ +notrace unsigned int __check_irq_replay(void) +{ + /* + * We use local_paca rather than get_paca() to avoid all + * the debug_smp_processor_id() business in this low level + * function + */ + unsigned char happened = local_paca->irq_happened; + + /* Clear bit 0 which we wouldn't clear otherwise */ + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + + /* + * Force the delivery of pending soft-disabled interrupts on PS3. + * Any HV call will have this side effect. + */ + if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { + u64 tmp, tmp2; + lv1_get_version_info(&tmp, &tmp2); + } + + /* + * We may have missed a decrementer interrupt. 
We check the + * decrementer itself rather than the paca irq_happened field + * in case we also had a rollover while hard disabled + */ + local_paca->irq_happened &= ~PACA_IRQ_DEC; + if (decrementer_check_overflow()) + return 0x900; + + /* Finally check if an external interrupt happened */ + local_paca->irq_happened &= ~PACA_IRQ_EE; + if (happened & PACA_IRQ_EE) + return 0x500; + +#ifdef CONFIG_PPC_BOOK3E + /* Finally check if an EPR external interrupt happened + * this bit is typically set if we need to handle another + * "edge" interrupt from within the MPIC "EPR" handler + */ + local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE; + if (happened & PACA_IRQ_EE_EDGE) + return 0x500; + + local_paca->irq_happened &= ~PACA_IRQ_DBELL; + if (happened & PACA_IRQ_DBELL) + return 0x280; +#endif /* CONFIG_PPC_BOOK3E */ + + /* There should be nothing left ! */ + BUG_ON(local_paca->irq_happened != 0); + + return 0; +} + +notrace void arch_local_irq_restore(unsigned long en) +{ + unsigned char irq_happened; + unsigned int replay; + + /* Write the new soft-enabled value */ + set_soft_enabled(en); + if (!en) + return; + /* + * From this point onward, we can take interrupts, preempt, + * etc... unless we got hard-disabled. We check if an event + * happened. If none happened, we know we can just return. + * + * We may have preempted before the check below, in which case + * we are checking the "new" CPU instead of the old one. This + * is only a problem if an event happened on the "old" CPU. + * + * External interrupt events will have caused interrupts to + * be hard-disabled, so there is no problem, we + * cannot have preempted. + */ + irq_happened = get_irq_happened(); + if (!irq_happened) + return; + + /* + * We need to hard disable to get a trusted value from + * __check_irq_replay(). We also need to soft-disable + * again to avoid warnings in there due to the use of + * per-cpu variables. + * + * We know that if the value in irq_happened is exactly 0x01 + * then we are already hard disabled (there are other less + * common cases that we'll ignore for now), so we skip the + * (expensive) mtmsrd. + */ + if (unlikely(irq_happened != PACA_IRQ_HARD_DIS)) + __hard_irq_disable(); +#ifdef CONFIG_TRACE_IRQFLAGS + else { + /* + * We should already be hard disabled here. We had bugs + * where that wasn't the case so let's dbl check it and + * warn if we are wrong. Only do that when IRQ tracing + * is enabled as mfmsr() can be costly. + */ + if (WARN_ON(mfmsr() & MSR_EE)) + __hard_irq_disable(); + } +#endif /* CONFIG_TRACE_IRQFLAG */ + + set_soft_enabled(0); + + /* + * Check if anything needs to be re-emitted. We haven't + * soft-enabled yet to avoid warnings in decrementer_check_overflow + * accessing per-cpu variables + */ + replay = __check_irq_replay(); + + /* We can soft-enable now */ + set_soft_enabled(1); + + /* + * And replay if we have to. This will return with interrupts + * hard-enabled. + */ + if (replay) { + __replay_interrupt(replay); + return; + } + + /* Finally, let's ensure we are hard enabled */ + __hard_irq_enable(); +} +EXPORT_SYMBOL(arch_local_irq_restore); + +/* + * This is specifically called by assembly code to re-enable interrupts + * if they are currently disabled. This is typically called before + * schedule() or do_signal() when returning to userspace. We do it + * in C to avoid the burden of dealing with lockdep etc... + * + * NOTE: This is called with interrupts hard disabled but not marked + * as such in paca->irq_happened, so we need to resync this. 
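+ * Concretely, the resync below is just setting PACA_IRQ_HARD_DIS in
+ * irq_happened before calling local_irq_enable(), so the lazy-disable
+ * bookkeeping agrees with the real MSR.EE state again.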
+ */ +void notrace restore_interrupts(void) +{ + if (irqs_disabled()) { + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + local_irq_enable(); + } else + __hard_irq_enable(); +} + +/* + * This is a helper to use when about to go into idle low-power + * when the latter has the side effect of re-enabling interrupts + * (such as calling H_CEDE under pHyp). + * + * You call this function with interrupts soft-disabled (this is + * already the case when ppc_md.power_save is called). The function + * will return whether to enter power save or just return. + * + * In the former case, it will have notified lockdep of interrupts + * being re-enabled and generally sanitized the lazy irq state, + * and in the latter case it will leave with interrupts hard + * disabled and marked as such, so the local_irq_enable() call + * in cpu_idle() will properly re-enable everything. + */ +bool prep_irq_for_idle(void) +{ + /* + * First we need to hard disable to ensure no interrupt + * occurs before we effectively enter the low power state + */ + hard_irq_disable(); + + /* + * If anything happened while we were soft-disabled, + * we return now and do not enter the low power state. + */ + if (lazy_irq_pending()) + return false; + + /* Tell lockdep we are about to re-enable */ + trace_hardirqs_on(); + + /* + * Mark interrupts as soft-enabled and clear the + * PACA_IRQ_HARD_DIS from the pending mask since we + * are about to hard enable as well as a side effect + * of entering the low power state. + */ + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + local_paca->soft_enabled = 1; + + /* Tell the caller to enter the low power state */ + return true; +} + +#endif /* CONFIG_PPC64 */ + +int arch_show_interrupts(struct seq_file *p, int prec) +{ + int j; + +#if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT) + if (tau_initialized) { + seq_printf(p, "%*s: ", prec, "TAU"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", tau_interrupts(j)); + seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n"); + } +#endif /* CONFIG_PPC32 && CONFIG_TAU_INT */ + + seq_printf(p, "%*s: ", prec, "LOC"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs); + seq_printf(p, " Local timer interrupts\n"); + + seq_printf(p, "%*s: ", prec, "SPU"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs); + seq_printf(p, " Spurious interrupts\n"); + + seq_printf(p, "%*s: ", prec, "CNT"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs); + seq_printf(p, " Performance monitoring interrupts\n"); + + seq_printf(p, "%*s: ", prec, "MCE"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions); + seq_printf(p, " Machine check exceptions\n"); + + return 0; +} + +/* + * /proc/stat helpers + */ +u64 arch_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = per_cpu(irq_stat, cpu).timer_irqs; + + sum += per_cpu(irq_stat, cpu).pmu_irqs; + sum += per_cpu(irq_stat, cpu).mce_exceptions; + sum += per_cpu(irq_stat, cpu).spurious_irqs; + + return sum; +} + +#ifdef CONFIG_HOTPLUG_CPU +void migrate_irqs(void) +{ + struct irq_desc *desc; + unsigned int irq; + static int warned; + cpumask_var_t mask; + const struct cpumask *map = cpu_online_mask; + + alloc_cpumask_var(&mask, GFP_KERNEL); + + for_each_irq_desc(irq, desc) { + struct irq_data *data; + struct irq_chip *chip; + + data = irq_desc_get_irq_data(desc); + if (irqd_is_per_cpu(data)) + continue; + + chip = irq_data_get_irq_chip(data); + + cpumask_and(mask, data->affinity, map); + if (cpumask_any(mask) 
>= nr_cpu_ids) { + printk("Breaking affinity for irq %i\n", irq); + cpumask_copy(mask, map); + } + if (chip->irq_set_affinity) + chip->irq_set_affinity(data, mask, true); + else if (desc->action && !(warned++)) + printk("Cannot set affinity for irq %i\n", irq); + } + + free_cpumask_var(mask); + + local_irq_enable(); + mdelay(1); + local_irq_disable(); +} +#endif + +static inline void handle_one_irq(unsigned int irq) +{ + struct thread_info *curtp, *irqtp; + unsigned long saved_sp_limit; + struct irq_desc *desc; + + desc = irq_to_desc(irq); + if (!desc) + return; + + /* Switch to the irq stack to handle this */ + curtp = current_thread_info(); + irqtp = hardirq_ctx[smp_processor_id()]; + + if (curtp == irqtp) { + /* We're already on the irq stack, just handle it */ + desc->handle_irq(irq, desc); + return; + } + + saved_sp_limit = current->thread.ksp_limit; + + irqtp->task = curtp->task; + irqtp->flags = 0; + + /* Copy the softirq bits in preempt_count so that the + * softirq checks work in the hardirq context. */ + irqtp->preempt_count = (irqtp->preempt_count & ~SOFTIRQ_MASK) | + (curtp->preempt_count & SOFTIRQ_MASK); + + current->thread.ksp_limit = (unsigned long)irqtp + + _ALIGN_UP(sizeof(struct thread_info), 16); + + call_handle_irq(irq, desc, irqtp, desc->handle_irq); + current->thread.ksp_limit = saved_sp_limit; + irqtp->task = NULL; + + /* Set any flag that may have been set on the + * alternate stack + */ + if (irqtp->flags) + set_bits(irqtp->flags, &curtp->flags); +} + +static inline void check_stack_overflow(void) +{ +#ifdef CONFIG_DEBUG_STACKOVERFLOW + long sp; + + sp = __get_SP() & (THREAD_SIZE-1); + + /* check for stack overflow: is there less than 2KB free? */ + if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { + printk("do_IRQ: stack overflow: %ld\n", + sp - sizeof(struct thread_info)); + dump_stack(); + } +#endif +} + +void do_IRQ(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + unsigned int irq; + + trace_irq_entry(regs); + + irq_enter(); + + check_stack_overflow(); + + /* + * Query the platform PIC for the interrupt & ack it. 
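+	 * (On many interrupt controllers, e.g. an MPIC, reading the
+	 * interrupt acknowledge register returns the vector and performs
+	 * the ack in one go, which is why a single ppc_md.get_irq() call
+	 * does both.)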
+ * + * This will typically lower the interrupt line to the CPU + */ + irq = ppc_md.get_irq(); + + /* We can hard enable interrupts now */ + may_hard_irq_enable(); + + /* And finally process it */ + if (irq != NO_IRQ) + handle_one_irq(irq); + else + __get_cpu_var(irq_stat).spurious_irqs++; + + irq_exit(); + set_irq_regs(old_regs); + + trace_irq_exit(regs); +} + +void __init init_IRQ(void) +{ + if (ppc_md.init_IRQ) + ppc_md.init_IRQ(); + + exc_lvl_ctx_init(); + + irq_ctx_init(); +} + +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +struct thread_info *critirq_ctx[NR_CPUS] __read_mostly; +struct thread_info *dbgirq_ctx[NR_CPUS] __read_mostly; +struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly; + +void exc_lvl_ctx_init(void) +{ + struct thread_info *tp; + int i, cpu_nr; + + for_each_possible_cpu(i) { +#ifdef CONFIG_PPC64 + cpu_nr = i; +#else + cpu_nr = get_hard_smp_processor_id(i); +#endif + memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE); + tp = critirq_ctx[cpu_nr]; + tp->cpu = cpu_nr; + tp->preempt_count = 0; + +#ifdef CONFIG_BOOKE + memset((void *)dbgirq_ctx[cpu_nr], 0, THREAD_SIZE); + tp = dbgirq_ctx[cpu_nr]; + tp->cpu = cpu_nr; + tp->preempt_count = 0; + + memset((void *)mcheckirq_ctx[cpu_nr], 0, THREAD_SIZE); + tp = mcheckirq_ctx[cpu_nr]; + tp->cpu = cpu_nr; + tp->preempt_count = HARDIRQ_OFFSET; +#endif + } +} +#endif + +struct thread_info *softirq_ctx[NR_CPUS] __read_mostly; +struct thread_info *hardirq_ctx[NR_CPUS] __read_mostly; + +void irq_ctx_init(void) +{ + struct thread_info *tp; + int i; + + for_each_possible_cpu(i) { + memset((void *)softirq_ctx[i], 0, THREAD_SIZE); + tp = softirq_ctx[i]; + tp->cpu = i; + tp->preempt_count = 0; + + memset((void *)hardirq_ctx[i], 0, THREAD_SIZE); + tp = hardirq_ctx[i]; + tp->cpu = i; + tp->preempt_count = HARDIRQ_OFFSET; + } +} + +static inline void do_softirq_onstack(void) +{ + struct thread_info *curtp, *irqtp; + unsigned long saved_sp_limit = current->thread.ksp_limit; + + curtp = current_thread_info(); + irqtp = softirq_ctx[smp_processor_id()]; + irqtp->task = curtp->task; + irqtp->flags = 0; + current->thread.ksp_limit = (unsigned long)irqtp + + _ALIGN_UP(sizeof(struct thread_info), 16); + call_do_softirq(irqtp); + current->thread.ksp_limit = saved_sp_limit; + irqtp->task = NULL; + + /* Set any flag that may have been set on the + * alternate stack + */ + if (irqtp->flags) + set_bits(irqtp->flags, &curtp->flags); +} + +void do_softirq(void) +{ + unsigned long flags; + + if (in_interrupt()) + return; + + local_irq_save(flags); + + if (local_softirq_pending()) + do_softirq_onstack(); + + local_irq_restore(flags); +} + +irq_hw_number_t virq_to_hw(unsigned int virq) +{ + struct irq_data *irq_data = irq_get_irq_data(virq); + return WARN_ON(!irq_data) ? 0 : irq_data->hwirq; +} +EXPORT_SYMBOL_GPL(virq_to_hw); + +#ifdef CONFIG_SMP +int irq_choose_cpu(const struct cpumask *mask) +{ + int cpuid; + + if (cpumask_equal(mask, cpu_all_mask)) { + static int irq_rover; + static DEFINE_RAW_SPINLOCK(irq_rover_lock); + unsigned long flags; + + /* Round-robin distribution... 
*/
+do_round_robin:
+		raw_spin_lock_irqsave(&irq_rover_lock, flags);
+
+		irq_rover = cpumask_next(irq_rover, cpu_online_mask);
+		if (irq_rover >= nr_cpu_ids)
+			irq_rover = cpumask_first(cpu_online_mask);
+
+		cpuid = irq_rover;
+
+		raw_spin_unlock_irqrestore(&irq_rover_lock, flags);
+	} else {
+		cpuid = cpumask_first_and(mask, cpu_online_mask);
+		if (cpuid >= nr_cpu_ids)
+			goto do_round_robin;
+	}
+
+	return get_hard_smp_processor_id(cpuid);
+}
+#else
+int irq_choose_cpu(const struct cpumask *mask)
+{
+	return hard_smp_processor_id();
+}
+#endif
+
+int arch_early_irq_init(void)
+{
+	return 0;
+}
+
+#ifdef CONFIG_PPC64
+static int __init setup_noirqdistrib(char *str)
+{
+	distribute_irqs = 0;
+	return 1;
+}
+
+__setup("noirqdistrib", setup_noirqdistrib);
+#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c new file mode 100644 index 00000000..d45ec587 --- /dev/null +++ b/arch/powerpc/kernel/isa-bridge.c @@ -0,0 +1,266 @@
+/*
+ * Routines for tracking a legacy ISA bridge
+ *
+ * Copyright 2007 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
+ *
+ * Some bits and pieces moved over from pci_64.c
+ *
+ * Copyright 2003 Anton Blanchard <anton@au.ibm.com>, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+
+unsigned long isa_io_base;	/* NULL if no ISA bus */
+EXPORT_SYMBOL(isa_io_base);
+
+/* Cached ISA bridge dev. */
+static struct device_node *isa_bridge_devnode;
+struct pci_dev *isa_bridge_pcidev;
+EXPORT_SYMBOL_GPL(isa_bridge_pcidev);
+
+#define ISA_SPACE_MASK 0x1
+#define ISA_SPACE_IO 0x1
+
+static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node,
+						unsigned long phb_io_base_phys)
+{
+	/* We should get some saner parsing here and remove these structs */
+	struct pci_address {
+		u32 a_hi;
+		u32 a_mid;
+		u32 a_lo;
+	};
+
+	struct isa_address {
+		u32 a_hi;
+		u32 a_lo;
+	};
+
+	struct isa_range {
+		struct isa_address isa_addr;
+		struct pci_address pci_addr;
+		unsigned int size;
+	};
+
+	const struct isa_range *range;
+	unsigned long pci_addr;
+	unsigned int isa_addr;
+	unsigned int size;
+	int rlen = 0;
+
+	range = of_get_property(isa_node, "ranges", &rlen);
+	if (range == NULL || (rlen < sizeof(struct isa_range)))
+		goto inval_range;
+
+	/* From "ISA Binding to 1275"
+	 * The ranges property is laid out as an array of elements,
+	 * each of which comprises:
+	 *   cells 0 - 1:	an ISA address
+	 *   cells 2 - 4:	a PCI address
+	 *			(size depending on dev->n_addr_cells)
+	 *   cell 5:		the size of the range
+	 */
+	if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) {
+		range++;
+		rlen -= sizeof(struct isa_range);
+		if (rlen < sizeof(struct isa_range))
+			goto inval_range;
+	}
+	if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO)
+		goto inval_range;
+
+	isa_addr = range->isa_addr.a_lo;
+	pci_addr = (unsigned long) range->pci_addr.a_mid << 32 |
+		range->pci_addr.a_lo;
+
+	/* Assume these are both zero.
Note: We could fix that and + * do a proper parsing instead ... oh well, that will do for + * now as nobody uses fancy mappings for ISA bridges + */ + if ((pci_addr != 0) || (isa_addr != 0)) { + printk(KERN_ERR "unexpected isa to pci mapping: %s\n", + __func__); + return; + } + + /* Align size and make sure it's cropped to 64K */ + size = PAGE_ALIGN(range->size); + if (size > 0x10000) + size = 0x10000; + + __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE, + size, _PAGE_NO_CACHE|_PAGE_GUARDED); + return; + +inval_range: + printk(KERN_ERR "no ISA IO ranges or unexpected isa range, " + "mapping 64k\n"); + __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE, + 0x10000, _PAGE_NO_CACHE|_PAGE_GUARDED); +} + + +/** + * isa_bridge_find_early - Find and map the ISA IO space early before + * main PCI discovery. This is optionally called by + * the arch code when adding PCI PHBs to get early + * access to ISA IO ports + */ +void __init isa_bridge_find_early(struct pci_controller *hose) +{ + struct device_node *np, *parent = NULL, *tmp; + + /* If we already have an ISA bridge, bail off */ + if (isa_bridge_devnode != NULL) + return; + + /* For each "isa" node in the system. Note : we do a search by + * type and not by name. It might be better to do by name but that's + * what the code used to do and I don't want to break too much at + * once. We can look into changing that separately + */ + for_each_node_by_type(np, "isa") { + /* Look for our hose being a parent */ + for (parent = of_get_parent(np); parent;) { + if (parent == hose->dn) { + of_node_put(parent); + break; + } + tmp = parent; + parent = of_get_parent(parent); + of_node_put(tmp); + } + if (parent != NULL) + break; + } + if (np == NULL) + return; + isa_bridge_devnode = np; + + /* Now parse the "ranges" property and setup the ISA mapping */ + pci_process_ISA_OF_ranges(np, hose->io_base_phys); + + /* Set the global ISA io base to indicate we have an ISA bridge */ + isa_io_base = ISA_IO_BASE; + + pr_debug("ISA bridge (early) is %s\n", np->full_name); +} + +/** + * isa_bridge_find_late - Find and map the ISA IO space upon discovery of + * a new ISA bridge + */ +static void __devinit isa_bridge_find_late(struct pci_dev *pdev, + struct device_node *devnode) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + + /* Store ISA device node and PCI device */ + isa_bridge_devnode = of_node_get(devnode); + isa_bridge_pcidev = pdev; + + /* Now parse the "ranges" property and setup the ISA mapping */ + pci_process_ISA_OF_ranges(devnode, hose->io_base_phys); + + /* Set the global ISA io base to indicate we have an ISA bridge */ + isa_io_base = ISA_IO_BASE; + + pr_debug("ISA bridge (late) is %s on %s\n", + devnode->full_name, pci_name(pdev)); +} + +/** + * isa_bridge_remove - Remove/unmap an ISA bridge + */ +static void isa_bridge_remove(void) +{ + pr_debug("ISA bridge removed !\n"); + + /* Clear the global ISA io base to indicate that we have no more + * ISA bridge. Note that drivers don't quite handle that, though + * we should probably do something about it. But do we ever really + * have ISA bridges being removed on machines using legacy devices ? 
+ */ + isa_io_base = ISA_IO_BASE; + + /* Clear references to the bridge */ + of_node_put(isa_bridge_devnode); + isa_bridge_devnode = NULL; + isa_bridge_pcidev = NULL; + + /* Unmap the ISA area */ + __iounmap_at((void *)ISA_IO_BASE, 0x10000); +} + +/** + * isa_bridge_notify - Get notified of PCI devices addition/removal + */ +static int __devinit isa_bridge_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + struct pci_dev *pdev = to_pci_dev(dev); + struct device_node *devnode = pci_device_to_OF_node(pdev); + + switch(action) { + case BUS_NOTIFY_ADD_DEVICE: + /* Check if we have an early ISA device, without PCI dev */ + if (isa_bridge_devnode && isa_bridge_devnode == devnode && + !isa_bridge_pcidev) { + pr_debug("ISA bridge PCI attached: %s\n", + pci_name(pdev)); + isa_bridge_pcidev = pdev; + } + + /* Check if we have no ISA device, and this happens to be one, + * register it as such if it has an OF device + */ + if (!isa_bridge_devnode && devnode && devnode->type && + !strcmp(devnode->type, "isa")) + isa_bridge_find_late(pdev, devnode); + + return 0; + case BUS_NOTIFY_DEL_DEVICE: + /* Check if this our existing ISA device */ + if (pdev == isa_bridge_pcidev || + (devnode && devnode == isa_bridge_devnode)) + isa_bridge_remove(); + return 0; + } + return 0; +} + +static struct notifier_block isa_bridge_notifier = { + .notifier_call = isa_bridge_notify +}; + +/** + * isa_bridge_init - register to be notified of ISA bridge addition/removal + * + */ +static int __init isa_bridge_init(void) +{ + bus_register_notifier(&pci_bus_type, &isa_bridge_notifier); + return 0; +} +arch_initcall(isa_bridge_init); diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c new file mode 100644 index 00000000..a1ed8a8c --- /dev/null +++ b/arch/powerpc/kernel/jump_label.c @@ -0,0 +1,25 @@ +/* + * Copyright 2010 Michael Ellerman, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/jump_label.h> +#include <asm/code-patching.h> + +#ifdef HAVE_JUMP_LABEL +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + u32 *addr = (u32 *)(unsigned long)entry->code; + + if (type == JUMP_LABEL_ENABLE) + patch_branch(addr, entry->target, 0); + else + patch_instruction(addr, PPC_INST_NOP); +} +#endif diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c new file mode 100644 index 00000000..782bd0a3 --- /dev/null +++ b/arch/powerpc/kernel/kgdb.c @@ -0,0 +1,473 @@ +/* + * PowerPC backend to the KGDB stub. + * + * 1998 (c) Michael AK Tesch (tesch@cs.wisc.edu) + * Copyright (C) 2003 Timesys Corporation. + * Copyright (C) 2004-2006 MontaVista Software, Inc. + * PPC64 Mods (C) 2005 Frank Rowand (frowand@mvista.com) + * PPC32 support restored by Vitaly Wool <vwool@ru.mvista.com> and + * Sergei Shtylyov <sshtylyov@ru.mvista.com> + * Copyright (C) 2007-2008 Wind River Systems, Inc. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program as licensed "as is" without any warranty of any + * kind, whether express or implied. 
+ */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/kgdb.h> +#include <linux/smp.h> +#include <linux/signal.h> +#include <linux/ptrace.h> +#include <linux/kdebug.h> +#include <asm/current.h> +#include <asm/processor.h> +#include <asm/machdep.h> +#include <asm/debug.h> + +/* + * This table contains the mapping between PowerPC hardware trap types, and + * signals, which are primarily what GDB understands. GDB and the kernel + * don't always agree on values, so we use constants taken from gdb-6.2. + */ +static struct hard_trap_info +{ + unsigned int tt; /* Trap type code for powerpc */ + unsigned char signo; /* Signal that we map this trap into */ +} hard_trap_info[] = { + { 0x0100, 0x02 /* SIGINT */ }, /* system reset */ + { 0x0200, 0x0b /* SIGSEGV */ }, /* machine check */ + { 0x0300, 0x0b /* SIGSEGV */ }, /* data access */ + { 0x0400, 0x0b /* SIGSEGV */ }, /* instruction access */ + { 0x0500, 0x02 /* SIGINT */ }, /* external interrupt */ + { 0x0600, 0x0a /* SIGBUS */ }, /* alignment */ + { 0x0700, 0x05 /* SIGTRAP */ }, /* program check */ + { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */ + { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */ + { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */ +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) + { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */ +#if defined(CONFIG_FSL_BOOKE) + { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */ + { 0x2020, 0x08 /* SIGFPE */ }, /* spe unavailable */ + { 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data */ + { 0x2040, 0x08 /* SIGFPE */ }, /* spe fp data */ + { 0x2050, 0x08 /* SIGFPE */ }, /* spe fp round */ + { 0x2060, 0x0e /* SIGILL */ }, /* performance monitor */ + { 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */ + { 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */ + { 0x3200, 0x02 /* SIGINT */ }, /* watchdog */ +#else /* ! CONFIG_FSL_BOOKE */ + { 0x1000, 0x0e /* SIGALRM */ }, /* prog interval timer */ + { 0x1010, 0x0e /* SIGALRM */ }, /* fixed interval timer */ + { 0x1020, 0x02 /* SIGINT */ }, /* watchdog */ + { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */ + { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */ +#endif +#else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */ + { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */ +#if defined(CONFIG_8xx) + { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */ +#else /* ! CONFIG_8xx */ + { 0x0f00, 0x04 /* SIGILL */ }, /* performance monitor */ + { 0x0f20, 0x08 /* SIGFPE */ }, /* altivec unavailable */ + { 0x1300, 0x05 /* SIGTRAP */ }, /* instruction address break */ +#if defined(CONFIG_PPC64) + { 0x1200, 0x05 /* SIGILL */ }, /* system error */ + { 0x1500, 0x04 /* SIGILL */ }, /* soft patch */ + { 0x1600, 0x04 /* SIGILL */ }, /* maintenance */ + { 0x1700, 0x08 /* SIGFPE */ }, /* altivec assist */ + { 0x1800, 0x04 /* SIGILL */ }, /* thermal */ +#else /* ! 
CONFIG_PPC64 */
+	{ 0x1400, 0x02 /* SIGINT */ },		/* SMI */
+	{ 0x1600, 0x08 /* SIGFPE */ },		/* altivec assist */
+	{ 0x1700, 0x04 /* SIGILL */ },		/* TAU */
+	{ 0x2000, 0x05 /* SIGTRAP */ },		/* run mode */
+#endif
+#endif
+#endif
+	{ 0x0000, 0x00 }			/* Must be last */
+};
+
+static int computeSignal(unsigned int tt)
+{
+	struct hard_trap_info *ht;
+
+	for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
+		if (ht->tt == tt)
+			return ht->signo;
+
+	return SIGHUP;		/* default for things we don't know about */
+}
+
+static int kgdb_call_nmi_hook(struct pt_regs *regs)
+{
+	kgdb_nmicallback(raw_smp_processor_id(), regs);
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+void kgdb_roundup_cpus(unsigned long flags)
+{
+	smp_send_debugger_break();
+}
+#endif
+
+/* KGDB functions to use existing PowerPC64 hooks. */
+static int kgdb_debugger(struct pt_regs *regs)
+{
+	return !kgdb_handle_exception(1, computeSignal(TRAP(regs)),
+				      DIE_OOPS, regs);
+}
+
+static int kgdb_handle_breakpoint(struct pt_regs *regs)
+{
+	if (user_mode(regs))
+		return 0;
+
+	if (kgdb_handle_exception(1, SIGTRAP, 0, regs) != 0)
+		return 0;
+
+	if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr))
+		regs->nip += BREAK_INSTR_SIZE;
+
+	return 1;
+}
+
+static int kgdb_singlestep(struct pt_regs *regs)
+{
+	struct thread_info *thread_info, *exception_thread_info;
+
+	if (user_mode(regs))
+		return 0;
+
+	/*
+	 * On Book E and perhaps other processors, singlestep is handled on
+	 * the critical exception stack. This causes current_thread_info()
+	 * to fail, since it locates the thread_info by masking off
+	 * the low bits of the current stack pointer. We work around
+	 * this issue by copying the thread_info from the kernel stack
+	 * before calling kgdb_handle_exception, and copying it back
+	 * afterwards. On most processors the copy is avoided since
+	 * exception_thread_info == thread_info.
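+	 * The masking trick works because kernel stacks are THREAD_SIZE
+	 * aligned: rounding the saved stack pointer (regs->gpr[1]) down
+	 * with ~(THREAD_SIZE-1) yields the stack base, where thread_info
+	 * lives; on the critical stack that base holds a different
+	 * thread_info than the task's normal one, hence the copy in and
+	 * back out.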
+ */ + thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1)); + exception_thread_info = current_thread_info(); + + if (thread_info != exception_thread_info) + memcpy(exception_thread_info, thread_info, sizeof *thread_info); + + kgdb_handle_exception(0, SIGTRAP, 0, regs); + + if (thread_info != exception_thread_info) + memcpy(thread_info, exception_thread_info, sizeof *thread_info); + + return 1; +} + +static int kgdb_iabr_match(struct pt_regs *regs) +{ + if (user_mode(regs)) + return 0; + + if (kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs) != 0) + return 0; + return 1; +} + +static int kgdb_dabr_match(struct pt_regs *regs) +{ + if (user_mode(regs)) + return 0; + + if (kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs) != 0) + return 0; + return 1; +} + +#define PACK64(ptr, src) do { *(ptr++) = (src); } while (0) + +#define PACK32(ptr, src) do { \ + u32 *ptr32; \ + ptr32 = (u32 *)ptr; \ + *(ptr32++) = (src); \ + ptr = (unsigned long *)ptr32; \ + } while (0) + +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) +{ + struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp + + STACK_FRAME_OVERHEAD); + unsigned long *ptr = gdb_regs; + int reg; + + memset(gdb_regs, 0, NUMREGBYTES); + + /* Regs GPR0-2 */ + for (reg = 0; reg < 3; reg++) + PACK64(ptr, regs->gpr[reg]); + + /* Regs GPR3-13 are caller saved, not in regs->gpr[] */ + ptr += 11; + + /* Regs GPR14-31 */ + for (reg = 14; reg < 32; reg++) + PACK64(ptr, regs->gpr[reg]); + +#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_SPE + for (reg = 0; reg < 32; reg++) + PACK64(ptr, p->thread.evr[reg]); +#else + ptr += 32; +#endif +#else + /* fp registers not used by kernel, leave zero */ + ptr += 32 * 8 / sizeof(long); +#endif + + PACK64(ptr, regs->nip); + PACK64(ptr, regs->msr); + PACK32(ptr, regs->ccr); + PACK64(ptr, regs->link); + PACK64(ptr, regs->ctr); + PACK32(ptr, regs->xer); + + BUG_ON((unsigned long)ptr > + (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); +} + +#define GDB_SIZEOF_REG sizeof(unsigned long) +#define GDB_SIZEOF_REG_U32 sizeof(u32) + +#ifdef CONFIG_FSL_BOOKE +#define GDB_SIZEOF_FLOAT_REG sizeof(unsigned long) +#else +#define GDB_SIZEOF_FLOAT_REG sizeof(u64) +#endif + +struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = +{ + { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[0]) }, + { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[1]) }, + { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[2]) }, + { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[3]) }, + { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[4]) }, + { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[5]) }, + { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[6]) }, + { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[7]) }, + { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[8]) }, + { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[9]) }, + { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[10]) }, + { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[11]) }, + { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[12]) }, + { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[13]) }, + { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[14]) }, + { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[15]) }, + { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[16]) }, + { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[17]) }, + { "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[18]) }, + { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[19]) }, + { "r20", 
GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[20]) },
+	{ "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[21]) },
+	{ "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[22]) },
+	{ "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[23]) },
+	{ "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[24]) },
+	{ "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[25]) },
+	{ "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[26]) },
+	{ "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[27]) },
+	{ "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[28]) },
+	{ "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[29]) },
+	{ "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[30]) },
+	{ "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[31]) },
+
+	{ "f0", GDB_SIZEOF_FLOAT_REG, 0 },
+	{ "f1", GDB_SIZEOF_FLOAT_REG, 1 },
+	{ "f2", GDB_SIZEOF_FLOAT_REG, 2 },
+	{ "f3", GDB_SIZEOF_FLOAT_REG, 3 },
+	{ "f4", GDB_SIZEOF_FLOAT_REG, 4 },
+	{ "f5", GDB_SIZEOF_FLOAT_REG, 5 },
+	{ "f6", GDB_SIZEOF_FLOAT_REG, 6 },
+	{ "f7", GDB_SIZEOF_FLOAT_REG, 7 },
+	{ "f8", GDB_SIZEOF_FLOAT_REG, 8 },
+	{ "f9", GDB_SIZEOF_FLOAT_REG, 9 },
+	{ "f10", GDB_SIZEOF_FLOAT_REG, 10 },
+	{ "f11", GDB_SIZEOF_FLOAT_REG, 11 },
+	{ "f12", GDB_SIZEOF_FLOAT_REG, 12 },
+	{ "f13", GDB_SIZEOF_FLOAT_REG, 13 },
+	{ "f14", GDB_SIZEOF_FLOAT_REG, 14 },
+	{ "f15", GDB_SIZEOF_FLOAT_REG, 15 },
+	{ "f16", GDB_SIZEOF_FLOAT_REG, 16 },
+	{ "f17", GDB_SIZEOF_FLOAT_REG, 17 },
+	{ "f18", GDB_SIZEOF_FLOAT_REG, 18 },
+	{ "f19", GDB_SIZEOF_FLOAT_REG, 19 },
+	{ "f20", GDB_SIZEOF_FLOAT_REG, 20 },
+	{ "f21", GDB_SIZEOF_FLOAT_REG, 21 },
+	{ "f22", GDB_SIZEOF_FLOAT_REG, 22 },
+	{ "f23", GDB_SIZEOF_FLOAT_REG, 23 },
+	{ "f24", GDB_SIZEOF_FLOAT_REG, 24 },
+	{ "f25", GDB_SIZEOF_FLOAT_REG, 25 },
+	{ "f26", GDB_SIZEOF_FLOAT_REG, 26 },
+	{ "f27", GDB_SIZEOF_FLOAT_REG, 27 },
+	{ "f28", GDB_SIZEOF_FLOAT_REG, 28 },
+	{ "f29", GDB_SIZEOF_FLOAT_REG, 29 },
+	{ "f30", GDB_SIZEOF_FLOAT_REG, 30 },
+	{ "f31", GDB_SIZEOF_FLOAT_REG, 31 },
+
+	{ "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, nip) },
+	{ "msr", GDB_SIZEOF_REG, offsetof(struct pt_regs, msr) },
+	{ "cr", GDB_SIZEOF_REG_U32, offsetof(struct pt_regs, ccr) },
+	{ "lr", GDB_SIZEOF_REG, offsetof(struct pt_regs, link) },
+	{ "ctr", GDB_SIZEOF_REG_U32, offsetof(struct pt_regs, ctr) },
+	{ "xer", GDB_SIZEOF_REG, offsetof(struct pt_regs, xer) },
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	if (regno >= DBG_MAX_REG_NUM || regno < 0)
+		return NULL;
+
+	if (regno < 32 || regno >= 64)
+		/* First 0 -> 31 gpr registers */
+		/* pc, msr, ls... registers 64 -> 69 */
+		memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
+		       dbg_reg_def[regno].size);
+
+	if (regno >= 32 && regno < 64) {
+		/* FP registers 32 -> 63 */
+#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE)
+		if (current)
+			memcpy(mem, &current->thread.evr[regno-32],
+			       dbg_reg_def[regno].size);
+#else
+		/* fp registers not used by kernel, leave zero */
+		memset(mem, 0, dbg_reg_def[regno].size);
+#endif
+	}
+
+	return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	if (regno >= DBG_MAX_REG_NUM || regno < 0)
+		return -EINVAL;
+
+	if (regno < 32 || regno >= 64)
+		/* First 0 -> 31 gpr registers */
+		/* pc, msr, ls...
registers 64 -> 69 */
+		memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
+		       dbg_reg_def[regno].size);
+
+	if (regno >= 32 && regno < 64) {
+		/* FP registers 32 -> 63 */
+#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE)
+		memcpy(&current->thread.evr[regno-32], mem,
+		       dbg_reg_def[regno].size);
+#else
+		/* fp registers not used by kernel, leave zero */
+		return 0;
+#endif
+	}
+
+	return 0;
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+	regs->nip = pc;
+}
+
+/*
+ * This function does PowerPC specific processing for interfacing to gdb.
+ */
+int kgdb_arch_handle_exception(int vector, int signo, int err_code,
+			       char *remcom_in_buffer, char *remcom_out_buffer,
+			       struct pt_regs *linux_regs)
+{
+	char *ptr = &remcom_in_buffer[1];
+	unsigned long addr;
+
+	switch (remcom_in_buffer[0]) {
+		/*
+		 * sAA..AA   Step one instruction from AA..AA
+		 * This will return an error to gdb ..
+		 */
+	case 's':
+	case 'c':
+		/* handle the optional parameter */
+		if (kgdb_hex2long(&ptr, &addr))
+			linux_regs->nip = addr;
+
+		atomic_set(&kgdb_cpu_doing_single_step, -1);
+		/* set the trace bit if we're stepping */
+		if (remcom_in_buffer[0] == 's') {
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+			mtspr(SPRN_DBCR0,
+			      mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
+			linux_regs->msr |= MSR_DE;
+#else
+			linux_regs->msr |= MSR_SE;
+#endif
+			kgdb_single_step = 1;
+			atomic_set(&kgdb_cpu_doing_single_step,
+				   raw_smp_processor_id());
+		}
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Global data
+ */
+struct kgdb_arch arch_kgdb_ops = {
+	.gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08},
+};
+
+static int kgdb_not_implemented(struct pt_regs *regs)
+{
+	return 0;
+}
+
+static void *old__debugger_ipi;
+static void *old__debugger;
+static void *old__debugger_bpt;
+static void *old__debugger_sstep;
+static void *old__debugger_iabr_match;
+static void *old__debugger_dabr_match;
+static void *old__debugger_fault_handler;
+
+int kgdb_arch_init(void)
+{
+	old__debugger_ipi = __debugger_ipi;
+	old__debugger = __debugger;
+	old__debugger_bpt = __debugger_bpt;
+	old__debugger_sstep = __debugger_sstep;
+	old__debugger_iabr_match = __debugger_iabr_match;
+	old__debugger_dabr_match = __debugger_dabr_match;
+	old__debugger_fault_handler = __debugger_fault_handler;
+
+	__debugger_ipi = kgdb_call_nmi_hook;
+	__debugger = kgdb_debugger;
+	__debugger_bpt = kgdb_handle_breakpoint;
+	__debugger_sstep = kgdb_singlestep;
+	__debugger_iabr_match = kgdb_iabr_match;
+	__debugger_dabr_match = kgdb_dabr_match;
+	__debugger_fault_handler = kgdb_not_implemented;
+
+	return 0;
+}
+
+void kgdb_arch_exit(void)
+{
+	__debugger_ipi = old__debugger_ipi;
+	__debugger = old__debugger;
+	__debugger_bpt = old__debugger_bpt;
+	__debugger_sstep = old__debugger_sstep;
+	__debugger_iabr_match = old__debugger_iabr_match;
+	__debugger_dabr_match = old__debugger_dabr_match;
+	__debugger_fault_handler = old__debugger_fault_handler;
+}
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c new file mode 100644 index 00000000..e88c6433 --- /dev/null +++ b/arch/powerpc/kernel/kprobes.c @@ -0,0 +1,574 @@
+/*
+ * Kernel Probes (KProbes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004 + * + * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel + * Probes initial implementation ( includes contributions from + * Rusty Russell). + * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes + * interface to access function arguments. + * 2004-Nov Ananth N Mavinakayanahalli <ananth@in.ibm.com> kprobes port + * for PPC64 + */ + +#include <linux/kprobes.h> +#include <linux/ptrace.h> +#include <linux/preempt.h> +#include <linux/module.h> +#include <linux/kdebug.h> +#include <linux/slab.h> +#include <asm/cacheflush.h> +#include <asm/sstep.h> +#include <asm/uaccess.h> + +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +#define MSR_SINGLESTEP (MSR_DE) +#else +#define MSR_SINGLESTEP (MSR_SE) +#endif + +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}}; + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + int ret = 0; + kprobe_opcode_t insn = *p->addr; + + if ((unsigned long)p->addr & 0x03) { + printk("Attempt to register kprobe at an unaligned address\n"); + ret = -EINVAL; + } else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) { + printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n"); + ret = -EINVAL; + } + + /* insn must be on a special executable page on ppc64. 
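+	 * (it is the copy placed in this slot, not the original location,
+	 * that gets single-stepped, so the slot page must be executable).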
This is + * not explicitly required on ppc32 (right now), but it doesn't hurt */ + if (!ret) { + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) + ret = -ENOMEM; + } + + if (!ret) { + memcpy(p->ainsn.insn, p->addr, + MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + p->opcode = *p->addr; + flush_icache_range((unsigned long)p->ainsn.insn, + (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t)); + } + + p->ainsn.boostable = 0; + return ret; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + *p->addr = BREAKPOINT_INSTRUCTION; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); +} + +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + *p->addr = p->opcode; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; + } +} + +static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +{ + /* We turn off async exceptions to ensure that the single step will + * be for the instruction we have the kprobe on, if we dont its + * possible we'd get the single step reported for an exception handler + * like Decrementer or External Interrupt */ + regs->msr &= ~MSR_EE; + regs->msr |= MSR_SINGLESTEP; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + regs->msr &= ~MSR_CE; + mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); +#ifdef CONFIG_PPC_47x + isync(); +#endif +#endif + + /* + * On powerpc we should single step on the original + * instruction even if the probed insn is a trap + * variant as values in regs could play a part in + * if the trap is taken or not + */ + regs->nip = (unsigned long)p->ainsn.insn; +} + +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; + kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr; +} + +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + kcb->kprobe_status = kcb->prev_kprobe.status; + kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr; +} + +static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + __get_cpu_var(current_kprobe) = p; + kcb->kprobe_saved_msr = regs->msr; +} + +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *)regs->link; + + /* Replace the return addr with trampoline addr */ + regs->link = (unsigned long)kretprobe_trampoline; +} + +static int __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p; + int ret = 0; + unsigned int *addr = (unsigned int *)regs->nip; + struct kprobe_ctlblk *kcb; + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); + + /* Check we're not actually recursing */ + if (kprobe_running()) { + p = get_kprobe(addr); + if (p) { + kprobe_opcode_t insn = *p->ainsn.insn; + if (kcb->kprobe_status == KPROBE_HIT_SS && + is_trap(insn)) { + /* Turn off 'trace' bits */ + regs->msr &= ~MSR_SINGLESTEP; + regs->msr |= kcb->kprobe_saved_msr; + goto no_kprobe; + } + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. 
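+			 * (Typically this means a probed instruction was
+			 * hit from inside a user-supplied pre or post
+			 * handler.)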
+ * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(kcb); + set_current_kprobe(p, regs, kcb); + kcb->kprobe_saved_msr = regs->msr; + kprobes_inc_nmissed_count(p); + prepare_singlestep(p, regs); + kcb->kprobe_status = KPROBE_REENTER; + return 1; + } else { + if (*addr != BREAKPOINT_INSTRUCTION) { + /* If trap variant, then it belongs not to us */ + kprobe_opcode_t cur_insn = *addr; + if (is_trap(cur_insn)) + goto no_kprobe; + /* The breakpoint instruction was removed by + * another cpu right after we hit, no further + * handling of this interrupt is appropriate + */ + ret = 1; + goto no_kprobe; + } + p = __get_cpu_var(current_kprobe); + if (p->break_handler && p->break_handler(p, regs)) { + goto ss_probe; + } + } + goto no_kprobe; + } + + p = get_kprobe(addr); + if (!p) { + if (*addr != BREAKPOINT_INSTRUCTION) { + /* + * PowerPC has multiple variants of the "trap" + * instruction. If the current instruction is a + * trap variant, it could belong to someone else + */ + kprobe_opcode_t cur_insn = *addr; + if (is_trap(cur_insn)) + goto no_kprobe; + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + */ + ret = 1; + } + /* Not one of ours: let kernel handle it */ + goto no_kprobe; + } + + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + set_current_kprobe(p, regs, kcb); + if (p->pre_handler && p->pre_handler(p, regs)) + /* handler has already set things up, so skip ss setup */ + return 1; + +ss_probe: + if (p->ainsn.boostable >= 0) { + unsigned int insn = *p->ainsn.insn; + + /* regs->nip is also adjusted if emulate_step returns 1 */ + ret = emulate_step(regs, insn); + if (ret > 0) { + /* + * Once this instruction has been boosted + * successfully, set the boostable flag + */ + if (unlikely(p->ainsn.boostable == 0)) + p->ainsn.boostable = 1; + + if (p->post_handler) + p->post_handler(p, regs, 0); + + kcb->kprobe_status = KPROBE_HIT_SSDONE; + reset_current_kprobe(); + preempt_enable_no_resched(); + return 1; + } else if (ret < 0) { + /* + * We don't allow kprobes on mtmsr(d)/rfi(d), etc. + * So, we should never get here... but, its still + * good to catch them, just in case... 
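+			 * emulate_step() returns a negative value only for
+			 * instructions, such as the rfi/mtmsr[d] variants,
+			 * that must never be stepped out of line, and
+			 * arch_prepare_kprobe() refuses to register probes
+			 * on those in the first place.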
+ */ + printk("Can't step on instruction %x\n", insn); + BUG(); + } else if (ret == 0) + /* This instruction can't be boosted */ + p->ainsn.boostable = -1; + } + prepare_singlestep(p, regs); + kcb->kprobe_status = KPROBE_HIT_SS; + return 1; + +no_kprobe: + preempt_enable_no_resched(); + return ret; +} + +/* + * Function return probe trampoline: + * - init_kprobes() establishes a probepoint here + * - When the probed function returns, this probe + * causes the handlers to fire + */ +static void __used kretprobe_trampoline_holder(void) +{ + asm volatile(".global kretprobe_trampoline\n" + "kretprobe_trampoline:\n" + "nop\n"); +} + +/* + * Called when the probe at kretprobe trampoline is hit + */ +static int __kprobes trampoline_probe_handler(struct kprobe *p, + struct pt_regs *regs) +{ + struct kretprobe_instance *ri = NULL; + struct hlist_head *head, empty_rp; + struct hlist_node *node, *tmp; + unsigned long flags, orig_ret_address = 0; + unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; + + INIT_HLIST_HEAD(&empty_rp); + kretprobe_hash_lock(current, &head, &flags); + + /* + * It is possible to have multiple instances associated with a given + * task either because an multiple functions in the call path + * have a return probe installed on them, and/or more than one return + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always inserted at the head of the list + * - when multiple return probes are registered for the same + * function, the first instance's ret_addr will point to the + * real return address, and all the rest will point to + * kretprobe_trampoline + */ + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + if (ri->rp && ri->rp->handler) + ri->rp->handler(ri, regs); + + orig_ret_address = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri, &empty_rp); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + regs->nip = orig_ret_address; + + reset_current_kprobe(); + kretprobe_hash_unlock(current, &flags); + preempt_enable_no_resched(); + + hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_del(&ri->hlist); + kfree(ri); + } + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) + */ + return 1; +} + +/* + * Called after single-stepping. p->addr is the address of the + * instruction whose first byte has been replaced by the "breakpoint" + * instruction. To avoid the SMP problems that can occur when we + * temporarily put back the original opcode to single-step, we + * single-stepped a copy of the instruction. The address of this + * copy is p->ainsn.insn. 
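+ * On the resulting single-step trap regs->nip sits just past the copy
+ * (ainsn.insn + 4); post_kprobe_handler() rewrites it to addr + 4 so
+ * execution resumes after the original probed instruction.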
+ */ +static int __kprobes post_kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) + return 0; + + /* make sure we got here for instruction we have a kprobe on */ + if (((unsigned long)cur->ainsn.insn + 4) != regs->nip) + return 0; + + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); + } + + /* Adjust nip to after the single-stepped instruction */ + regs->nip = (unsigned long)cur->addr + 4; + regs->msr |= kcb->kprobe_saved_msr; + + /*Restore back the original saved kprobes variables and continue. */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + goto out; + } + reset_current_kprobe(); +out: + preempt_enable_no_resched(); + + /* + * if somebody else is singlestepping across a probe point, msr + * will have DE/SE set, in which case, continue the remaining processing + * of do_debug, as if this is not a probe hit. + */ + if (regs->msr & MSR_SINGLESTEP) + return 0; + + return 1; +} + +int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + const struct exception_table_entry *entry; + + switch(kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the nip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + regs->nip = (unsigned long)cur->addr; + regs->msr &= ~MSR_SINGLESTEP; /* Turn off 'trace' bits */ + regs->msr |= kcb->kprobe_saved_msr; + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); + preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accouting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if ((entry = search_exception_tables(regs->nip)) != NULL) { + regs->nip = entry->fixup; + return 1; + } + + /* + * fixup_exception() could not handle it, + * Let do_page_fault() fix it. + */ + break; + default: + break; + } + return 0; +} + +/* + * Wrapper routine to for handling exceptions. 
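+ * It is attached to the powerpc die notifier chain: DIE_BPT events are
+ * routed to kprobe_handler(), DIE_SSTEP events to post_kprobe_handler(),
+ * and anything raised from user mode is passed over.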
+ */ +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + if (args->regs && user_mode(args->regs)) + return ret; + + switch (val) { + case DIE_BPT: + if (kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_SSTEP: + if (post_kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + default: + break; + } + return ret; +} + +#ifdef CONFIG_PPC64 +unsigned long arch_deref_entry_point(void *entry) +{ + return ((func_descr_t *)entry)->entry; +} +#endif + +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs)); + + /* setup return addr to the jprobe handler routine */ + regs->nip = arch_deref_entry_point(jp->entry); +#ifdef CONFIG_PPC64 + regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc); +#endif + + return 1; +} + +void __used __kprobes jprobe_return(void) +{ + asm volatile("trap" ::: "memory"); +} + +static void __used __kprobes jprobe_return_end(void) +{ +}; + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + /* + * FIXME - we should ideally be validating that we got here 'cos + * of the "trap" in jprobe_return() above, before restoring the + * saved regs... + */ + memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); + preempt_enable_no_resched(); + return 1; +} + +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *) &kretprobe_trampoline, + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init_kprobes(void) +{ + return register_kprobe(&trampoline_p); +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline) + return 1; + + return 0; +} diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c new file mode 100644 index 00000000..62bdf238 --- /dev/null +++ b/arch/powerpc/kernel/kvm.c @@ -0,0 +1,811 @@ +/* + * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved. + * Copyright 2010-2011 Freescale Semiconductor, Inc. + * + * Authors: + * Alexander Graf <agraf@suse.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include <linux/kvm_host.h> +#include <linux/init.h> +#include <linux/export.h> +#include <linux/kvm_para.h> +#include <linux/slab.h> +#include <linux/of.h> + +#include <asm/reg.h> +#include <asm/sections.h> +#include <asm/cacheflush.h> +#include <asm/disassemble.h> +#include <asm/ppc-opcode.h> + +#define KVM_MAGIC_PAGE (-4096L) +#define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x) + +#define KVM_INST_LWZ 0x80000000 +#define KVM_INST_STW 0x90000000 +#define KVM_INST_LD 0xe8000000 +#define KVM_INST_STD 0xf8000000 +#define KVM_INST_NOP 0x60000000 +#define KVM_INST_B 0x48000000 +#define KVM_INST_B_MASK 0x03ffffff +#define KVM_INST_B_MAX 0x01ffffff +#define KVM_INST_LI 0x38000000 + +#define KVM_MASK_RT 0x03e00000 +#define KVM_RT_30 0x03c00000 +#define KVM_MASK_RB 0x0000f800 +#define KVM_INST_MFMSR 0x7c0000a6 + +#define SPR_FROM 0 +#define SPR_TO 0x100 + +#define KVM_INST_SPR(sprn, moveto) (0x7c0002a6 | \ + (((sprn) & 0x1f) << 16) | \ + (((sprn) & 0x3e0) << 6) | \ + (moveto)) + +#define KVM_INST_MFSPR(sprn) KVM_INST_SPR(sprn, SPR_FROM) +#define KVM_INST_MTSPR(sprn) KVM_INST_SPR(sprn, SPR_TO) + +#define KVM_INST_TLBSYNC 0x7c00046c +#define KVM_INST_MTMSRD_L0 0x7c000164 +#define KVM_INST_MTMSRD_L1 0x7c010164 +#define KVM_INST_MTMSR 0x7c000124 + +#define KVM_INST_WRTEE 0x7c000106 +#define KVM_INST_WRTEEI_0 0x7c000146 +#define KVM_INST_WRTEEI_1 0x7c008146 + +#define KVM_INST_MTSRIN 0x7c0001e4 + +static bool kvm_patching_worked = true; +static char kvm_tmp[1024 * 1024]; +static int kvm_tmp_index; + +static inline void kvm_patch_ins(u32 *inst, u32 new_inst) +{ + *inst = new_inst; + flush_icache_range((ulong)inst, (ulong)inst + 4); +} + +static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt) +{ +#ifdef CONFIG_64BIT + kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc)); +#else + kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000fffc)); +#endif +} + +static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt) +{ +#ifdef CONFIG_64BIT + kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc)); +#else + kvm_patch_ins(inst, KVM_INST_LWZ | rt | ((addr + 4) & 0x0000fffc)); +#endif +} + +static void kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt) +{ + kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000ffff)); +} + +static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt) +{ +#ifdef CONFIG_64BIT + kvm_patch_ins(inst, KVM_INST_STD | rt | (addr & 0x0000fffc)); +#else + kvm_patch_ins(inst, KVM_INST_STW | rt | ((addr + 4) & 0x0000fffc)); +#endif +} + +static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt) +{ + kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc)); +} + +static void kvm_patch_ins_nop(u32 *inst) +{ + kvm_patch_ins(inst, KVM_INST_NOP); +} + +static void kvm_patch_ins_b(u32 *inst, int addr) +{ +#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC_BOOK3S) + /* On relocatable kernels interrupts handlers and our code + can be in different regions, so we don't patch them */ + + if ((ulong)inst < (ulong)&__end_interrupts) + return; +#endif + + kvm_patch_ins(inst, KVM_INST_B | (addr & KVM_INST_B_MASK)); +} + +static u32 *kvm_alloc(int len) +{ + u32 *p; + + if ((kvm_tmp_index + len) > ARRAY_SIZE(kvm_tmp)) { + printk(KERN_ERR "KVM: No more space (%d + %d)\n", + kvm_tmp_index, len); + kvm_patching_worked = false; + return NULL; + } + + p = (void*)&kvm_tmp[kvm_tmp_index]; + kvm_tmp_index += len; + + return p; +} + +extern u32 kvm_emulate_mtmsrd_branch_offs; +extern u32 kvm_emulate_mtmsrd_reg_offs; +extern u32 kvm_emulate_mtmsrd_orig_ins_offs; 
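+/*
+ * Each kvm_emulate_* template below exports word offsets for its branch,
+ * register and original-instruction slots plus a length, all counted in
+ * 32-bit instructions.  A minimal sketch of the reachability test every
+ * patcher performs before committing a template (kvm_branch_fits() is
+ * illustrative only, not used by the code below):
+ */
+static inline bool kvm_branch_fits(u32 *from, u32 *to)
+{
+	long dist = (ulong)to - (ulong)from;
+
+	/* an unconditional "b" carries a 26-bit signed displacement;
+	 * only the forward limit is checked, mirroring the patchers */
+	return dist <= KVM_INST_B_MAX;
+}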
+extern u32 kvm_emulate_mtmsrd_len; +extern u32 kvm_emulate_mtmsrd[]; + +static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_mtmsrd_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_mtmsrd_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_mtmsrd, kvm_emulate_mtmsrd_len * 4); + p[kvm_emulate_mtmsrd_branch_offs] |= distance_end & KVM_INST_B_MASK; + switch (get_rt(rt)) { + case 30: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsrd_reg_offs], + magic_var(scratch2), KVM_RT_30); + break; + case 31: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsrd_reg_offs], + magic_var(scratch1), KVM_RT_30); + break; + default: + p[kvm_emulate_mtmsrd_reg_offs] |= rt; + break; + } + + p[kvm_emulate_mtmsrd_orig_ins_offs] = *inst; + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsrd_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + +extern u32 kvm_emulate_mtmsr_branch_offs; +extern u32 kvm_emulate_mtmsr_reg1_offs; +extern u32 kvm_emulate_mtmsr_reg2_offs; +extern u32 kvm_emulate_mtmsr_orig_ins_offs; +extern u32 kvm_emulate_mtmsr_len; +extern u32 kvm_emulate_mtmsr[]; + +static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_mtmsr_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_mtmsr_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_mtmsr, kvm_emulate_mtmsr_len * 4); + p[kvm_emulate_mtmsr_branch_offs] |= distance_end & KVM_INST_B_MASK; + + /* Make clobbered registers work too */ + switch (get_rt(rt)) { + case 30: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg1_offs], + magic_var(scratch2), KVM_RT_30); + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg2_offs], + magic_var(scratch2), KVM_RT_30); + break; + case 31: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg1_offs], + magic_var(scratch1), KVM_RT_30); + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg2_offs], + magic_var(scratch1), KVM_RT_30); + break; + default: + p[kvm_emulate_mtmsr_reg1_offs] |= rt; + p[kvm_emulate_mtmsr_reg2_offs] |= rt; + break; + } + + p[kvm_emulate_mtmsr_orig_ins_offs] = *inst; + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsr_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + +#ifdef CONFIG_BOOKE + +extern u32 kvm_emulate_wrtee_branch_offs; +extern u32 kvm_emulate_wrtee_reg_offs; +extern u32 kvm_emulate_wrtee_orig_ins_offs; +extern u32 kvm_emulate_wrtee_len; +extern u32 kvm_emulate_wrtee[]; + +static void kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_wrtee_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst 
+ 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_wrtee_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_wrtee, kvm_emulate_wrtee_len * 4); + p[kvm_emulate_wrtee_branch_offs] |= distance_end & KVM_INST_B_MASK; + + if (imm_one) { + p[kvm_emulate_wrtee_reg_offs] = + KVM_INST_LI | __PPC_RT(30) | MSR_EE; + } else { + /* Make clobbered registers work too */ + switch (get_rt(rt)) { + case 30: + kvm_patch_ins_ll(&p[kvm_emulate_wrtee_reg_offs], + magic_var(scratch2), KVM_RT_30); + break; + case 31: + kvm_patch_ins_ll(&p[kvm_emulate_wrtee_reg_offs], + magic_var(scratch1), KVM_RT_30); + break; + default: + p[kvm_emulate_wrtee_reg_offs] |= rt; + break; + } + } + + p[kvm_emulate_wrtee_orig_ins_offs] = *inst; + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrtee_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + +extern u32 kvm_emulate_wrteei_0_branch_offs; +extern u32 kvm_emulate_wrteei_0_len; +extern u32 kvm_emulate_wrteei_0[]; + +static void kvm_patch_ins_wrteei_0(u32 *inst) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_wrteei_0_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_wrteei_0_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + memcpy(p, kvm_emulate_wrteei_0, kvm_emulate_wrteei_0_len * 4); + p[kvm_emulate_wrteei_0_branch_offs] |= distance_end & KVM_INST_B_MASK; + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrteei_0_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + +#endif + +#ifdef CONFIG_PPC_BOOK3S_32 + +extern u32 kvm_emulate_mtsrin_branch_offs; +extern u32 kvm_emulate_mtsrin_reg1_offs; +extern u32 kvm_emulate_mtsrin_reg2_offs; +extern u32 kvm_emulate_mtsrin_orig_ins_offs; +extern u32 kvm_emulate_mtsrin_len; +extern u32 kvm_emulate_mtsrin[]; + +static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_mtsrin_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_mtsrin_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_mtsrin, kvm_emulate_mtsrin_len * 4); + p[kvm_emulate_mtsrin_branch_offs] |= distance_end & KVM_INST_B_MASK; + p[kvm_emulate_mtsrin_reg1_offs] |= (rb << 10); + p[kvm_emulate_mtsrin_reg2_offs] |= rt; + p[kvm_emulate_mtsrin_orig_ins_offs] = *inst; + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtsrin_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + +#endif + +static void kvm_map_magic_page(void *data) +{ + u32 *features = data; + + ulong in[8]; + ulong out[8]; + + in[0] = KVM_MAGIC_PAGE; + in[1] = KVM_MAGIC_PAGE; + + kvm_hypercall(in, out, HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE); + + *features = out[0]; 
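+	/*
+	 * out[0] is the first output register (r4): a bitmap of
+	 * KVM_MAGIC_FEAT_* flags that kvm_check_ins() later uses to gate
+	 * the SPRG4-7, MAS and SR patches.
+	 */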
+} + +static void kvm_check_ins(u32 *inst, u32 features) +{ + u32 _inst = *inst; + u32 inst_no_rt = _inst & ~KVM_MASK_RT; + u32 inst_rt = _inst & KVM_MASK_RT; + + switch (inst_no_rt) { + /* Loads */ + case KVM_INST_MFMSR: + kvm_patch_ins_ld(inst, magic_var(msr), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_SPRG0): + kvm_patch_ins_ld(inst, magic_var(sprg0), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_SPRG1): + kvm_patch_ins_ld(inst, magic_var(sprg1), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_SPRG2): + kvm_patch_ins_ld(inst, magic_var(sprg2), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_SPRG3): + kvm_patch_ins_ld(inst, magic_var(sprg3), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_SRR0): + kvm_patch_ins_ld(inst, magic_var(srr0), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_SRR1): + kvm_patch_ins_ld(inst, magic_var(srr1), inst_rt); + break; +#ifdef CONFIG_BOOKE + case KVM_INST_MFSPR(SPRN_DEAR): +#else + case KVM_INST_MFSPR(SPRN_DAR): +#endif + kvm_patch_ins_ld(inst, magic_var(dar), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_DSISR): + kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt); + break; + +#ifdef CONFIG_PPC_BOOK3E_MMU + case KVM_INST_MFSPR(SPRN_MAS0): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_lwz(inst, magic_var(mas0), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_MAS1): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_lwz(inst, magic_var(mas1), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_MAS2): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_ld(inst, magic_var(mas2), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_MAS3): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_lwz(inst, magic_var(mas7_3) + 4, inst_rt); + break; + case KVM_INST_MFSPR(SPRN_MAS4): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_lwz(inst, magic_var(mas4), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_MAS6): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_lwz(inst, magic_var(mas6), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_MAS7): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_lwz(inst, magic_var(mas7_3), inst_rt); + break; +#endif /* CONFIG_PPC_BOOK3E_MMU */ + + case KVM_INST_MFSPR(SPRN_SPRG4): +#ifdef CONFIG_BOOKE + case KVM_INST_MFSPR(SPRN_SPRG4R): +#endif + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_ld(inst, magic_var(sprg4), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_SPRG5): +#ifdef CONFIG_BOOKE + case KVM_INST_MFSPR(SPRN_SPRG5R): +#endif + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_ld(inst, magic_var(sprg5), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_SPRG6): +#ifdef CONFIG_BOOKE + case KVM_INST_MFSPR(SPRN_SPRG6R): +#endif + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_ld(inst, magic_var(sprg6), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_SPRG7): +#ifdef CONFIG_BOOKE + case KVM_INST_MFSPR(SPRN_SPRG7R): +#endif + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_ld(inst, magic_var(sprg7), inst_rt); + break; + +#ifdef CONFIG_BOOKE + case KVM_INST_MFSPR(SPRN_ESR): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_lwz(inst, magic_var(esr), inst_rt); + break; +#endif + + case KVM_INST_MFSPR(SPRN_PIR): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_lwz(inst, magic_var(pir), inst_rt); + break; + + + /* Stores */ + case KVM_INST_MTSPR(SPRN_SPRG0): + kvm_patch_ins_std(inst, magic_var(sprg0), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_SPRG1): + kvm_patch_ins_std(inst, magic_var(sprg1), 
inst_rt); + break; + case KVM_INST_MTSPR(SPRN_SPRG2): + kvm_patch_ins_std(inst, magic_var(sprg2), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_SPRG3): + kvm_patch_ins_std(inst, magic_var(sprg3), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_SRR0): + kvm_patch_ins_std(inst, magic_var(srr0), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_SRR1): + kvm_patch_ins_std(inst, magic_var(srr1), inst_rt); + break; +#ifdef CONFIG_BOOKE + case KVM_INST_MTSPR(SPRN_DEAR): +#else + case KVM_INST_MTSPR(SPRN_DAR): +#endif + kvm_patch_ins_std(inst, magic_var(dar), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_DSISR): + kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt); + break; +#ifdef CONFIG_PPC_BOOK3E_MMU + case KVM_INST_MTSPR(SPRN_MAS0): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_stw(inst, magic_var(mas0), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_MAS1): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_stw(inst, magic_var(mas1), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_MAS2): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_std(inst, magic_var(mas2), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_MAS3): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_stw(inst, magic_var(mas7_3) + 4, inst_rt); + break; + case KVM_INST_MTSPR(SPRN_MAS4): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_stw(inst, magic_var(mas4), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_MAS6): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_stw(inst, magic_var(mas6), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_MAS7): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_stw(inst, magic_var(mas7_3), inst_rt); + break; +#endif /* CONFIG_PPC_BOOK3E_MMU */ + + case KVM_INST_MTSPR(SPRN_SPRG4): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_std(inst, magic_var(sprg4), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_SPRG5): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_std(inst, magic_var(sprg5), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_SPRG6): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_std(inst, magic_var(sprg6), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_SPRG7): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_std(inst, magic_var(sprg7), inst_rt); + break; + +#ifdef CONFIG_BOOKE + case KVM_INST_MTSPR(SPRN_ESR): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_stw(inst, magic_var(esr), inst_rt); + break; +#endif + + /* Nops */ + case KVM_INST_TLBSYNC: + kvm_patch_ins_nop(inst); + break; + + /* Rewrites */ + case KVM_INST_MTMSRD_L1: + kvm_patch_ins_mtmsrd(inst, inst_rt); + break; + case KVM_INST_MTMSR: + case KVM_INST_MTMSRD_L0: + kvm_patch_ins_mtmsr(inst, inst_rt); + break; +#ifdef CONFIG_BOOKE + case KVM_INST_WRTEE: + kvm_patch_ins_wrtee(inst, inst_rt, 0); + break; +#endif + } + + switch (inst_no_rt & ~KVM_MASK_RB) { +#ifdef CONFIG_PPC_BOOK3S_32 + case KVM_INST_MTSRIN: + if (features & KVM_MAGIC_FEAT_SR) { + u32 inst_rb = _inst & KVM_MASK_RB; + kvm_patch_ins_mtsrin(inst, inst_rt, inst_rb); + } + break; + break; +#endif + } + + switch (_inst) { +#ifdef CONFIG_BOOKE + case KVM_INST_WRTEEI_0: + kvm_patch_ins_wrteei_0(inst); + break; + + case KVM_INST_WRTEEI_1: + kvm_patch_ins_wrtee(inst, 0, 1); + break; +#endif + } +} + +extern u32 kvm_template_start[]; +extern u32 kvm_template_end[]; + +static void kvm_use_magic_page(void) +{ + u32 *p; + u32 *start, *end; + u32 tmp; + u32 features; + + /* Tell the host to map the magic page to -4096 on all CPUs */ + 
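+	/*
+	 * kvm_map_magic_page() issues the hypercall with KVM_MAGIC_PAGE in
+	 * both input registers; running it on every CPU ensures each one
+	 * maps the page before any instruction is patched.
+	 */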
on_each_cpu(kvm_map_magic_page, &features, 1); + + /* Quick self-test to see if the mapping works */ + if (__get_user(tmp, (u32*)KVM_MAGIC_PAGE)) { + kvm_patching_worked = false; + return; + } + + /* Now loop through all code and find instructions */ + start = (void*)_stext; + end = (void*)_etext; + + /* + * Being interrupted in the middle of patching would + * be bad for SPRG4-7, which KVM can't keep in sync + * with emulated accesses because reads don't trap. + */ + local_irq_disable(); + + for (p = start; p < end; p++) { + /* Avoid patching the template code */ + if (p >= kvm_template_start && p < kvm_template_end) { + p = kvm_template_end - 1; + continue; + } + kvm_check_ins(p, features); + } + + local_irq_enable(); + + printk(KERN_INFO "KVM: Live patching for a fast VM %s\n", + kvm_patching_worked ? "worked" : "failed"); +} + +unsigned long kvm_hypercall(unsigned long *in, + unsigned long *out, + unsigned long nr) +{ + unsigned long register r0 asm("r0"); + unsigned long register r3 asm("r3") = in[0]; + unsigned long register r4 asm("r4") = in[1]; + unsigned long register r5 asm("r5") = in[2]; + unsigned long register r6 asm("r6") = in[3]; + unsigned long register r7 asm("r7") = in[4]; + unsigned long register r8 asm("r8") = in[5]; + unsigned long register r9 asm("r9") = in[6]; + unsigned long register r10 asm("r10") = in[7]; + unsigned long register r11 asm("r11") = nr; + unsigned long register r12 asm("r12"); + + asm volatile("bl kvm_hypercall_start" + : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6), + "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11), + "=r"(r12) + : "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8), + "r"(r9), "r"(r10), "r"(r11) + : "memory", "cc", "xer", "ctr", "lr"); + + out[0] = r4; + out[1] = r5; + out[2] = r6; + out[3] = r7; + out[4] = r8; + out[5] = r9; + out[6] = r10; + out[7] = r11; + + return r3; +} +EXPORT_SYMBOL_GPL(kvm_hypercall); + +static int kvm_para_setup(void) +{ + extern u32 kvm_hypercall_start; + struct device_node *hyper_node; + u32 *insts; + int len, i; + + hyper_node = of_find_node_by_path("/hypervisor"); + if (!hyper_node) + return -1; + + insts = (u32*)of_get_property(hyper_node, "hcall-instructions", &len); + if (len % 4) + return -1; + if (len > (4 * 4)) + return -1; + + for (i = 0; i < (len / 4); i++) + kvm_patch_ins(&(&kvm_hypercall_start)[i], insts[i]); + + return 0; +} + +static __init void kvm_free_tmp(void) +{ + unsigned long start, end; + + start = (ulong)&kvm_tmp[kvm_tmp_index + (PAGE_SIZE - 1)] & PAGE_MASK; + end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK; + + /* Free the tmp space we don't need */ + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + init_page_count(virt_to_page(start)); + free_page(start); + totalram_pages++; + } +} + +static int __init kvm_guest_init(void) +{ + if (!kvm_para_available()) + goto free_tmp; + + if (kvm_para_setup()) + goto free_tmp; + + if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE)) + kvm_use_magic_page(); + +#ifdef CONFIG_PPC_BOOK3S_64 + /* Enable napping */ + powersave_nap = 1; +#endif + +free_tmp: + kvm_free_tmp(); + + return 0; +} + +postcore_initcall(kvm_guest_init); diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S new file mode 100644 index 00000000..e291cf3c --- /dev/null +++ b/arch/powerpc/kernel/kvm_emul.S @@ -0,0 +1,358 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software 
Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright SUSE Linux Products GmbH 2010 + * Copyright 2010-2011 Freescale Semiconductor, Inc. + * + * Authors: Alexander Graf <agraf@suse.de> + */ + +#include <asm/ppc_asm.h> +#include <asm/kvm_asm.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/asm-offsets.h> + +/* Hypercall entry point. Will be patched with device tree instructions. */ + +.global kvm_hypercall_start +kvm_hypercall_start: + li r3, -1 + nop + nop + nop + blr + +#define KVM_MAGIC_PAGE (-4096) + +#ifdef CONFIG_64BIT +#define LL64(reg, offs, reg2) ld reg, (offs)(reg2) +#define STL64(reg, offs, reg2) std reg, (offs)(reg2) +#else +#define LL64(reg, offs, reg2) lwz reg, (offs + 4)(reg2) +#define STL64(reg, offs, reg2) stw reg, (offs + 4)(reg2) +#endif + +#define SCRATCH_SAVE \ + /* Enable critical section. We are critical if \ + shared->critical == r1 */ \ + STL64(r1, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0); \ + \ + /* Save state */ \ + PPC_STL r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0); \ + PPC_STL r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0); \ + mfcr r31; \ + stw r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0); + +#define SCRATCH_RESTORE \ + /* Restore state */ \ + PPC_LL r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0); \ + lwz r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0); \ + mtcr r30; \ + PPC_LL r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0); \ + \ + /* Disable critical section. We are critical if \ + shared->critical == r1 and r2 is always != r1 */ \ + STL64(r2, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0); + +.global kvm_template_start +kvm_template_start: + +.global kvm_emulate_mtmsrd +kvm_emulate_mtmsrd: + + SCRATCH_SAVE + + /* Put MSR & ~(MSR_EE|MSR_RI) in r31 */ + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + lis r30, (~(MSR_EE | MSR_RI))@h + ori r30, r30, (~(MSR_EE | MSR_RI))@l + and r31, r31, r30 + + /* OR the register's (MSR_EE|MSR_RI) on MSR */ +kvm_emulate_mtmsrd_reg: + ori r30, r0, 0 + andi. r30, r30, (MSR_EE|MSR_RI) + or r31, r31, r30 + + /* Put MSR back into magic page */ + STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + /* Check if we have to fetch an interrupt */ + lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0) + cmpwi r31, 0 + beq+ no_check + + /* Check if we may trigger an interrupt */ + andi. r30, r30, MSR_EE + beq no_check + + SCRATCH_RESTORE + + /* Nag hypervisor */ +kvm_emulate_mtmsrd_orig_ins: + tlbsync + + b kvm_emulate_mtmsrd_branch + +no_check: + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_mtmsrd_branch: + b . 
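+/*
+ * "b ." encodes a zero displacement, so kvm_patch_ins_mtmsrd() can
+ * simply OR the real distance back to the caller into this instruction
+ * through kvm_emulate_mtmsrd_branch_offs.
+ */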
+kvm_emulate_mtmsrd_end: + +.global kvm_emulate_mtmsrd_branch_offs +kvm_emulate_mtmsrd_branch_offs: + .long (kvm_emulate_mtmsrd_branch - kvm_emulate_mtmsrd) / 4 + +.global kvm_emulate_mtmsrd_reg_offs +kvm_emulate_mtmsrd_reg_offs: + .long (kvm_emulate_mtmsrd_reg - kvm_emulate_mtmsrd) / 4 + +.global kvm_emulate_mtmsrd_orig_ins_offs +kvm_emulate_mtmsrd_orig_ins_offs: + .long (kvm_emulate_mtmsrd_orig_ins - kvm_emulate_mtmsrd) / 4 + +.global kvm_emulate_mtmsrd_len +kvm_emulate_mtmsrd_len: + .long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4 + + +#define MSR_SAFE_BITS (MSR_EE | MSR_CE | MSR_ME | MSR_RI) +#define MSR_CRITICAL_BITS ~MSR_SAFE_BITS + +.global kvm_emulate_mtmsr +kvm_emulate_mtmsr: + + SCRATCH_SAVE + + /* Fetch old MSR in r31 */ + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + /* Find the changed bits between old and new MSR */ +kvm_emulate_mtmsr_reg1: + ori r30, r0, 0 + xor r31, r30, r31 + + /* Check if we need to really do mtmsr */ + LOAD_REG_IMMEDIATE(r30, MSR_CRITICAL_BITS) + and. r31, r31, r30 + + /* No critical bits changed? Maybe we can stay in the guest. */ + beq maybe_stay_in_guest + +do_mtmsr: + + SCRATCH_RESTORE + + /* Just fire off the mtmsr if it's critical */ +kvm_emulate_mtmsr_orig_ins: + mtmsr r0 + + b kvm_emulate_mtmsr_branch + +maybe_stay_in_guest: + + /* Get the target register in r30 */ +kvm_emulate_mtmsr_reg2: + ori r30, r0, 0 + + /* Put MSR into magic page because we don't call mtmsr */ + STL64(r30, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + /* Check if we have to fetch an interrupt */ + lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0) + cmpwi r31, 0 + beq+ no_mtmsr + + /* Check if we may trigger an interrupt */ + andi. r31, r30, MSR_EE + bne do_mtmsr + +no_mtmsr: + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_mtmsr_branch: + b . +kvm_emulate_mtmsr_end: + +.global kvm_emulate_mtmsr_branch_offs +kvm_emulate_mtmsr_branch_offs: + .long (kvm_emulate_mtmsr_branch - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_reg1_offs +kvm_emulate_mtmsr_reg1_offs: + .long (kvm_emulate_mtmsr_reg1 - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_reg2_offs +kvm_emulate_mtmsr_reg2_offs: + .long (kvm_emulate_mtmsr_reg2 - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_orig_ins_offs +kvm_emulate_mtmsr_orig_ins_offs: + .long (kvm_emulate_mtmsr_orig_ins - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_len +kvm_emulate_mtmsr_len: + .long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4 + +/* also used for wrteei 1 */ +.global kvm_emulate_wrtee +kvm_emulate_wrtee: + + SCRATCH_SAVE + + /* Fetch old MSR in r31 */ + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + /* Insert new MSR[EE] */ +kvm_emulate_wrtee_reg: + ori r30, r0, 0 + rlwimi r31, r30, 0, MSR_EE + + /* + * If MSR[EE] is now set, check for a pending interrupt. + * We could skip this if MSR[EE] was already on, but that + * should be rare, so don't bother. + */ + andi. r30, r30, MSR_EE + + /* Put MSR into magic page because we don't call wrtee */ + STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + beq no_wrtee + + /* Check if we have to fetch an interrupt */ + lwz r30, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0) + cmpwi r30, 0 + bne do_wrtee + +no_wrtee: + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_wrtee_branch: + b . 
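+/*
+ * The slow path sits past the return branch: do_wrtee re-executes the
+ * original wrtee and then reuses the patched branch above to get back
+ * to the caller.
+ */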
+ +do_wrtee: + SCRATCH_RESTORE + + /* Just fire off the wrtee if it's critical */ +kvm_emulate_wrtee_orig_ins: + wrtee r0 + + b kvm_emulate_wrtee_branch + +kvm_emulate_wrtee_end: + +.global kvm_emulate_wrtee_branch_offs +kvm_emulate_wrtee_branch_offs: + .long (kvm_emulate_wrtee_branch - kvm_emulate_wrtee) / 4 + +.global kvm_emulate_wrtee_reg_offs +kvm_emulate_wrtee_reg_offs: + .long (kvm_emulate_wrtee_reg - kvm_emulate_wrtee) / 4 + +.global kvm_emulate_wrtee_orig_ins_offs +kvm_emulate_wrtee_orig_ins_offs: + .long (kvm_emulate_wrtee_orig_ins - kvm_emulate_wrtee) / 4 + +.global kvm_emulate_wrtee_len +kvm_emulate_wrtee_len: + .long (kvm_emulate_wrtee_end - kvm_emulate_wrtee) / 4 + +.global kvm_emulate_wrteei_0 +kvm_emulate_wrteei_0: + SCRATCH_SAVE + + /* Fetch old MSR in r31 */ + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + /* Remove MSR_EE from old MSR */ + rlwinm r31, r31, 0, ~MSR_EE + + /* Write new MSR value back */ + STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_wrteei_0_branch: + b . +kvm_emulate_wrteei_0_end: + +.global kvm_emulate_wrteei_0_branch_offs +kvm_emulate_wrteei_0_branch_offs: + .long (kvm_emulate_wrteei_0_branch - kvm_emulate_wrteei_0) / 4 + +.global kvm_emulate_wrteei_0_len +kvm_emulate_wrteei_0_len: + .long (kvm_emulate_wrteei_0_end - kvm_emulate_wrteei_0) / 4 + +.global kvm_emulate_mtsrin +kvm_emulate_mtsrin: + + SCRATCH_SAVE + + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + andi. r31, r31, MSR_DR | MSR_IR + beq kvm_emulate_mtsrin_reg1 + + SCRATCH_RESTORE + +kvm_emulate_mtsrin_orig_ins: + nop + b kvm_emulate_mtsrin_branch + +kvm_emulate_mtsrin_reg1: + /* rX >> 26 */ + rlwinm r30,r0,6,26,29 + +kvm_emulate_mtsrin_reg2: + stw r0, (KVM_MAGIC_PAGE + KVM_MAGIC_SR)(r30) + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_mtsrin_branch: + b . +kvm_emulate_mtsrin_end: + +.global kvm_emulate_mtsrin_branch_offs +kvm_emulate_mtsrin_branch_offs: + .long (kvm_emulate_mtsrin_branch - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_reg1_offs +kvm_emulate_mtsrin_reg1_offs: + .long (kvm_emulate_mtsrin_reg1 - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_reg2_offs +kvm_emulate_mtsrin_reg2_offs: + .long (kvm_emulate_mtsrin_reg2 - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_orig_ins_offs +kvm_emulate_mtsrin_orig_ins_offs: + .long (kvm_emulate_mtsrin_orig_ins - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_len +kvm_emulate_mtsrin_len: + .long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4 + +.global kvm_template_end +kvm_template_end: diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S new file mode 100644 index 00000000..97ec8557 --- /dev/null +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -0,0 +1,470 @@ +/* + L2CR functions + Copyright © 1997-1998 by PowerLogix R & D, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ +/* + Thur, Dec. 
12, 1998. + - First public release, contributed by PowerLogix. + *********** + Sat, Aug. 7, 1999. + - Terry: Made sure code disabled interrupts before running. (Previously + it was assumed interrupts were already disabled). + - Terry: Updated for tentative G4 support. 4MB of memory is now flushed + instead of 2MB. (Prob. only 3 is necessary). + - Terry: Updated for workaround to HID0[DPM] processor bug + during global invalidates. + *********** + Thu, July 13, 2000. + - Terry: Added isync to correct for an errata. + + 22 August 2001. + - DanM: Finally added the 7450 patch I've had for the past + several months. The L2CR is similar, but I'm going + to assume the user of this functions knows what they + are doing. + + Author: Terry Greeniaus (tgree@phys.ualberta.ca) + Please e-mail updates to this file to me, thanks! +*/ +#include <asm/processor.h> +#include <asm/cputable.h> +#include <asm/ppc_asm.h> +#include <asm/cache.h> +#include <asm/page.h> + +/* Usage: + + When setting the L2CR register, you must do a few special + things. If you are enabling the cache, you must perform a + global invalidate. If you are disabling the cache, you must + flush the cache contents first. This routine takes care of + doing these things. When first enabling the cache, make sure + you pass in the L2CR you want, as well as passing in the + global invalidate bit set. A global invalidate will only be + performed if the L2I bit is set in applyThis. When enabling + the cache, you should also set the L2E bit in applyThis. If + you want to modify the L2CR contents after the cache has been + enabled, the recommended procedure is to first call + __setL2CR(0) to disable the cache and then call it again with + the new values for L2CR. Examples: + + _setL2CR(0) - disables the cache + _setL2CR(0xB3A04000) - enables my G3 upgrade card: + - L2E set to turn on the cache + - L2SIZ set to 1MB + - L2CLK set to 1:1 + - L2RAM set to pipelined synchronous late-write + - L2I set to perform a global invalidation + - L2OH set to 0.5 nS + - L2DF set because this upgrade card + requires it + + A similar call should work for your card. You need to know + the correct setting for your card and then place them in the + fields I have outlined above. Other fields support optional + features, such as L2DO which caches only data, or L2TS which + causes cache pushes from the L1 cache to go to the L2 cache + instead of to main memory. + +IMPORTANT: + Starting with the 7450, the bits in this register have moved + or behave differently. The Enable, Parity Enable, Size, + and L2 Invalidate are the only bits that have not moved. + The size is read-only for these processors with internal L2 + cache, and the invalidate is a control as well as status. + -- Dan + +*/ +/* + * Summary: this procedure ignores the L2I bit in the value passed in, + * flushes the cache if it was already enabled, always invalidates the + * cache, then enables the cache if the L2E bit is set in the value + * passed in. + * -- paulus. + */ +_GLOBAL(_set_L2CR) + /* Make sure this is a 750 or 7400 chip */ +BEGIN_FTR_SECTION + li r3,-1 + blr +END_FTR_SECTION_IFCLR(CPU_FTR_L2CR) + + mflr r9 + + /* Stop DST streams */ +BEGIN_FTR_SECTION + DSSALL + sync +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + + /* Turn off interrupts and data relocation. 
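+	   MSR[EE] goes off so nothing can interrupt while the cache is
+	   reconfigured, and MSR[DR] goes off so the flush loads below run
+	   untranslated against physical memory.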
*/ + mfmsr r7 /* Save MSR in r7 */ + rlwinm r4,r7,0,17,15 + rlwinm r4,r4,0,28,26 /* Turn off DR bit */ + sync + mtmsr r4 + isync + + /* Before we perform the global invalidation, we must disable dynamic + * power management via HID0[DPM] to work around a processor bug where + * DPM can possibly interfere with the state machine in the processor + * that invalidates the L2 cache tags. + */ + mfspr r8,SPRN_HID0 /* Save HID0 in r8 */ + rlwinm r4,r8,0,12,10 /* Turn off HID0[DPM] */ + sync + mtspr SPRN_HID0,r4 /* Disable DPM */ + sync + + /* Get the current enable bit of the L2CR into r4 */ + mfspr r4,SPRN_L2CR + + /* Tweak some bits */ + rlwinm r5,r3,0,0,0 /* r5 contains the new enable bit */ + rlwinm r3,r3,0,11,9 /* Turn off the invalidate bit */ + rlwinm r3,r3,0,1,31 /* Turn off the enable bit */ + + /* Check to see if we need to flush */ + rlwinm. r4,r4,0,0,0 + beq 2f + + /* Flush the cache. First, read the first 4MB of memory (physical) to + * put new data in the cache. (Actually we only need + * the size of the L2 cache plus the size of the L1 cache, but 4MB will + * cover everything just to be safe). + */ + + /**** Might be a good idea to set L2DO here - to prevent instructions + from getting into the cache. But since we invalidate + the next time we enable the cache it doesn't really matter. + Don't do this unless you accommodate all processor variations. + The bit moved on the 7450..... + ****/ + +BEGIN_FTR_SECTION + /* Disable L2 prefetch on some 745x and try to ensure + * L2 prefetch engines are idle. As explained by errata + * text, we can't be sure they are, we just hope very hard + * that well be enough (sic !). At least I noticed Apple + * doesn't even bother doing the dcbf's here... + */ + mfspr r4,SPRN_MSSCR0 + rlwinm r4,r4,0,0,29 + sync + mtspr SPRN_MSSCR0,r4 + sync + isync + lis r4,KERNELBASE@h + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 +END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) + + /* TODO: use HW flush assist when available */ + + lis r4,0x0002 + mtctr r4 + li r4,0 +1: + lwzx r0,r0,r4 + addi r4,r4,32 /* Go to start of next cache line */ + bdnz 1b + isync + + /* Now, flush the first 4MB of memory */ + lis r4,0x0002 + mtctr r4 + li r4,0 + sync +1: + dcbf 0,r4 + addi r4,r4,32 /* Go to start of next cache line */ + bdnz 1b + +2: + /* Set up the L2CR configuration bits (and switch L2 off) */ + /* CPU errata: Make sure the mtspr below is already in the + * L1 icache + */ + b 20f + .balign L1_CACHE_BYTES +22: + sync + mtspr SPRN_L2CR,r3 + sync + b 23f +20: + b 21f +21: sync + isync + b 22b + +23: + /* Perform a global invalidation */ + oris r3,r3,0x0020 + sync + mtspr SPRN_L2CR,r3 + sync + isync /* For errata */ + +BEGIN_FTR_SECTION + /* On the 7450, we wait for the L2I bit to clear...... + */ +10: mfspr r3,SPRN_L2CR + andis. r4,r3,0x0020 + bne 10b + b 11f +END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) + + /* Wait for the invalidation to complete */ +3: mfspr r3,SPRN_L2CR + rlwinm. 
r4,r3,0,31,31 + bne 3b + +11: rlwinm r3,r3,0,11,9 /* Turn off the L2I bit */ + sync + mtspr SPRN_L2CR,r3 + sync + + /* See if we need to enable the cache */ + cmplwi r5,0 + beq 4f + + /* Enable the cache */ + oris r3,r3,0x8000 + mtspr SPRN_L2CR,r3 + sync + + /* Enable L2 HW prefetch on 744x/745x */ +BEGIN_FTR_SECTION + mfspr r3,SPRN_MSSCR0 + ori r3,r3,3 + sync + mtspr SPRN_MSSCR0,r3 + sync + isync +END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) +4: + + /* Restore HID0[DPM] to whatever it was before */ + sync + mtspr 1008,r8 + sync + + /* Restore MSR (restores EE and DR bits to original state) */ + SYNC + mtmsr r7 + isync + + mtlr r9 + blr + +_GLOBAL(_get_L2CR) + /* Return the L2CR contents */ + li r3,0 +BEGIN_FTR_SECTION + mfspr r3,SPRN_L2CR +END_FTR_SECTION_IFSET(CPU_FTR_L2CR) + blr + + +/* + * Here is a similar routine for dealing with the L3 cache + * on the 745x family of chips + */ + +_GLOBAL(_set_L3CR) + /* Make sure this is a 745x chip */ +BEGIN_FTR_SECTION + li r3,-1 + blr +END_FTR_SECTION_IFCLR(CPU_FTR_L3CR) + + /* Turn off interrupts and data relocation. */ + mfmsr r7 /* Save MSR in r7 */ + rlwinm r4,r7,0,17,15 + rlwinm r4,r4,0,28,26 /* Turn off DR bit */ + sync + mtmsr r4 + isync + + /* Stop DST streams */ + DSSALL + sync + + /* Get the current enable bit of the L3CR into r4 */ + mfspr r4,SPRN_L3CR + + /* Tweak some bits */ + rlwinm r5,r3,0,0,0 /* r5 contains the new enable bit */ + rlwinm r3,r3,0,22,20 /* Turn off the invalidate bit */ + rlwinm r3,r3,0,2,31 /* Turn off the enable & PE bits */ + rlwinm r3,r3,0,5,3 /* Turn off the clken bit */ + /* Check to see if we need to flush */ + rlwinm. r4,r4,0,0,0 + beq 2f + + /* Flush the cache. + */ + + /* TODO: use HW flush assist */ + + lis r4,0x0008 + mtctr r4 + li r4,0 +1: + lwzx r0,r0,r4 + dcbf 0,r4 + addi r4,r4,32 /* Go to start of next cache line */ + bdnz 1b + +2: + /* Set up the L3CR configuration bits (and switch L3 off) */ + sync + mtspr SPRN_L3CR,r3 + sync + + oris r3,r3,L3CR_L3RES@h /* Set reserved bit 5 */ + mtspr SPRN_L3CR,r3 + sync + oris r3,r3,L3CR_L3CLKEN@h /* Set clken */ + mtspr SPRN_L3CR,r3 + sync + + /* Wait for stabilize */ + li r0,256 + mtctr r0 +1: bdnz 1b + + /* Perform a global invalidation */ + ori r3,r3,0x0400 + sync + mtspr SPRN_L3CR,r3 + sync + isync + + /* We wait for the L3I bit to clear...... */ +10: mfspr r3,SPRN_L3CR + andi. r4,r3,0x0400 + bne 10b + + /* Clear CLKEN */ + rlwinm r3,r3,0,5,3 /* Turn off the clken bit */ + mtspr SPRN_L3CR,r3 + sync + + /* Wait for stabilize */ + li r0,256 + mtctr r0 +1: bdnz 1b + + /* See if we need to enable the cache */ + cmplwi r5,0 + beq 4f + + /* Enable the cache */ + oris r3,r3,(L3CR_L3E | L3CR_L3CLKEN)@h + mtspr SPRN_L3CR,r3 + sync + + /* Wait for stabilize */ + li r0,256 + mtctr r0 +1: bdnz 1b + + /* Restore MSR (restores EE and DR bits to original state) */ +4: SYNC + mtmsr r7 + isync + blr + +_GLOBAL(_get_L3CR) + /* Return the L3CR contents */ + li r3,0 +BEGIN_FTR_SECTION + mfspr r3,SPRN_L3CR +END_FTR_SECTION_IFSET(CPU_FTR_L3CR) + blr + +/* --- End of PowerLogix code --- + */ + + +/* flush_disable_L1() - Flush and disable L1 cache + * + * clobbers r0, r3, ctr, cr0 + * Must be called with interrupts disabled and MMU enabled. 
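+ * The flush works by displacement: 0x4000 sequential loads from
+ * KERNELBASE pull 512kB through the cache, several times the 32kB L1,
+ * and the dcbf pass then pushes every line out before HID0[DCE/ICE]
+ * are cleared.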
+ */
+_GLOBAL(__flush_disable_L1)
+	/* Stop pending altivec streams and memory accesses */
+BEGIN_FTR_SECTION
+	DSSALL
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+	sync
+
+	/* Load counter to 0x4000 cache lines (512k) and
+	 * load cache with data
+	 */
+	li	r3,0x4000	/* 512kB / 32B */
+	mtctr	r3
+	lis	r3,KERNELBASE@h
+1:
+	lwz	r0,0(r3)
+	addi	r3,r3,0x0020	/* Go to start of next cache line */
+	bdnz	1b
+	isync
+	sync
+
+	/* Now flush those cache lines */
+	li	r3,0x4000	/* 512kB / 32B */
+	mtctr	r3
+	lis	r3,KERNELBASE@h
+1:
+	dcbf	0,r3
+	addi	r3,r3,0x0020	/* Go to start of next cache line */
+	bdnz	1b
+	sync
+
+	/* We can now disable the L1 cache (HID0:DCE, HID0:ICE) */
+	mfspr	r3,SPRN_HID0
+	rlwinm	r3,r3,0,18,15
+	mtspr	SPRN_HID0,r3
+	sync
+	isync
+	blr
+
+/* inval_enable_L1 - Invalidate and enable L1 cache
+ *
+ * Assumes L1 is already disabled and MSR:EE is off
+ *
+ * clobbers r3
+ */
+_GLOBAL(__inval_enable_L1)
+	/* Enable and then flash-invalidate the instruction & data cache */
+	mfspr	r3,SPRN_HID0
+	ori	r3,r3, HID0_ICE|HID0_ICFI|HID0_DCE|HID0_DCI
+	sync
+	isync
+	mtspr	SPRN_HID0,r3
+	xori	r3,r3, HID0_ICFI|HID0_DCI
+	mtspr	SPRN_HID0,r3
+	sync
+
+	blr
+
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
new file mode 100644
index 00000000..bedd12e1
--- /dev/null
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -0,0 +1,616 @@
+#include <linux/kernel.h>
+#include <linux/serial.h>
+#include <linux/serial_8250.h>
+#include <linux/serial_core.h>
+#include <linux/console.h>
+#include <linux/pci.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/serial_reg.h>
+#include <asm/io.h>
+#include <asm/mmu.h>
+#include <asm/prom.h>
+#include <asm/serial.h>
+#include <asm/udbg.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) do { printk(fmt); } while(0)
+#else
+#define DBG(fmt...) do { } while(0)
+#endif
+
+#define MAX_LEGACY_SERIAL_PORTS	8
+
+static struct plat_serial8250_port
+legacy_serial_ports[MAX_LEGACY_SERIAL_PORTS+1];
+static struct legacy_serial_info {
+	struct device_node	*np;
+	unsigned int		speed;
+	unsigned int		clock;
+	int			irq_check_parent;
+	phys_addr_t		taddr;
+} legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS];
+
+static struct __initdata of_device_id legacy_serial_parents[] = {
+	{.type = "soc",},
+	{.type = "tsi-bridge",},
+	{.type = "opb", },
+	{.compatible = "ibm,opb",},
+	{.compatible = "simple-bus",},
+	{.compatible = "wrs,epld-localbus",},
+	{},
+};
+
+static unsigned int legacy_serial_count;
+static int legacy_serial_console = -1;
+
+static unsigned int tsi_serial_in(struct uart_port *p, int offset)
+{
+	unsigned int tmp;
+	offset = offset << p->regshift;
+	if (offset == UART_IIR) {
+		tmp = readl(p->membase + (UART_IIR & ~3));
+		return (tmp >> 16) & 0xff; /* UART_IIR % 4 == 2 */
+	} else
+		return readb(p->membase + offset);
+}
+
+static void tsi_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = offset << p->regshift;
+	if (!((offset == UART_IER) && (value & UART_IER_UUE)))
+		writeb(value, p->membase + offset);
+}
+
+static int __init add_legacy_port(struct device_node *np, int want_index,
+				  int iotype, phys_addr_t base,
+				  phys_addr_t taddr, unsigned long irq,
+				  upf_t flags, int irq_check_parent)
+{
+	const __be32 *clk, *spd;
+	u32 clock = BASE_BAUD * 16;
+	int index;
+
+	/* get clock freq. if present */
+	clk = of_get_property(np, "clock-frequency", NULL);
+	if (clk && *clk)
+		clock = be32_to_cpup(clk);
+
+	/* get default speed if present */
+	spd = of_get_property(np, "current-speed", NULL);
+
+	/* If we have a location index, then try to use it */
+	if (want_index >= 0 && want_index < MAX_LEGACY_SERIAL_PORTS)
+		index = want_index;
+	else
+		index = legacy_serial_count;
+
+	/* If our index is still out of range, the array is full; we
+	 * could scan for a free slot, but it makes little sense to
+	 * bother, so just skip the port
+	 */
+	if (index >= MAX_LEGACY_SERIAL_PORTS)
+		return -1;
+	if (index >= legacy_serial_count)
+		legacy_serial_count = index + 1;
+
+	/* Check if a port already claimed our slot */
+	if (legacy_serial_infos[index].np != 0) {
+		/* if we still have some room, move it, else override */
+		if (legacy_serial_count < MAX_LEGACY_SERIAL_PORTS) {
+			printk(KERN_DEBUG "Moved legacy port %d -> %d\n",
+			       index, legacy_serial_count);
+			legacy_serial_ports[legacy_serial_count] =
+				legacy_serial_ports[index];
+			legacy_serial_infos[legacy_serial_count] =
+				legacy_serial_infos[index];
+			legacy_serial_count++;
+		} else {
+			printk(KERN_DEBUG "Replacing legacy port %d\n", index);
+		}
+	}
+
+	/* Now fill the entry */
+	memset(&legacy_serial_ports[index], 0,
+	       sizeof(struct plat_serial8250_port));
+	if (iotype == UPIO_PORT)
+		legacy_serial_ports[index].iobase = base;
+	else
+		legacy_serial_ports[index].mapbase = base;
+
+	legacy_serial_ports[index].iotype = iotype;
+	legacy_serial_ports[index].uartclk = clock;
+	legacy_serial_ports[index].irq = irq;
+	legacy_serial_ports[index].flags = flags;
+	legacy_serial_infos[index].taddr = taddr;
+	legacy_serial_infos[index].np = of_node_get(np);
+	legacy_serial_infos[index].clock = clock;
+	legacy_serial_infos[index].speed = spd ? be32_to_cpup(spd) : 0;
+	legacy_serial_infos[index].irq_check_parent = irq_check_parent;
+
+	if (iotype == UPIO_TSI) {
+		legacy_serial_ports[index].serial_in = tsi_serial_in;
+		legacy_serial_ports[index].serial_out = tsi_serial_out;
+	}
+
+	printk(KERN_DEBUG "Found legacy serial port %d for %s\n",
+	       index, np->full_name);
+	printk(KERN_DEBUG "  %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n",
+	       (iotype == UPIO_PORT) ? "port" : "mem",
+	       (unsigned long long)base, (unsigned long long)taddr, irq,
+	       legacy_serial_ports[index].uartclk,
+	       legacy_serial_infos[index].speed);
+
+	return index;
+}
+
+static int __init add_legacy_soc_port(struct device_node *np,
+				      struct device_node *soc_dev)
+{
+	u64 addr;
+	const u32 *addrp;
+	upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ
+		| UPF_FIXED_PORT;
+	struct device_node *tsi = of_get_parent(np);
+
+	/* We only support ports that have a clock frequency properly
+	 * encoded in the device-tree.
+	 */
+	if (of_get_property(np, "clock-frequency", NULL) == NULL)
+		return -1;
+
+	/* if reg-shift or offset, don't try to use it */
+	if ((of_get_property(np, "reg-shift", NULL) != NULL) ||
+	    (of_get_property(np, "reg-offset", NULL) != NULL))
+		return -1;
+
+	/* if rtas uses this device, don't try to use it as well */
+	if (of_get_property(np, "used-by-rtas", NULL) != NULL)
+		return -1;
+
+	/* Get the address */
+	addrp = of_get_address(soc_dev, 0, NULL, NULL);
+	if (addrp == NULL)
+		return -1;
+
+	addr = of_translate_address(soc_dev, addrp);
+	if (addr == OF_BAD_ADDR)
+		return -1;
+
+	/* Add port, irq will be dealt with later. We passed a translated
+	 * IO port value.
It will be fixed up later along with the irq + */ + if (tsi && !strcmp(tsi->type, "tsi-bridge")) + return add_legacy_port(np, -1, UPIO_TSI, addr, addr, NO_IRQ, flags, 0); + else + return add_legacy_port(np, -1, UPIO_MEM, addr, addr, NO_IRQ, flags, 0); +} + +static int __init add_legacy_isa_port(struct device_node *np, + struct device_node *isa_brg) +{ + const __be32 *reg; + const char *typep; + int index = -1; + u64 taddr; + + DBG(" -> add_legacy_isa_port(%s)\n", np->full_name); + + /* Get the ISA port number */ + reg = of_get_property(np, "reg", NULL); + if (reg == NULL) + return -1; + + /* Verify it's an IO port, we don't support anything else */ + if (!(be32_to_cpu(reg[0]) & 0x00000001)) + return -1; + + /* Now look for an "ibm,aix-loc" property that gives us ordering + * if any... + */ + typep = of_get_property(np, "ibm,aix-loc", NULL); + + /* If we have a location index, then use it */ + if (typep && *typep == 'S') + index = simple_strtol(typep+1, NULL, 0) - 1; + + /* Translate ISA address. If it fails, we still register the port + * with no translated address so that it can be picked up as an IO + * port later by the serial driver + */ + taddr = of_translate_address(np, reg); + if (taddr == OF_BAD_ADDR) + taddr = 0; + + /* Add port, irq will be dealt with later */ + return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]), taddr, + NO_IRQ, UPF_BOOT_AUTOCONF, 0); + +} + +#ifdef CONFIG_PCI +static int __init add_legacy_pci_port(struct device_node *np, + struct device_node *pci_dev) +{ + u64 addr, base; + const u32 *addrp; + unsigned int flags; + int iotype, index = -1, lindex = 0; + + DBG(" -> add_legacy_pci_port(%s)\n", np->full_name); + + /* We only support ports that have a clock frequency properly + * encoded in the device-tree (that is have an fcode). Anything + * else can't be used that early and will be normally probed by + * the generic 8250_pci driver later on. The reason is that 8250 + * compatible UARTs on PCI need all sort of quirks (port offsets + * etc...) that this code doesn't know about + */ + if (of_get_property(np, "clock-frequency", NULL) == NULL) + return -1; + + /* Get the PCI address. Assume BAR 0 */ + addrp = of_get_pci_address(pci_dev, 0, NULL, &flags); + if (addrp == NULL) + return -1; + + /* We only support BAR 0 for now */ + iotype = (flags & IORESOURCE_MEM) ? UPIO_MEM : UPIO_PORT; + addr = of_translate_address(pci_dev, addrp); + if (addr == OF_BAD_ADDR) + return -1; + + /* Set the IO base to the same as the translated address for MMIO, + * or to the domain local IO base for PIO (it will be fixed up later) + */ + if (iotype == UPIO_MEM) + base = addr; + else + base = addrp[2]; + + /* Try to guess an index... If we have subdevices of the pci dev, + * we get to their "reg" property + */ + if (np != pci_dev) { + const __be32 *reg = of_get_property(np, "reg", NULL); + if (reg && (be32_to_cpup(reg) < 4)) + index = lindex = be32_to_cpup(reg); + } + + /* Local index means it's the Nth port in the PCI chip. Unfortunately + * the offset to add here is device specific. We know about those + * EXAR ports and we default to the most common case. 
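+	 * (Worked example, adapter assumed purely for illustration: on a
+	 *  pci13a8,154 the third port, lindex = 2, lands 2 * 0x200 =
+	 *  0x400 bytes above BAR 0, per the EXAR spacing applied below.)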
If your UART
+ * doesn't work for these settings, you'll have to add your own special
+ * cases here
+ */
+	if (of_device_is_compatible(pci_dev, "pci13a8,152") ||
+	    of_device_is_compatible(pci_dev, "pci13a8,154") ||
+	    of_device_is_compatible(pci_dev, "pci13a8,158")) {
+		addr += 0x200 * lindex;
+		base += 0x200 * lindex;
+	} else {
+		addr += 8 * lindex;
+		base += 8 * lindex;
+	}
+
+	/* Add port, irq will be dealt with later. We passed a translated
+	 * IO port value. It will be fixed up later along with the irq
+	 */
+	return add_legacy_port(np, index, iotype, base, addr, NO_IRQ,
+			       UPF_BOOT_AUTOCONF, np != pci_dev);
+}
+#endif
+
+static void __init setup_legacy_serial_console(int console)
+{
+	struct legacy_serial_info *info =
+		&legacy_serial_infos[console];
+	void __iomem *addr;
+
+	if (info->taddr == 0)
+		return;
+	addr = ioremap(info->taddr, 0x1000);
+	if (addr == NULL)
+		return;
+	if (info->speed == 0)
+		info->speed = udbg_probe_uart_speed(addr, info->clock);
+	DBG("default console speed = %d\n", info->speed);
+	udbg_init_uart(addr, info->speed, info->clock);
+}
+
+/*
+ * This is called very early, as part of setup_system() or eventually
+ * setup_arch(), basically before anything else in this file. This function
+ * will try to build a list of all the available 8250-compatible serial ports
+ * in the machine using the Open Firmware device-tree. It currently only deals
+ * with ISA and PCI busses but could be extended. It allows a very early boot
+ * console to be initialized; the list is also used later to provide 8250 with
+ * the machine's non-PCI ports and to properly pick the default console port
+ */
+void __init find_legacy_serial_ports(void)
+{
+	struct device_node *np, *stdout = NULL;
+	const char *path;
+	int index;
+
+	DBG(" -> find_legacy_serial_port()\n");
+
+	/* Now find out if one of these is our firmware console */
+	path = of_get_property(of_chosen, "linux,stdout-path", NULL);
+	if (path != NULL) {
+		stdout = of_find_node_by_path(path);
+		if (stdout)
+			DBG("stdout is %s\n", stdout->full_name);
+	} else {
+		DBG(" no linux,stdout-path !\n");
+	}
+
+	/* Iterate over all the 16550 ports, looking for known parents */
+	for_each_compatible_node(np, "serial", "ns16550") {
+		struct device_node *parent = of_get_parent(np);
+		if (!parent)
+			continue;
+		if (of_match_node(legacy_serial_parents, parent) != NULL) {
+			if (of_device_is_available(np)) {
+				index = add_legacy_soc_port(np, np);
+				if (index >= 0 && np == stdout)
+					legacy_serial_console = index;
+			}
+		}
+		of_node_put(parent);
+	}
+
+	/* Next, fill our array with ISA ports */
+	for_each_node_by_type(np, "serial") {
+		struct device_node *isa = of_get_parent(np);
+		if (isa && !strcmp(isa->name, "isa")) {
+			index = add_legacy_isa_port(np, isa);
+			if (index >= 0 && np == stdout)
+				legacy_serial_console = index;
+		}
+		of_node_put(isa);
+	}
+
+#ifdef CONFIG_PCI
+	/* Next, try to locate PCI ports */
+	for (np = NULL; (np = of_find_all_nodes(np));) {
+		struct device_node *pci, *parent = of_get_parent(np);
+		if (parent && !strcmp(parent->name, "isa")) {
+			of_node_put(parent);
+			continue;
+		}
+		if (strcmp(np->name, "serial") && strcmp(np->type, "serial")) {
+			of_node_put(parent);
+			continue;
+		}
+		/* Check for known pciclass, and also check whether we have
+		 * a device with child nodes for ports or not
+		 */
+		if (of_device_is_compatible(np, "pciclass,0700") ||
+		    of_device_is_compatible(np, "pciclass,070002"))
+			pci = np;
+		else if (of_device_is_compatible(parent, "pciclass,0700") ||
+			 of_device_is_compatible(parent, "pciclass,070002"))
+
pci = parent; + else { + of_node_put(parent); + continue; + } + index = add_legacy_pci_port(np, pci); + if (index >= 0 && np == stdout) + legacy_serial_console = index; + of_node_put(parent); + } +#endif + + DBG("legacy_serial_console = %d\n", legacy_serial_console); + if (legacy_serial_console >= 0) + setup_legacy_serial_console(legacy_serial_console); + DBG(" <- find_legacy_serial_port()\n"); +} + +static struct platform_device serial_device = { + .name = "serial8250", + .id = PLAT8250_DEV_PLATFORM, + .dev = { + .platform_data = legacy_serial_ports, + }, +}; + +static void __init fixup_port_irq(int index, + struct device_node *np, + struct plat_serial8250_port *port) +{ + unsigned int virq; + + DBG("fixup_port_irq(%d)\n", index); + + virq = irq_of_parse_and_map(np, 0); + if (virq == NO_IRQ && legacy_serial_infos[index].irq_check_parent) { + np = of_get_parent(np); + if (np == NULL) + return; + virq = irq_of_parse_and_map(np, 0); + of_node_put(np); + } + if (virq == NO_IRQ) + return; + + port->irq = virq; + +#ifdef CONFIG_SERIAL_8250_FSL + if (of_device_is_compatible(np, "fsl,ns16550")) + port->handle_irq = fsl8250_handle_irq; +#endif +} + +static void __init fixup_port_pio(int index, + struct device_node *np, + struct plat_serial8250_port *port) +{ +#ifdef CONFIG_PCI + struct pci_controller *hose; + + DBG("fixup_port_pio(%d)\n", index); + + hose = pci_find_hose_for_OF_device(np); + if (hose) { + unsigned long offset = (unsigned long)hose->io_base_virt - +#ifdef CONFIG_PPC64 + pci_io_base; +#else + isa_io_base; +#endif + DBG("port %d, IO %lx -> %lx\n", + index, port->iobase, port->iobase + offset); + port->iobase += offset; + } +#endif +} + +static void __init fixup_port_mmio(int index, + struct device_node *np, + struct plat_serial8250_port *port) +{ + DBG("fixup_port_mmio(%d)\n", index); + + port->membase = ioremap(port->mapbase, 0x100); +} + +/* + * This is called as an arch initcall, hopefully before the PCI bus is + * probed and/or the 8250 driver loaded since we need to register our + * platform devices before 8250 PCI ones are detected as some of them + * must properly "override" the platform ones. + * + * This function fixes up the interrupt value for platform ports as it + * couldn't be done earlier before interrupt maps have been parsed. It + * also "corrects" the IO address for PIO ports for the same reason, + * since earlier, the PHBs virtual IO space wasn't assigned yet. It then + * registers all those platform ports for use by the 8250 driver when it + * finally loads. + */ +static int __init serial_dev_init(void) +{ + int i; + + if (legacy_serial_count == 0) + return -ENODEV; + + /* + * Before we register the platform serial devices, we need + * to fixup their interrupts and their IO ports. + */ + DBG("Fixing serial ports interrupts and IO ports ...\n"); + + for (i = 0; i < legacy_serial_count; i++) { + struct plat_serial8250_port *port = &legacy_serial_ports[i]; + struct device_node *np = legacy_serial_infos[i].np; + + if (port->irq == NO_IRQ) + fixup_port_irq(i, np, port); + if (port->iotype == UPIO_PORT) + fixup_port_pio(i, np, port); + if ((port->iotype == UPIO_MEM) || (port->iotype == UPIO_TSI)) + fixup_port_mmio(i, np, port); + } + + DBG("Registering platform serial ports\n"); + + return platform_device_register(&serial_device); +} +device_initcall(serial_dev_init); + + +#ifdef CONFIG_SERIAL_8250_CONSOLE +/* + * This is called very early, as part of console_init() (typically just after + * time_init()). 
This function is responsible
+ * for trying to find a good default console on serial ports. It tries to
+ * match the open firmware default output with one of the available serial
+ * console drivers that have been probed earlier by
+ * find_legacy_serial_ports()
+ */
+static int __init check_legacy_serial_console(void)
+{
+	struct device_node *prom_stdout = NULL;
+	int i, speed = 0, offset = 0;
+	const char *name;
+	const __be32 *spd;
+
+	DBG(" -> check_legacy_serial_console()\n");
+
+	/* The user has requested a console so this is already set up. */
+	if (strstr(boot_command_line, "console=")) {
+		DBG(" console was specified !\n");
+		return -EBUSY;
+	}
+
+	if (!of_chosen) {
+		DBG(" of_chosen is NULL !\n");
+		return -ENODEV;
+	}
+
+	if (legacy_serial_console < 0) {
+		DBG(" legacy_serial_console not found !\n");
+		return -ENODEV;
+	}
+	/* We are getting a weird phandle from OF ... */
+	/* ... So use the full path instead */
+	name = of_get_property(of_chosen, "linux,stdout-path", NULL);
+	if (name == NULL) {
+		DBG(" no linux,stdout-path !\n");
+		return -ENODEV;
+	}
+	prom_stdout = of_find_node_by_path(name);
+	if (!prom_stdout) {
+		DBG(" can't find stdout package %s !\n", name);
+		return -ENODEV;
+	}
+	DBG("stdout is %s\n", prom_stdout->full_name);
+
+	name = of_get_property(prom_stdout, "name", NULL);
+	if (!name) {
+		DBG(" stdout package has no name !\n");
+		goto not_found;
+	}
+	spd = of_get_property(prom_stdout, "current-speed", NULL);
+	if (spd)
+		speed = be32_to_cpup(spd);
+
+	if (strcmp(name, "serial") != 0)
+		goto not_found;
+
+	/* Look for it in probed array */
+	for (i = 0; i < legacy_serial_count; i++) {
+		if (prom_stdout != legacy_serial_infos[i].np)
+			continue;
+		offset = i;
+		speed = legacy_serial_infos[i].speed;
+		break;
+	}
+	if (i >= legacy_serial_count)
+		goto not_found;
+
+	of_node_put(prom_stdout);
+
+	DBG("Found serial console at ttyS%d\n", offset);
+
+	if (speed) {
+		static char __initdata opt[16];
+		sprintf(opt, "%d", speed);
+		return add_preferred_console("ttyS", offset, opt);
+	} else
+		return add_preferred_console("ttyS", offset, NULL);
+
+ not_found:
+	DBG("No preferred console found !\n");
+	of_node_put(prom_stdout);
+	return -ENODEV;
+}
+console_initcall(check_legacy_serial_console);
+
+#endif /* CONFIG_SERIAL_8250_CONSOLE */
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
new file mode 100644
index 00000000..f5725bce
--- /dev/null
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -0,0 +1,718 @@
+/*
+ * PowerPC64 LPAR Configuration Information Driver
+ *
+ * Dave Engebretsen engebret@us.ibm.com
+ *    Copyright (c) 2003 Dave Engebretsen
+ * Will Schmidt willschm@us.ibm.com
+ *    SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation.
+ *    seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation.
+ * Nathan Lynch nathanl@austin.ibm.com
+ *    Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This driver creates a proc file at /proc/ppc64/lparcfg which contains
+ * keyword - value pairs that specify the configuration of the partition.
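+ *
+ * Sample of the output format (values illustrative only):
+ *
+ *	serial_number=IBM,0212345678
+ *	system_type=IBM,8233-E8B
+ *	partition_id=7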
+ */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/proc_fs.h> +#include <linux/init.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <asm/uaccess.h> +#include <asm/lppaca.h> +#include <asm/hvcall.h> +#include <asm/firmware.h> +#include <asm/rtas.h> +#include <asm/time.h> +#include <asm/prom.h> +#include <asm/vdso_datapage.h> +#include <asm/vio.h> +#include <asm/mmu.h> + +#define MODULE_VERS "1.9" +#define MODULE_NAME "lparcfg" + +/* #define LPARCFG_DEBUG */ + +static struct proc_dir_entry *proc_ppc64_lparcfg; + +/* + * Track sum of all purrs across all processors. This is used to further + * calculate usage values by different applications + */ +static unsigned long get_purr(void) +{ + unsigned long sum_purr = 0; + int cpu; + + for_each_possible_cpu(cpu) { + struct cpu_usage *cu; + + cu = &per_cpu(cpu_usage_array, cpu); + sum_purr += cu->current_tb; + } + return sum_purr; +} + +/* + * Methods used to fetch LPAR data when running on a pSeries platform. + */ + +struct hvcall_ppp_data { + u64 entitlement; + u64 unallocated_entitlement; + u16 group_num; + u16 pool_num; + u8 capped; + u8 weight; + u8 unallocated_weight; + u16 active_procs_in_pool; + u16 active_system_procs; + u16 phys_platform_procs; + u32 max_proc_cap_avail; + u32 entitled_proc_cap_avail; +}; + +/* + * H_GET_PPP hcall returns info in 4 parms. + * entitled_capacity,unallocated_capacity, + * aggregation, resource_capability). + * + * R4 = Entitled Processor Capacity Percentage. + * R5 = Unallocated Processor Capacity Percentage. + * R6 (AABBCCDDEEFFGGHH). + * XXXX - reserved (0) + * XXXX - reserved (0) + * XXXX - Group Number + * XXXX - Pool Number. + * R7 (IIJJKKLLMMNNOOPP). + * XX - reserved. (0) + * XX - bit 0-6 reserved (0). bit 7 is Capped indicator. + * XX - variable processor Capacity Weight + * XX - Unallocated Variable Processor Capacity Weight. + * XXXX - Active processors in Physical Processor Pool. + * XXXX - Processors active on platform. + * R8 (QQQQRRRRRRSSSSSS). if ibm,partition-performance-parameters-level >= 1 + * XXXX - Physical platform procs allocated to virtualization. + * XXXXXX - Max procs capacity % available to the partitions pool. + * XXXXXX - Entitled procs capacity % available to the + * partitions pool. 
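+ *
+ * Worked example (register value assumed, not from real hardware):
+ * R7 = 0x0001020000040008 unpacks below as capped = 1, weight = 2,
+ * unallocated weight = 0, 4 active processors in the pool and 8
+ * processors active on the platform.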
+ */ +static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data) +{ + unsigned long rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + rc = plpar_hcall9(H_GET_PPP, retbuf); + + ppp_data->entitlement = retbuf[0]; + ppp_data->unallocated_entitlement = retbuf[1]; + + ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; + ppp_data->pool_num = retbuf[2] & 0xffff; + + ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01; + ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff; + ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff; + ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff; + ppp_data->active_system_procs = retbuf[3] & 0xffff; + + ppp_data->phys_platform_procs = retbuf[4] >> 6 * 8; + ppp_data->max_proc_cap_avail = (retbuf[4] >> 3 * 8) & 0xffffff; + ppp_data->entitled_proc_cap_avail = retbuf[4] & 0xffffff; + + return rc; +} + +static unsigned h_pic(unsigned long *pool_idle_time, + unsigned long *num_procs) +{ + unsigned long rc; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + rc = plpar_hcall(H_PIC, retbuf); + + *pool_idle_time = retbuf[0]; + *num_procs = retbuf[1]; + + return rc; +} + +/* + * parse_ppp_data + * Parse out the data returned from h_get_ppp and h_pic + */ +static void parse_ppp_data(struct seq_file *m) +{ + struct hvcall_ppp_data ppp_data; + struct device_node *root; + const int *perf_level; + int rc; + + rc = h_get_ppp(&ppp_data); + if (rc) + return; + + seq_printf(m, "partition_entitled_capacity=%lld\n", + ppp_data.entitlement); + seq_printf(m, "group=%d\n", ppp_data.group_num); + seq_printf(m, "system_active_processors=%d\n", + ppp_data.active_system_procs); + + /* pool related entries are appropriate for shared configs */ + if (lppaca_of(0).shared_proc) { + unsigned long pool_idle_time, pool_procs; + + seq_printf(m, "pool=%d\n", ppp_data.pool_num); + + /* report pool_capacity in percentage */ + seq_printf(m, "pool_capacity=%d\n", + ppp_data.active_procs_in_pool * 100); + + h_pic(&pool_idle_time, &pool_procs); + seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); + seq_printf(m, "pool_num_procs=%ld\n", pool_procs); + } + + seq_printf(m, "unallocated_capacity_weight=%d\n", + ppp_data.unallocated_weight); + seq_printf(m, "capacity_weight=%d\n", ppp_data.weight); + seq_printf(m, "capped=%d\n", ppp_data.capped); + seq_printf(m, "unallocated_capacity=%lld\n", + ppp_data.unallocated_entitlement); + + /* The last bits of information returned from h_get_ppp are only + * valid if the ibm,partition-performance-parameters-level + * property is >= 1. 
+ */ + root = of_find_node_by_path("/"); + if (root) { + perf_level = of_get_property(root, + "ibm,partition-performance-parameters-level", + NULL); + if (perf_level && (*perf_level >= 1)) { + seq_printf(m, + "physical_procs_allocated_to_virtualization=%d\n", + ppp_data.phys_platform_procs); + seq_printf(m, "max_proc_capacity_available=%d\n", + ppp_data.max_proc_cap_avail); + seq_printf(m, "entitled_proc_capacity_available=%d\n", + ppp_data.entitled_proc_cap_avail); + } + + of_node_put(root); + } +} + +/** + * parse_mpp_data + * Parse out data returned from h_get_mpp + */ +static void parse_mpp_data(struct seq_file *m) +{ + struct hvcall_mpp_data mpp_data; + int rc; + + rc = h_get_mpp(&mpp_data); + if (rc) + return; + + seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem); + + if (mpp_data.mapped_mem != -1) + seq_printf(m, "mapped_entitled_memory=%ld\n", + mpp_data.mapped_mem); + + seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num); + seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num); + + seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight); + seq_printf(m, "unallocated_entitled_memory_weight=%d\n", + mpp_data.unallocated_mem_weight); + seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n", + mpp_data.unallocated_entitlement); + + if (mpp_data.pool_size != -1) + seq_printf(m, "entitled_memory_pool_size=%ld bytes\n", + mpp_data.pool_size); + + seq_printf(m, "entitled_memory_loan_request=%ld\n", + mpp_data.loan_request); + + seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem); +} + +/** + * parse_mpp_x_data + * Parse out data returned from h_get_mpp_x + */ +static void parse_mpp_x_data(struct seq_file *m) +{ + struct hvcall_mpp_x_data mpp_x_data; + + if (!firmware_has_feature(FW_FEATURE_XCMO)) + return; + if (h_get_mpp_x(&mpp_x_data)) + return; + + seq_printf(m, "coalesced_bytes=%ld\n", mpp_x_data.coalesced_bytes); + + if (mpp_x_data.pool_coalesced_bytes) + seq_printf(m, "pool_coalesced_bytes=%ld\n", + mpp_x_data.pool_coalesced_bytes); + if (mpp_x_data.pool_purr_cycles) + seq_printf(m, "coalesce_pool_purr=%ld\n", mpp_x_data.pool_purr_cycles); + if (mpp_x_data.pool_spurr_cycles) + seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles); +} + +#define SPLPAR_CHARACTERISTICS_TOKEN 20 +#define SPLPAR_MAXLENGTH 1026*(sizeof(char)) + +/* + * parse_system_parameter_string() + * Retrieve the potential_processors, max_entitled_capacity and friends + * through the get-system-parameter rtas call. Replace keyword strings as + * necessary. 
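+ *
+ * For example (buffer contents assumed, purely illustrative), a string
+ * "MaxEntCap=100,MaxPlatProcs=4" comes out as the two lines
+ * partition_max_entitled_capacity=100 and system_potential_processors=4.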
+ */ +static void parse_system_parameter_string(struct seq_file *m) +{ + int call_status; + + unsigned char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); + if (!local_buffer) { + printk(KERN_ERR "%s %s kmalloc failure at line %d\n", + __FILE__, __func__, __LINE__); + return; + } + + spin_lock(&rtas_data_buf_lock); + memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH); + call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, + NULL, + SPLPAR_CHARACTERISTICS_TOKEN, + __pa(rtas_data_buf), + RTAS_DATA_BUF_SIZE); + memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH); + spin_unlock(&rtas_data_buf_lock); + + if (call_status != 0) { + printk(KERN_INFO + "%s %s Error calling get-system-parameter (0x%x)\n", + __FILE__, __func__, call_status); + } else { + int splpar_strlen; + int idx, w_idx; + char *workbuffer = kzalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); + if (!workbuffer) { + printk(KERN_ERR "%s %s kmalloc failure at line %d\n", + __FILE__, __func__, __LINE__); + kfree(local_buffer); + return; + } +#ifdef LPARCFG_DEBUG + printk(KERN_INFO "success calling get-system-parameter\n"); +#endif + splpar_strlen = local_buffer[0] * 256 + local_buffer[1]; + local_buffer += 2; /* step over strlen value */ + + w_idx = 0; + idx = 0; + while ((*local_buffer) && (idx < splpar_strlen)) { + workbuffer[w_idx++] = local_buffer[idx++]; + if ((local_buffer[idx] == ',') + || (local_buffer[idx] == '\0')) { + workbuffer[w_idx] = '\0'; + if (w_idx) { + /* avoid the empty string */ + seq_printf(m, "%s\n", workbuffer); + } + memset(workbuffer, 0, SPLPAR_MAXLENGTH); + idx++; /* skip the comma */ + w_idx = 0; + } else if (local_buffer[idx] == '=') { + /* code here to replace workbuffer contents + with different keyword strings */ + if (0 == strcmp(workbuffer, "MaxEntCap")) { + strcpy(workbuffer, + "partition_max_entitled_capacity"); + w_idx = strlen(workbuffer); + } + if (0 == strcmp(workbuffer, "MaxPlatProcs")) { + strcpy(workbuffer, + "system_potential_processors"); + w_idx = strlen(workbuffer); + } + } + } + kfree(workbuffer); + local_buffer -= 2; /* back up over strlen value */ + } + kfree(local_buffer); +} + +/* Return the number of processors in the system. + * This function reads through the device tree and counts + * the virtual processors, this does not include threads. 
+ */ +static int lparcfg_count_active_processors(void) +{ + struct device_node *cpus_dn = NULL; + int count = 0; + + while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) { +#ifdef LPARCFG_DEBUG + printk(KERN_ERR "cpus_dn %p\n", cpus_dn); +#endif + count++; + } + return count; +} + +static void pseries_cmo_data(struct seq_file *m) +{ + int cpu; + unsigned long cmo_faults = 0; + unsigned long cmo_fault_time = 0; + + seq_printf(m, "cmo_enabled=%d\n", firmware_has_feature(FW_FEATURE_CMO)); + + if (!firmware_has_feature(FW_FEATURE_CMO)) + return; + + for_each_possible_cpu(cpu) { + cmo_faults += lppaca_of(cpu).cmo_faults; + cmo_fault_time += lppaca_of(cpu).cmo_fault_time; + } + + seq_printf(m, "cmo_faults=%lu\n", cmo_faults); + seq_printf(m, "cmo_fault_time_usec=%lu\n", + cmo_fault_time / tb_ticks_per_usec); + seq_printf(m, "cmo_primary_psp=%d\n", cmo_get_primary_psp()); + seq_printf(m, "cmo_secondary_psp=%d\n", cmo_get_secondary_psp()); + seq_printf(m, "cmo_page_size=%lu\n", cmo_get_page_size()); +} + +static void splpar_dispatch_data(struct seq_file *m) +{ + int cpu; + unsigned long dispatches = 0; + unsigned long dispatch_dispersions = 0; + + for_each_possible_cpu(cpu) { + dispatches += lppaca_of(cpu).yield_count; + dispatch_dispersions += lppaca_of(cpu).dispersion_count; + } + + seq_printf(m, "dispatches=%lu\n", dispatches); + seq_printf(m, "dispatch_dispersions=%lu\n", dispatch_dispersions); +} + +static void parse_em_data(struct seq_file *m) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + if (plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS) + seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]); +} + +static int pseries_lparcfg_data(struct seq_file *m, void *v) +{ + int partition_potential_processors; + int partition_active_processors; + struct device_node *rtas_node; + const int *lrdrp = NULL; + + rtas_node = of_find_node_by_path("/rtas"); + if (rtas_node) + lrdrp = of_get_property(rtas_node, "ibm,lrdr-capacity", NULL); + + if (lrdrp == NULL) { + partition_potential_processors = vdso_data->processorCount; + } else { + partition_potential_processors = *(lrdrp + 4); + } + of_node_put(rtas_node); + + partition_active_processors = lparcfg_count_active_processors(); + + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + /* this call handles the ibm,get-system-parameter contents */ + parse_system_parameter_string(m); + parse_ppp_data(m); + parse_mpp_data(m); + parse_mpp_x_data(m); + pseries_cmo_data(m); + splpar_dispatch_data(m); + + seq_printf(m, "purr=%ld\n", get_purr()); + } else { /* non SPLPAR case */ + + seq_printf(m, "system_active_processors=%d\n", + partition_potential_processors); + + seq_printf(m, "system_potential_processors=%d\n", + partition_potential_processors); + + seq_printf(m, "partition_max_entitled_capacity=%d\n", + partition_potential_processors * 100); + + seq_printf(m, "partition_entitled_capacity=%d\n", + partition_active_processors * 100); + } + + seq_printf(m, "partition_active_processors=%d\n", + partition_active_processors); + + seq_printf(m, "partition_potential_processors=%d\n", + partition_potential_processors); + + seq_printf(m, "shared_processor_mode=%d\n", lppaca_of(0).shared_proc); + + seq_printf(m, "slb_size=%d\n", mmu_slb_size); + + parse_em_data(m); + + return 0; +} + +static ssize_t update_ppp(u64 *entitlement, u8 *weight) +{ + struct hvcall_ppp_data ppp_data; + u8 new_weight; + u64 new_entitled; + ssize_t retval; + + /* Get our current parameters */ + retval = h_get_ppp(&ppp_data); + if (retval) + return retval; + + if (entitlement) { + new_weight 
= ppp_data.weight; + new_entitled = *entitlement; + } else if (weight) { + new_weight = *weight; + new_entitled = ppp_data.entitlement; + } else + return -EINVAL; + + pr_debug("%s: current_entitled = %llu, current_weight = %u\n", + __func__, ppp_data.entitlement, ppp_data.weight); + + pr_debug("%s: new_entitled = %llu, new_weight = %u\n", + __func__, new_entitled, new_weight); + + retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight); + return retval; +} + +/** + * update_mpp + * + * Update the memory entitlement and weight for the partition. Caller must + * specify either a new entitlement or weight, not both, to be updated + * since the h_set_mpp call takes both entitlement and weight as parameters. + */ +static ssize_t update_mpp(u64 *entitlement, u8 *weight) +{ + struct hvcall_mpp_data mpp_data; + u64 new_entitled; + u8 new_weight; + ssize_t rc; + + if (entitlement) { + /* Check with vio to ensure the new memory entitlement + * can be handled. + */ + rc = vio_cmo_entitlement_update(*entitlement); + if (rc) + return rc; + } + + rc = h_get_mpp(&mpp_data); + if (rc) + return rc; + + if (entitlement) { + new_weight = mpp_data.mem_weight; + new_entitled = *entitlement; + } else if (weight) { + new_weight = *weight; + new_entitled = mpp_data.entitled_mem; + } else + return -EINVAL; + + pr_debug("%s: current_entitled = %lu, current_weight = %u\n", + __func__, mpp_data.entitled_mem, mpp_data.mem_weight); + + pr_debug("%s: new_entitled = %llu, new_weight = %u\n", + __func__, new_entitled, new_weight); + + rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight); + return rc; +} + +/* + * Interface for changing system parameters (variable capacity weight + * and entitled capacity). Format of input is "param_name=value"; + * anything after value is ignored. Valid parameters at this time are + * "partition_entitled_capacity" and "capacity_weight". We use + * H_SET_PPP to alter parameters. + * + * This function should be invoked only on systems with + * FW_FEATURE_SPLPAR. 
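+ *
+ * Typical (illustrative) usage from userspace:
+ *
+ *	echo "partition_entitled_capacity=200" > /proc/ppc64/lparcfg
+ *
+ * which is routed to update_ppp() below.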
+ */ +static ssize_t lparcfg_write(struct file *file, const char __user * buf, + size_t count, loff_t * off) +{ + int kbuf_sz = 64; + char kbuf[kbuf_sz]; + char *tmp; + u64 new_entitled, *new_entitled_ptr = &new_entitled; + u8 new_weight, *new_weight_ptr = &new_weight; + ssize_t retval; + + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return -EINVAL; + + if (count > kbuf_sz) + return -EINVAL; + + if (copy_from_user(kbuf, buf, count)) + return -EFAULT; + + kbuf[count - 1] = '\0'; + tmp = strchr(kbuf, '='); + if (!tmp) + return -EINVAL; + + *tmp++ = '\0'; + + if (!strcmp(kbuf, "partition_entitled_capacity")) { + char *endp; + *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; + + retval = update_ppp(new_entitled_ptr, NULL); + } else if (!strcmp(kbuf, "capacity_weight")) { + char *endp; + *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; + + retval = update_ppp(NULL, new_weight_ptr); + } else if (!strcmp(kbuf, "entitled_memory")) { + char *endp; + *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; + + retval = update_mpp(new_entitled_ptr, NULL); + } else if (!strcmp(kbuf, "entitled_memory_weight")) { + char *endp; + *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; + + retval = update_mpp(NULL, new_weight_ptr); + } else + return -EINVAL; + + if (retval == H_SUCCESS || retval == H_CONSTRAINED) { + retval = count; + } else if (retval == H_BUSY) { + retval = -EBUSY; + } else if (retval == H_HARDWARE) { + retval = -EIO; + } else if (retval == H_PARAMETER) { + retval = -EINVAL; + } + + return retval; +} + +static int lparcfg_data(struct seq_file *m, void *v) +{ + struct device_node *rootdn; + const char *model = ""; + const char *system_id = ""; + const char *tmp; + const unsigned int *lp_index_ptr; + unsigned int lp_index = 0; + + seq_printf(m, "%s %s\n", MODULE_NAME, MODULE_VERS); + + rootdn = of_find_node_by_path("/"); + if (rootdn) { + tmp = of_get_property(rootdn, "model", NULL); + if (tmp) + model = tmp; + tmp = of_get_property(rootdn, "system-id", NULL); + if (tmp) + system_id = tmp; + lp_index_ptr = of_get_property(rootdn, "ibm,partition-no", + NULL); + if (lp_index_ptr) + lp_index = *lp_index_ptr; + of_node_put(rootdn); + } + seq_printf(m, "serial_number=%s\n", system_id); + seq_printf(m, "system_type=%s\n", model); + seq_printf(m, "partition_id=%d\n", (int)lp_index); + + return pseries_lparcfg_data(m, v); +} + +static int lparcfg_open(struct inode *inode, struct file *file) +{ + return single_open(file, lparcfg_data, NULL); +} + +static const struct file_operations lparcfg_fops = { + .owner = THIS_MODULE, + .read = seq_read, + .write = lparcfg_write, + .open = lparcfg_open, + .release = single_release, + .llseek = seq_lseek, +}; + +static int __init lparcfg_init(void) +{ + struct proc_dir_entry *ent; + umode_t mode = S_IRUSR | S_IRGRP | S_IROTH; + + /* Allow writing if we have FW_FEATURE_SPLPAR */ + if (firmware_has_feature(FW_FEATURE_SPLPAR)) + mode |= S_IWUSR; + + ent = proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops); + if (!ent) { + printk(KERN_ERR "Failed to create powerpc/lparcfg\n"); + return -EIO; + } + + proc_ppc64_lparcfg = ent; + return 0; +} + +static void __exit lparcfg_cleanup(void) +{ + if (proc_ppc64_lparcfg) + remove_proc_entry("lparcfg", proc_ppc64_lparcfg->parent); +} + +module_init(lparcfg_init); +module_exit(lparcfg_cleanup); +MODULE_DESCRIPTION("Interface for LPAR configuration data"); 
+MODULE_AUTHOR("Dave Engebretsen");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
new file mode 100644
index 00000000..5df77779
--- /dev/null
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -0,0 +1,251 @@
+/*
+ * Code to handle transition of Linux booting another kernel.
+ *
+ * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * Copyright (C) 2005 IBM Corporation.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/kexec.h>
+#include <linux/reboot.h>
+#include <linux/threads.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/irq.h>
+#include <linux/ftrace.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/sections.h>	/* _end */
+
+void machine_kexec_mask_interrupts(void)
+{
+	unsigned int i;
+	struct irq_desc *desc;
+
+	for_each_irq_desc(i, desc) {
+		struct irq_chip *chip;
+
+		chip = irq_desc_get_chip(desc);
+		if (!chip)
+			continue;
+
+		if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
+			chip->irq_eoi(&desc->irq_data);
+
+		if (chip->irq_mask)
+			chip->irq_mask(&desc->irq_data);
+
+		if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
+			chip->irq_disable(&desc->irq_data);
+	}
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	default_machine_crash_shutdown(regs);
+}
+
+/*
+ * Do whatever setup is needed on the image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+	if (ppc_md.machine_kexec_prepare)
+		return ppc_md.machine_kexec_prepare(image);
+	else
+		return default_machine_kexec_prepare(image);
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+void arch_crash_save_vmcoreinfo(void)
+{
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	VMCOREINFO_SYMBOL(node_data);
+	VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#endif
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+	VMCOREINFO_SYMBOL(contig_page_data);
+#endif
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+void machine_kexec(struct kimage *image)
+{
+	int save_ftrace_enabled;
+
+	save_ftrace_enabled = __ftrace_enabled_save();
+
+	if (ppc_md.machine_kexec)
+		ppc_md.machine_kexec(image);
+	else
+		default_machine_kexec(image);
+
+	__ftrace_enabled_restore(save_ftrace_enabled);
+
+	/* Fall back to normal restart if we're still alive. */
+	machine_restart(NULL);
+	for(;;);
+}
+
+void __init reserve_crashkernel(void)
+{
+	unsigned long long crash_size, crash_base;
+	int ret;
+
+	/* use common parsing */
+	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+			&crash_size, &crash_base);
+	if (ret == 0 && crash_size > 0) {
+		crashk_res.start = crash_base;
+		crashk_res.end = crash_base + crash_size - 1;
+	}
+
+	if (crashk_res.end == crashk_res.start) {
+		crashk_res.start = crashk_res.end = 0;
+		return;
+	}
+
+	/* We might have got these values via the command line or the
+	 * device tree, either way sanitise them now.
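+	 *
+	 * For example, booting with crashkernel=128M@32M (values
+	 * illustrative) arrives here with crash_size = 0x8000000 and
+	 * crashk_res.start = 0x2000000, before the alignment and overlap
+	 * checks below.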
*/ + + crash_size = resource_size(&crashk_res); + +#ifndef CONFIG_NONSTATIC_KERNEL + if (crashk_res.start != KDUMP_KERNELBASE) + printk("Crash kernel location must be 0x%x\n", + KDUMP_KERNELBASE); + + crashk_res.start = KDUMP_KERNELBASE; +#else + if (!crashk_res.start) { +#ifdef CONFIG_PPC64 + /* + * On 64bit we split the RMO in half but cap it at half of + * a small SLB (128MB) since the crash kernel needs to place + * itself and some stacks to be in the first segment. + */ + crashk_res.start = min(0x80000000ULL, (ppc64_rma_size / 2)); +#else + crashk_res.start = KDUMP_KERNELBASE; +#endif + } + + crash_base = PAGE_ALIGN(crashk_res.start); + if (crash_base != crashk_res.start) { + printk("Crash kernel base must be aligned to 0x%lx\n", + PAGE_SIZE); + crashk_res.start = crash_base; + } + +#endif + crash_size = PAGE_ALIGN(crash_size); + crashk_res.end = crashk_res.start + crash_size - 1; + + /* The crash region must not overlap the current kernel */ + if (overlaps_crashkernel(__pa(_stext), _end - _stext)) { + printk(KERN_WARNING + "Crash kernel can not overlap current kernel\n"); + crashk_res.start = crashk_res.end = 0; + return; + } + + /* Crash kernel trumps memory limit */ + if (memory_limit && memory_limit <= crashk_res.end) { + memory_limit = crashk_res.end + 1; + printk("Adjusted memory limit for crashkernel, now 0x%llx\n", + (unsigned long long)memory_limit); + } + + printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " + "for crashkernel (System RAM: %ldMB)\n", + (unsigned long)(crash_size >> 20), + (unsigned long)(crashk_res.start >> 20), + (unsigned long)(memblock_phys_mem_size() >> 20)); + + memblock_reserve(crashk_res.start, crash_size); +} + +int overlaps_crashkernel(unsigned long start, unsigned long size) +{ + return (start + size) > crashk_res.start && start <= crashk_res.end; +} + +/* Values we need to export to the second kernel via the device tree. */ +static phys_addr_t kernel_end; +static phys_addr_t crashk_size; + +static struct property kernel_end_prop = { + .name = "linux,kernel-end", + .length = sizeof(phys_addr_t), + .value = &kernel_end, +}; + +static struct property crashk_base_prop = { + .name = "linux,crashkernel-base", + .length = sizeof(phys_addr_t), + .value = &crashk_res.start, +}; + +static struct property crashk_size_prop = { + .name = "linux,crashkernel-size", + .length = sizeof(phys_addr_t), + .value = &crashk_size, +}; + +static void __init export_crashk_values(struct device_node *node) +{ + struct property *prop; + + /* There might be existing crash kernel properties, but we can't + * be sure what's in them, so remove them. 
*/ + prop = of_find_property(node, "linux,crashkernel-base", NULL); + if (prop) + prom_remove_property(node, prop); + + prop = of_find_property(node, "linux,crashkernel-size", NULL); + if (prop) + prom_remove_property(node, prop); + + if (crashk_res.start != 0) { + prom_add_property(node, &crashk_base_prop); + crashk_size = resource_size(&crashk_res); + prom_add_property(node, &crashk_size_prop); + } +} + +static int __init kexec_setup(void) +{ + struct device_node *node; + struct property *prop; + + node = of_find_node_by_path("/chosen"); + if (!node) + return -ENOENT; + + /* remove any stale properties so ours can be found */ + prop = of_find_property(node, kernel_end_prop.name, NULL); + if (prop) + prom_remove_property(node, prop); + + /* information needed by userspace when using default_machine_kexec */ + kernel_end = __pa(_end); + prom_add_property(node, &kernel_end_prop); + + export_crashk_values(node); + + of_node_put(node); + return 0; +} +late_initcall(kexec_setup); diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c new file mode 100644 index 00000000..affe5dcc --- /dev/null +++ b/arch/powerpc/kernel/machine_kexec_32.c @@ -0,0 +1,69 @@ +/* + * PPC32 code to handle Linux booting another kernel. + * + * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com> + * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz + * Copyright (C) 2005 IBM Corporation. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/kexec.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <asm/cacheflush.h> +#include <asm/hw_irq.h> +#include <asm/io.h> + +typedef void (*relocate_new_kernel_t)( + unsigned long indirection_page, + unsigned long reboot_code_buffer, + unsigned long start_address) __noreturn; + +/* + * This is a generic machine_kexec function suitable at least for + * non-OpenFirmware embedded platforms. + * It merely copies the image relocation code to the control page and + * jumps to it. + * A platform specific function may just call this one. 
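+ *
+ * (Sketch, hook name hypothetical: a board port could install
+ *	ppc_md.machine_kexec = my_board_machine_kexec;
+ *  where my_board_machine_kexec() quiesces board-specific DMA and then
+ *  calls default_machine_kexec() itself.)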
+ */ +void default_machine_kexec(struct kimage *image) +{ + extern const unsigned char relocate_new_kernel[]; + extern const unsigned int relocate_new_kernel_size; + unsigned long page_list; + unsigned long reboot_code_buffer, reboot_code_buffer_phys; + relocate_new_kernel_t rnk; + + /* Interrupts aren't acceptable while we reboot */ + local_irq_disable(); + + /* mask each interrupt so we are in a more sane state for the + * kexec kernel */ + machine_kexec_mask_interrupts(); + + page_list = image->head; + + /* we need both effective and real address here */ + reboot_code_buffer = + (unsigned long)page_address(image->control_code_page); + reboot_code_buffer_phys = virt_to_phys((void *)reboot_code_buffer); + + /* copy our kernel relocation code to the control code page */ + memcpy((void *)reboot_code_buffer, relocate_new_kernel, + relocate_new_kernel_size); + + flush_icache_range(reboot_code_buffer, + reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE); + printk(KERN_INFO "Bye!\n"); + + /* now call it */ + rnk = (relocate_new_kernel_t) reboot_code_buffer; + (*rnk)(page_list, reboot_code_buffer_phys, image->start); +} + +int default_machine_kexec_prepare(struct kimage *image) +{ + return 0; +} diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c new file mode 100644 index 00000000..d7f60908 --- /dev/null +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -0,0 +1,404 @@ +/* + * PPC64 code to handle Linux booting another kernel. + * + * Copyright (C) 2004-2005, IBM Corp. + * + * Created by: Milton D Miller II + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +#include <linux/kexec.h> +#include <linux/smp.h> +#include <linux/thread_info.h> +#include <linux/init_task.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/cpu.h> + +#include <asm/page.h> +#include <asm/current.h> +#include <asm/machdep.h> +#include <asm/cacheflush.h> +#include <asm/paca.h> +#include <asm/mmu.h> +#include <asm/sections.h> /* _end */ +#include <asm/prom.h> +#include <asm/smp.h> +#include <asm/hw_breakpoint.h> + +int default_machine_kexec_prepare(struct kimage *image) +{ + int i; + unsigned long begin, end; /* limits of segment */ + unsigned long low, high; /* limits of blocked memory range */ + struct device_node *node; + const unsigned long *basep; + const unsigned int *sizep; + + if (!ppc_md.hpte_clear_all) + return -ENOENT; + + /* + * Since we use the kernel fault handlers and paging code to + * handle the virtual mode, we must make sure no destination + * overlaps kernel static data or bss. + */ + for (i = 0; i < image->nr_segments; i++) + if (image->segment[i].mem < __pa(_end)) + return -ETXTBSY; + + /* + * For non-LPAR, we absolutely can not overwrite the mmu hash + * table, since we are still using the bolted entries in it to + * do the copy. Check that here. + * + * It is safe if the end is below the start of the blocked + * region (end <= low), or if the beginning is after the + * end of the blocked region (begin >= high). Use the + * boolean identity !(a || b) === (!a && !b). 
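+ * Spelling that out: negating the safe condition
+ * (end <= low || begin >= high) gives the overlap test used below,
+ * (begin < high) && (end > low).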
+ */
+	if (htab_address) {
+		low = __pa(htab_address);
+		high = low + htab_size_bytes;
+
+		for (i = 0; i < image->nr_segments; i++) {
+			begin = image->segment[i].mem;
+			end = begin + image->segment[i].memsz;
+
+			if ((begin < high) && (end > low))
+				return -ETXTBSY;
+		}
+	}
+
+	/* We also should not overwrite the tce tables */
+	for_each_node_by_type(node, "pci") {
+		basep = of_get_property(node, "linux,tce-base", NULL);
+		sizep = of_get_property(node, "linux,tce-size", NULL);
+		if (basep == NULL || sizep == NULL)
+			continue;
+
+		low = *basep;
+		high = low + (*sizep);
+
+		for (i = 0; i < image->nr_segments; i++) {
+			begin = image->segment[i].mem;
+			end = begin + image->segment[i].memsz;
+
+			if ((begin < high) && (end > low))
+				return -ETXTBSY;
+		}
+	}
+
+	return 0;
+}
+
+#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)
+
+static void copy_segments(unsigned long ind)
+{
+	unsigned long entry;
+	unsigned long *ptr;
+	void *dest;
+	void *addr;
+
+	/*
+	 * We rely on kexec_load to create lists that properly
+	 * initialize these pointers before they are used.
+	 * We will still crash if the list is wrong, but at least
+	 * the compiler will be quiet.
+	 */
+	ptr = NULL;
+	dest = NULL;
+
+	for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
+		addr = __va(entry & PAGE_MASK);
+
+		switch (entry & IND_FLAGS) {
+		case IND_DESTINATION:
+			dest = addr;
+			break;
+		case IND_INDIRECTION:
+			ptr = addr;
+			break;
+		case IND_SOURCE:
+			copy_page(dest, addr);
+			dest += PAGE_SIZE;
+		}
+	}
+}
+
+void kexec_copy_flush(struct kimage *image)
+{
+	long i, nr_segments = image->nr_segments;
+	struct kexec_segment ranges[KEXEC_SEGMENT_MAX];
+
+	/* save the ranges on the stack to efficiently flush the icache */
+	memcpy(ranges, image->segment, sizeof(ranges));
+
+	/*
+	 * After this call we may not use anything allocated in dynamic
+	 * memory, including *image.
+	 *
+	 * Only globals and the stack are allowed.
+	 */
+	copy_segments(image->head);
+
+	/*
+	 * we need to clear the icache for all dest pages sometime,
+	 * including ones that were in place on the original copy
+	 */
+	for (i = 0; i < nr_segments; i++)
+		flush_icache_range((unsigned long)__va(ranges[i].mem),
+			(unsigned long)__va(ranges[i].mem + ranges[i].memsz));
+}
+
+#ifdef CONFIG_SMP
+
+static int kexec_all_irq_disabled = 0;
+
+static void kexec_smp_down(void *arg)
+{
+	local_irq_disable();
+	mb(); /* make sure our irqs are disabled before we say they are */
+	get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
+	while(kexec_all_irq_disabled == 0)
+		cpu_relax();
+	mb(); /* make sure all irqs are disabled before this */
+	hw_breakpoint_disable();
+	/*
+	 * Now every CPU has IRQs off, we can clear out any pending
+	 * IPIs and be sure that no more will come in after this.
+	 */
+	if (ppc_md.kexec_cpu_down)
+		ppc_md.kexec_cpu_down(0, 1);
+
+	kexec_smp_wait();
+	/* NOTREACHED */
+}
+
+static void kexec_prepare_cpus_wait(int wait_state)
+{
+	int my_cpu, i, notified=-1;
+
+	hw_breakpoint_disable();
+	my_cpu = get_cpu();
+	/* Make sure each CPU has at least made it to the state we need.
+	 *
+	 * FIXME: There is a (slim) chance of a problem if not all of the CPUs
+	 * are correctly onlined.  If we start a CPU on boot with RTAS
+	 * start-cpu, but that CPU somehow doesn't write callin_cpu_map[] in
+	 * time, the boot CPU will time out.  If it does eventually execute
+	 * stuff, the secondary will start up (paca[].cpu_start was written) and
+	 * get into a peculiar state.
If the platform supports
+	 * smp_ops->take_timebase(), the secondary CPU will probably be spinning
+	 * in there.  If not (i.e. pseries), the secondary will continue on and
+	 * try to online itself/idle/etc. If it survives that, we need to find
+	 * these possible-but-not-online-but-should-be CPUs and chaperone them
+	 * into kexec_smp_wait().
+	 */
+	for_each_online_cpu(i) {
+		if (i == my_cpu)
+			continue;
+
+		while (paca[i].kexec_state < wait_state) {
+			barrier();
+			if (i != notified) {
+				printk(KERN_INFO "kexec: waiting for cpu %d "
+				       "(physical %d) to enter %i state\n",
+				       i, paca[i].hw_cpu_id, wait_state);
+				notified = i;
+			}
+		}
+	}
+	mb();
+}
+
+/*
+ * We need to make sure each present CPU is online.  The next kernel will scan
+ * the device tree and assume primary threads are online and query secondary
+ * threads via RTAS to online them if required.  If we don't online primary
+ * threads, they will be stuck.  However, we also online secondary threads as we
+ * may be using 'cede offline'.  In this case RTAS doesn't see the secondary
+ * threads as offline -- and again, these CPUs will be stuck.
+ *
+ * So, we online all CPUs that should be running, including secondary threads.
+ */
+static void wake_offline_cpus(void)
+{
+	int cpu = 0;
+
+	for_each_present_cpu(cpu) {
+		if (!cpu_online(cpu)) {
+			printk(KERN_INFO "kexec: Waking offline cpu %d.\n",
+			       cpu);
+			cpu_up(cpu);
+		}
+	}
+}
+
+static void kexec_prepare_cpus(void)
+{
+	wake_offline_cpus();
+	smp_call_function(kexec_smp_down, NULL, /* wait */0);
+	local_irq_disable();
+	mb(); /* make sure IRQs are disabled before we say they are */
+	get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
+
+	kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF);
+	/* we are sure every CPU has IRQs off at this point */
+	kexec_all_irq_disabled = 1;
+
+	/* after we tell the others to go down */
+	if (ppc_md.kexec_cpu_down)
+		ppc_md.kexec_cpu_down(0, 0);
+
+	/*
+	 * Before removing MMU mappings make sure all CPUs have entered real
+	 * mode:
+	 */
+	kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
+
+	put_cpu();
+}
+
+#else /* ! SMP */
+
+static void kexec_prepare_cpus(void)
+{
+	/*
+	 * move the secondaries to us so that we can copy
+	 * the new kernel 0-0x100 safely
+	 *
+	 * do this if kexec in setup.c ?
+	 *
+	 * We need to release the cpus if we are ever going from a
+	 * UP to an SMP kernel.
+	 */
+	smp_release_cpus();
+	if (ppc_md.kexec_cpu_down)
+		ppc_md.kexec_cpu_down(0, 0);
+	local_irq_disable();
+}
+
+#endif /* SMP */
+
+/*
+ * kexec thread structure and stack.
+ *
+ * We need to make sure that this is 16384-byte aligned due to the
+ * way process stacks are handled.  It also must be statically allocated
+ * or allocated as part of the kimage, because everything else may be
+ * overwritten when we copy the kexec image.  We piggyback on the
+ * "init_task" linker section here to statically allocate a stack.
+ *
+ * We could use a smaller stack if we don't care about anything using
+ * current, but that audit has not been performed.
+ */
+static union thread_union kexec_stack __init_task_data =
+	{ };
+
+/*
+ * For similar reasons to the stack above, the kexecing CPU needs to be on a
+ * static PACA; we switch to kexec_paca.
+ */ +struct paca_struct kexec_paca; + +/* Our assembly helper, in kexec_stub.S */ +extern void kexec_sequence(void *newstack, unsigned long start, + void *image, void *control, + void (*clear_all)(void)) __noreturn; + +/* too late to fail here */ +void default_machine_kexec(struct kimage *image) +{ + /* prepare control code if any */ + + /* + * If the kexec boot is the normal one, need to shutdown other cpus + * into our wait loop and quiesce interrupts. + * Otherwise, in the case of crashed mode (crashing_cpu >= 0), + * stopping other CPUs and collecting their pt_regs is done before + * using debugger IPI. + */ + + if (crashing_cpu == -1) + kexec_prepare_cpus(); + + pr_debug("kexec: Starting switchover sequence.\n"); + + /* switch to a staticly allocated stack. Based on irq stack code. + * XXX: the task struct will likely be invalid once we do the copy! + */ + kexec_stack.thread_info.task = current_thread_info()->task; + kexec_stack.thread_info.flags = 0; + + /* We need a static PACA, too; copy this CPU's PACA over and switch to + * it. Also poison per_cpu_offset to catch anyone using non-static + * data. + */ + memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct)); + kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL; + paca = (struct paca_struct *)RELOC_HIDE(&kexec_paca, 0) - + kexec_paca.paca_index; + setup_paca(&kexec_paca); + + /* XXX: If anyone does 'dynamic lppacas' this will also need to be + * switched to a static version! + */ + + /* Some things are best done in assembly. Finding globals with + * a toc is easier in C, so pass in what we can. + */ + kexec_sequence(&kexec_stack, image->start, image, + page_address(image->control_code_page), + ppc_md.hpte_clear_all); + /* NOTREACHED */ +} + +/* Values we need to export to the second kernel via the device tree. */ +static unsigned long htab_base; + +static struct property htab_base_prop = { + .name = "linux,htab-base", + .length = sizeof(unsigned long), + .value = &htab_base, +}; + +static struct property htab_size_prop = { + .name = "linux,htab-size", + .length = sizeof(unsigned long), + .value = &htab_size_bytes, +}; + +static int __init export_htab_values(void) +{ + struct device_node *node; + struct property *prop; + + /* On machines with no htab htab_address is NULL */ + if (!htab_address) + return -ENODEV; + + node = of_find_node_by_path("/chosen"); + if (!node) + return -ENODEV; + + /* remove any stale propertys so ours can be found */ + prop = of_find_property(node, htab_base_prop.name, NULL); + if (prop) + prom_remove_property(node, prop); + prop = of_find_property(node, htab_size_prop.name, NULL); + if (prop) + prom_remove_property(node, prop); + + htab_base = __pa(htab_address); + prom_add_property(node, &htab_base_prop); + prom_add_property(node, &htab_size_prop); + + of_node_put(node); + return 0; +} +late_initcall(export_htab_values); diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S new file mode 100644 index 00000000..ba16874f --- /dev/null +++ b/arch/powerpc/kernel/misc.S @@ -0,0 +1,123 @@ +/* + * This file contains miscellaneous low-level functions. + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras. + * + * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) + * + * setjmp/longjmp code by Paul Mackerras. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/ppc_asm.h> +#include <asm/unistd.h> +#include <asm/asm-compat.h> +#include <asm/asm-offsets.h> + + .text + +/* + * Returns (address we are running at) - (address we were linked at) + * for use before the text and data are mapped to KERNELBASE. + */ + +_GLOBAL(reloc_offset) + mflr r0 + bl 1f +1: mflr r3 + PPC_LL r4,(2f-1b)(r3) + subf r3,r4,r3 + mtlr r0 + blr + + .align 3 +2: PPC_LONG 1b + +/* + * add_reloc_offset(x) returns x + reloc_offset(). + */ +_GLOBAL(add_reloc_offset) + mflr r0 + bl 1f +1: mflr r5 + PPC_LL r4,(2f-1b)(r5) + subf r5,r4,r5 + add r3,r3,r5 + mtlr r0 + blr + + .align 3 +2: PPC_LONG 1b + +_GLOBAL(kernel_execve) + li r0,__NR_execve + sc + bnslr + neg r3,r3 + blr + +_GLOBAL(setjmp) + mflr r0 + PPC_STL r0,0(r3) + PPC_STL r1,SZL(r3) + PPC_STL r2,2*SZL(r3) + mfcr r0 + PPC_STL r0,3*SZL(r3) + PPC_STL r13,4*SZL(r3) + PPC_STL r14,5*SZL(r3) + PPC_STL r15,6*SZL(r3) + PPC_STL r16,7*SZL(r3) + PPC_STL r17,8*SZL(r3) + PPC_STL r18,9*SZL(r3) + PPC_STL r19,10*SZL(r3) + PPC_STL r20,11*SZL(r3) + PPC_STL r21,12*SZL(r3) + PPC_STL r22,13*SZL(r3) + PPC_STL r23,14*SZL(r3) + PPC_STL r24,15*SZL(r3) + PPC_STL r25,16*SZL(r3) + PPC_STL r26,17*SZL(r3) + PPC_STL r27,18*SZL(r3) + PPC_STL r28,19*SZL(r3) + PPC_STL r29,20*SZL(r3) + PPC_STL r30,21*SZL(r3) + PPC_STL r31,22*SZL(r3) + li r3,0 + blr + +_GLOBAL(longjmp) + PPC_LCMPI r4,0 + bne 1f + li r4,1 +1: PPC_LL r13,4*SZL(r3) + PPC_LL r14,5*SZL(r3) + PPC_LL r15,6*SZL(r3) + PPC_LL r16,7*SZL(r3) + PPC_LL r17,8*SZL(r3) + PPC_LL r18,9*SZL(r3) + PPC_LL r19,10*SZL(r3) + PPC_LL r20,11*SZL(r3) + PPC_LL r21,12*SZL(r3) + PPC_LL r22,13*SZL(r3) + PPC_LL r23,14*SZL(r3) + PPC_LL r24,15*SZL(r3) + PPC_LL r25,16*SZL(r3) + PPC_LL r26,17*SZL(r3) + PPC_LL r27,18*SZL(r3) + PPC_LL r28,19*SZL(r3) + PPC_LL r29,20*SZL(r3) + PPC_LL r30,21*SZL(r3) + PPC_LL r31,22*SZL(r3) + PPC_LL r0,3*SZL(r3) + mtcrf 0x38,r0 + PPC_LL r0,0(r3) + PPC_LL r1,SZL(r3) + PPC_LL r2,2*SZL(r3) + mtlr r0 + mr r3,r4 + blr diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S new file mode 100644 index 00000000..7cd07b42 --- /dev/null +++ b/arch/powerpc/kernel/misc_32.S @@ -0,0 +1,1010 @@ +/* + * This file contains miscellaneous low-level functions. + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras. + * + * kexec bits: + * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com> + * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz + * PPC44x port. Copyright (C) 2011, IBM Corporation + * Author: Suzuki Poulose <suzuki@in.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include <linux/sys.h> +#include <asm/unistd.h> +#include <asm/errno.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/cache.h> +#include <asm/cputable.h> +#include <asm/mmu.h> +#include <asm/ppc_asm.h> +#include <asm/thread_info.h> +#include <asm/asm-offsets.h> +#include <asm/processor.h> +#include <asm/kexec.h> +#include <asm/bug.h> +#include <asm/ptrace.h> + + .text + +_GLOBAL(call_do_softirq) + mflr r0 + stw r0,4(r1) + stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) + mr r1,r3 + bl __do_softirq + lwz r1,0(r1) + lwz r0,4(r1) + mtlr r0 + blr + +_GLOBAL(call_handle_irq) + mflr r0 + stw r0,4(r1) + mtctr r6 + stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) + mr r1,r5 + bctrl + lwz r1,0(r1) + lwz r0,4(r1) + mtlr r0 + blr + +/* + * This returns the high 64 bits of the product of two 64-bit numbers. + */ +_GLOBAL(mulhdu) + cmpwi r6,0 + cmpwi cr1,r3,0 + mr r10,r4 + mulhwu r4,r4,r5 + beq 1f + mulhwu r0,r10,r6 + mullw r7,r10,r5 + addc r7,r0,r7 + addze r4,r4 +1: beqlr cr1 /* all done if high part of A is 0 */ + mr r10,r3 + mullw r9,r3,r5 + mulhwu r3,r3,r5 + beq 2f + mullw r0,r10,r6 + mulhwu r8,r10,r6 + addc r7,r0,r7 + adde r4,r4,r8 + addze r3,r3 +2: addc r4,r4,r9 + addze r3,r3 + blr + +/* + * sub_reloc_offset(x) returns x - reloc_offset(). + */ +_GLOBAL(sub_reloc_offset) + mflr r0 + bl 1f +1: mflr r5 + lis r4,1b@ha + addi r4,r4,1b@l + subf r5,r4,r5 + subf r3,r5,r3 + mtlr r0 + blr + +/* + * reloc_got2 runs through the .got2 section adding an offset + * to each entry. + */ +_GLOBAL(reloc_got2) + mflr r11 + lis r7,__got2_start@ha + addi r7,r7,__got2_start@l + lis r8,__got2_end@ha + addi r8,r8,__got2_end@l + subf r8,r7,r8 + srwi. r8,r8,2 + beqlr + mtctr r8 + bl 1f +1: mflr r0 + lis r4,1b@ha + addi r4,r4,1b@l + subf r0,r4,r0 + add r7,r0,r7 +2: lwz r0,0(r7) + add r0,r0,r3 + stw r0,0(r7) + addi r7,r7,4 + bdnz 2b + mtlr r11 + blr + +/* + * call_setup_cpu - call the setup_cpu function for this cpu + * r3 = data offset, r24 = cpu number + * + * Setup function is called with: + * r3 = data offset + * r4 = ptr to CPU spec (relocated) + */ +_GLOBAL(call_setup_cpu) + addis r4,r3,cur_cpu_spec@ha + addi r4,r4,cur_cpu_spec@l + lwz r4,0(r4) + add r4,r4,r3 + lwz r5,CPU_SPEC_SETUP(r4) + cmpwi 0,r5,0 + add r5,r5,r3 + beqlr + mtctr r5 + bctr + +#if defined(CONFIG_CPU_FREQ_PMAC) && defined(CONFIG_6xx) + +/* This gets called by via-pmu.c to switch the PLL selection + * on 750fx CPU. This function should really be moved to some + * other place (as most of the cpufreq code in via-pmu + */ +_GLOBAL(low_choose_750fx_pll) + /* Clear MSR:EE */ + mfmsr r7 + rlwinm r0,r7,0,17,15 + mtmsr r0 + + /* If switching to PLL1, disable HID0:BTIC */ + cmplwi cr0,r3,0 + beq 1f + mfspr r5,SPRN_HID0 + rlwinm r5,r5,0,27,25 + sync + mtspr SPRN_HID0,r5 + isync + sync + +1: + /* Calc new HID1 value */ + mfspr r4,SPRN_HID1 /* Build a HID1:PS bit from parameter */ + rlwinm r5,r3,16,15,15 /* Clear out HID1:PS from value read */ + rlwinm r4,r4,0,16,14 /* Could have I used rlwimi here ? 
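(probably yes: a single rlwimi r4,r3,16,15,15 insert-under-mask would replace the clear-and-or pair)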
*/ + or r4,r4,r5 + mtspr SPRN_HID1,r4 + + /* Store new HID1 image */ + rlwinm r6,r1,0,0,(31-THREAD_SHIFT) + lwz r6,TI_CPU(r6) + slwi r6,r6,2 + addis r6,r6,nap_save_hid1@ha + stw r4,nap_save_hid1@l(r6) + + /* If switching to PLL0, enable HID0:BTIC */ + cmplwi cr0,r3,0 + bne 1f + mfspr r5,SPRN_HID0 + ori r5,r5,HID0_BTIC + sync + mtspr SPRN_HID0,r5 + isync + sync + +1: + /* Return */ + mtmsr r7 + blr + +_GLOBAL(low_choose_7447a_dfs) + /* Clear MSR:EE */ + mfmsr r7 + rlwinm r0,r7,0,17,15 + mtmsr r0 + + /* Calc new HID1 value */ + mfspr r4,SPRN_HID1 + insrwi r4,r3,1,9 /* insert parameter into bit 9 */ + sync + mtspr SPRN_HID1,r4 + sync + isync + + /* Return */ + mtmsr r7 + blr + +#endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_6xx */ + +/* + * complement mask on the msr then "or" some values on. + * _nmask_and_or_msr(nmask, value_to_or) + */ +_GLOBAL(_nmask_and_or_msr) + mfmsr r0 /* Get current msr */ + andc r0,r0,r3 /* And off the bits set in r3 (first parm) */ + or r0,r0,r4 /* Or on the bits in r4 (second parm) */ + SYNC /* Some chip revs have problems here... */ + mtmsr r0 /* Update machine state */ + isync + blr /* Done */ + +#ifdef CONFIG_40x + +/* + * Do an IO access in real mode + */ +_GLOBAL(real_readb) + mfmsr r7 + ori r0,r7,MSR_DR + xori r0,r0,MSR_DR + sync + mtmsr r0 + sync + isync + lbz r3,0(r3) + sync + mtmsr r7 + sync + isync + blr + + /* + * Do an IO access in real mode + */ +_GLOBAL(real_writeb) + mfmsr r7 + ori r0,r7,MSR_DR + xori r0,r0,MSR_DR + sync + mtmsr r0 + sync + isync + stb r3,0(r4) + sync + mtmsr r7 + sync + isync + blr + +#endif /* CONFIG_40x */ + + +/* + * Flush instruction cache. + * This is a no-op on the 601. + */ +_GLOBAL(flush_instruction_cache) +#if defined(CONFIG_8xx) + isync + lis r5, IDC_INVALL@h + mtspr SPRN_IC_CST, r5 +#elif defined(CONFIG_4xx) +#ifdef CONFIG_403GCX + li r3, 512 + mtctr r3 + lis r4, KERNELBASE@h +1: iccci 0, r4 + addi r4, r4, 16 + bdnz 1b +#else + lis r3, KERNELBASE@h + iccci 0,r3 +#endif +#elif CONFIG_FSL_BOOKE +BEGIN_FTR_SECTION + mfspr r3,SPRN_L1CSR0 + ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC + /* msync; isync recommended here */ + mtspr SPRN_L1CSR0,r3 + isync + blr +END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE) + mfspr r3,SPRN_L1CSR1 + ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR + mtspr SPRN_L1CSR1,r3 +#else + mfspr r3,SPRN_PVR + rlwinm r3,r3,16,16,31 + cmpwi 0,r3,1 + beqlr /* for 601, do nothing */ + /* 603/604 processor - use invalidate-all bit in HID0 */ + mfspr r3,SPRN_HID0 + ori r3,r3,HID0_ICFI + mtspr SPRN_HID0,r3 +#endif /* CONFIG_8xx/4xx */ + isync + blr + +/* + * Write any modified data cache blocks out to memory + * and invalidate the corresponding instruction cache blocks. + * This is a no-op on the 601. + * + * flush_icache_range(unsigned long start, unsigned long stop) + */ +_KPROBE(__flush_icache_range) +BEGIN_FTR_SECTION + blr /* for 601, do nothing */ +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) + li r5,L1_CACHE_BYTES-1 + andc r3,r3,r5 + subf r4,r3,r4 + add r4,r4,r5 + srwi. r4,r4,L1_CACHE_SHIFT + beqlr + mtctr r4 + mr r6,r3 +1: dcbst 0,r3 + addi r3,r3,L1_CACHE_BYTES + bdnz 1b + sync /* wait for dcbst's to get to ram */ +#ifndef CONFIG_44x + mtctr r4 +2: icbi 0,r6 + addi r6,r6,L1_CACHE_BYTES + bdnz 2b +#else + /* Flash invalidate on 44x because we are passed kmapped addresses and + this doesn't work for userspace pages due to the virtually tagged + icache. Sigh. */ + iccci 0, r0 +#endif + sync /* additional sync needed on g4 */ + isync + blr +/* + * Write any modified data cache blocks out to memory. 
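(One dcbst per L1 cache line over the rounded range, followed by a sync.)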
+ * Does not invalidate the corresponding cache lines (especially for + * any corresponding instruction cache). + * + * clean_dcache_range(unsigned long start, unsigned long stop) + */ +_GLOBAL(clean_dcache_range) + li r5,L1_CACHE_BYTES-1 + andc r3,r3,r5 + subf r4,r3,r4 + add r4,r4,r5 + srwi. r4,r4,L1_CACHE_SHIFT + beqlr + mtctr r4 + +1: dcbst 0,r3 + addi r3,r3,L1_CACHE_BYTES + bdnz 1b + sync /* wait for dcbst's to get to ram */ + blr + +/* + * Write any modified data cache blocks out to memory and invalidate them. + * Does not invalidate the corresponding instruction cache blocks. + * + * flush_dcache_range(unsigned long start, unsigned long stop) + */ +_GLOBAL(flush_dcache_range) + li r5,L1_CACHE_BYTES-1 + andc r3,r3,r5 + subf r4,r3,r4 + add r4,r4,r5 + srwi. r4,r4,L1_CACHE_SHIFT + beqlr + mtctr r4 + +1: dcbf 0,r3 + addi r3,r3,L1_CACHE_BYTES + bdnz 1b + sync /* wait for dcbst's to get to ram */ + blr + +/* + * Like above, but invalidate the D-cache. This is used by the 8xx + * to invalidate the cache so the PPC core doesn't get stale data + * from the CPM (no cache snooping here :-). + * + * invalidate_dcache_range(unsigned long start, unsigned long stop) + */ +_GLOBAL(invalidate_dcache_range) + li r5,L1_CACHE_BYTES-1 + andc r3,r3,r5 + subf r4,r3,r4 + add r4,r4,r5 + srwi. r4,r4,L1_CACHE_SHIFT + beqlr + mtctr r4 + +1: dcbi 0,r3 + addi r3,r3,L1_CACHE_BYTES + bdnz 1b + sync /* wait for dcbi's to get to ram */ + blr + +/* + * Flush a particular page from the data cache to RAM. + * Note: this is necessary because the instruction cache does *not* + * snoop from the data cache. + * This is a no-op on the 601 which has a unified cache. + * + * void __flush_dcache_icache(void *page) + */ +_GLOBAL(__flush_dcache_icache) +BEGIN_FTR_SECTION + blr +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) + rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */ + li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */ + mtctr r4 + mr r6,r3 +0: dcbst 0,r3 /* Write line to ram */ + addi r3,r3,L1_CACHE_BYTES + bdnz 0b + sync +#ifdef CONFIG_44x + /* We don't flush the icache on 44x. Those have a virtual icache + * and we don't have access to the virtual address here (it's + * not the page vaddr but where it's mapped in user space). The + * flushing of the icache on these is handled elsewhere, when + * a change in the address space occurs, before returning to + * user space + */ +BEGIN_MMU_FTR_SECTION + blr +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x) +#endif /* CONFIG_44x */ + mtctr r4 +1: icbi 0,r6 + addi r6,r6,L1_CACHE_BYTES + bdnz 1b + sync + isync + blr + +#ifndef CONFIG_BOOKE +/* + * Flush a particular page from the data cache to RAM, identified + * by its physical address. We turn off the MMU so we can just use + * the physical address (this may be a highmem page without a kernel + * mapping). 
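+ * Data translation (MSR:DR) is switched off around the loop and restored
+ * afterwards.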
+ * + * void __flush_dcache_icache_phys(unsigned long physaddr) + */ +_GLOBAL(__flush_dcache_icache_phys) +BEGIN_FTR_SECTION + blr /* for 601, do nothing */ +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) + mfmsr r10 + rlwinm r0,r10,0,28,26 /* clear DR */ + mtmsr r0 + isync + rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */ + li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */ + mtctr r4 + mr r6,r3 +0: dcbst 0,r3 /* Write line to ram */ + addi r3,r3,L1_CACHE_BYTES + bdnz 0b + sync + mtctr r4 +1: icbi 0,r6 + addi r6,r6,L1_CACHE_BYTES + bdnz 1b + sync + mtmsr r10 /* restore DR */ + isync + blr +#endif /* CONFIG_BOOKE */ + +/* + * Clear pages using the dcbz instruction, which doesn't cause any + * memory traffic (except to write out any cache lines which get + * displaced). This only works on cacheable memory. + * + * void clear_pages(void *page, int order) ; + */ +_GLOBAL(clear_pages) + li r0,PAGE_SIZE/L1_CACHE_BYTES + slw r0,r0,r4 + mtctr r0 +1: dcbz 0,r3 + addi r3,r3,L1_CACHE_BYTES + bdnz 1b + blr + +/* + * Copy a whole page. We use the dcbz instruction on the destination + * to reduce memory traffic (it eliminates the unnecessary reads of + * the destination into cache). This requires that the destination + * is cacheable. + */ +#define COPY_16_BYTES \ + lwz r6,4(r4); \ + lwz r7,8(r4); \ + lwz r8,12(r4); \ + lwzu r9,16(r4); \ + stw r6,4(r3); \ + stw r7,8(r3); \ + stw r8,12(r3); \ + stwu r9,16(r3) + +_GLOBAL(copy_page) + addi r3,r3,-4 + addi r4,r4,-4 + + li r5,4 + +#if MAX_COPY_PREFETCH > 1 + li r0,MAX_COPY_PREFETCH + li r11,4 + mtctr r0 +11: dcbt r11,r4 + addi r11,r11,L1_CACHE_BYTES + bdnz 11b +#else /* MAX_COPY_PREFETCH == 1 */ + dcbt r5,r4 + li r11,L1_CACHE_BYTES+4 +#endif /* MAX_COPY_PREFETCH */ + li r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH + crclr 4*cr0+eq +2: + mtctr r0 +1: + dcbt r11,r4 + dcbz r5,r3 + COPY_16_BYTES +#if L1_CACHE_BYTES >= 32 + COPY_16_BYTES +#if L1_CACHE_BYTES >= 64 + COPY_16_BYTES + COPY_16_BYTES +#if L1_CACHE_BYTES >= 128 + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES +#endif +#endif +#endif + bdnz 1b + beqlr + crnot 4*cr0+eq,4*cr0+eq + li r0,MAX_COPY_PREFETCH + li r11,4 + b 2b + +/* + * void atomic_clear_mask(atomic_t mask, atomic_t *addr) + * void atomic_set_mask(atomic_t mask, atomic_t *addr); + */ +_GLOBAL(atomic_clear_mask) +10: lwarx r5,0,r4 + andc r5,r5,r3 + PPC405_ERR77(0,r4) + stwcx. r5,0,r4 + bne- 10b + blr +_GLOBAL(atomic_set_mask) +10: lwarx r5,0,r4 + or r5,r5,r3 + PPC405_ERR77(0,r4) + stwcx. r5,0,r4 + bne- 10b + blr + +/* + * Extended precision shifts. + * + * Updated to be valid for shift counts from 0 to 63 inclusive. + * -- Gabriel + * + * R3/R4 has 64 bit value + * R5 has shift count + * result in R3/R4 + * + * ashrdi3: arithmetic right shift (sign propagation) + * lshrdi3: logical right shift + * ashldi3: left shift + */ +_GLOBAL(__ashrdi3) + subfic r6,r5,32 + srw r4,r4,r5 # LSW = count > 31 ? 0 : LSW >> count + addi r7,r5,32 # could be xori, or addi with -32 + slw r6,r3,r6 # t1 = count > 31 ? 0 : MSW << (32-count) + rlwinm r8,r7,0,32 # t3 = (count < 32) ? 32 : 0 + sraw r7,r3,r7 # t2 = MSW >> (count-32) + or r4,r4,r6 # LSW |= t1 + slw r7,r7,r8 # t2 = (count < 32) ? 0 : t2 + sraw r3,r3,r5 # MSW = MSW >> count + or r4,r4,r7 # LSW |= t2 + blr + +_GLOBAL(__ashldi3) + subfic r6,r5,32 + slw r3,r3,r5 # MSW = count > 31 ? 0 : MSW << count + addi r7,r5,32 # could be xori, or addi with -32 + srw r6,r4,r6 # t1 = count > 31 ? 0 : LSW >> (32-count) + slw r7,r4,r7 # t2 = count < 32 ? 
0 : LSW << (count-32) + or r3,r3,r6 # MSW |= t1 + slw r4,r4,r5 # LSW = LSW << count + or r3,r3,r7 # MSW |= t2 + blr + +_GLOBAL(__lshrdi3) + subfic r6,r5,32 + srw r4,r4,r5 # LSW = count > 31 ? 0 : LSW >> count + addi r7,r5,32 # could be xori, or addi with -32 + slw r6,r3,r6 # t1 = count > 31 ? 0 : MSW << (32-count) + srw r7,r3,r7 # t2 = count < 32 ? 0 : MSW >> (count-32) + or r4,r4,r6 # LSW |= t1 + srw r3,r3,r5 # MSW = MSW >> count + or r4,r4,r7 # LSW |= t2 + blr + +/* + * 64-bit comparison: __ucmpdi2(u64 a, u64 b) + * Returns 0 if a < b, 1 if a == b, 2 if a > b. + */ +_GLOBAL(__ucmpdi2) + cmplw r3,r5 + li r3,1 + bne 1f + cmplw r4,r6 + beqlr +1: li r3,0 + bltlr + li r3,2 + blr + +_GLOBAL(abs) + srawi r4,r3,31 + xor r3,r3,r4 + sub r3,r3,r4 + blr + +/* + * Create a kernel thread + * kernel_thread(fn, arg, flags) + */ +_GLOBAL(kernel_thread) + stwu r1,-16(r1) + stw r30,8(r1) + stw r31,12(r1) + mr r30,r3 /* function */ + mr r31,r4 /* argument */ + ori r3,r5,CLONE_VM /* flags */ + oris r3,r3,CLONE_UNTRACED>>16 + li r4,0 /* new sp (unused) */ + li r0,__NR_clone + sc + bns+ 1f /* did system call indicate error? */ + neg r3,r3 /* if so, make return code negative */ +1: cmpwi 0,r3,0 /* parent or child? */ + bne 2f /* return if parent */ + li r0,0 /* make top-level stack frame */ + stwu r0,-16(r1) + mtlr r30 /* fn addr in lr */ + mr r3,r31 /* load arg and call fn */ + PPC440EP_ERR42 + blrl + li r0,__NR_exit /* exit if function returns */ + li r3,0 + sc +2: lwz r30,8(r1) + lwz r31,12(r1) + addi r1,r1,16 + blr + +#ifdef CONFIG_SMP +_GLOBAL(start_secondary_resume) + /* Reset stack */ + rlwinm r1,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + li r3,0 + stw r3,0(r1) /* Zero the stack frame pointer */ + bl start_secondary + b . +#endif /* CONFIG_SMP */ + +/* + * This routine is just here to keep GCC happy - sigh... + */ +_GLOBAL(__main) + blr + +#ifdef CONFIG_KEXEC + /* + * Must be relocatable PIC code callable as a C function. + */ + .globl relocate_new_kernel +relocate_new_kernel: + /* r3 = page_list */ + /* r4 = reboot_code_buffer */ + /* r5 = start_address */ + +#ifdef CONFIG_FSL_BOOKE + + mr r29, r3 + mr r30, r4 + mr r31, r5 + +#define ENTRY_MAPPING_KEXEC_SETUP +#include "fsl_booke_entry_mapping.S" +#undef ENTRY_MAPPING_KEXEC_SETUP + + mr r3, r29 + mr r4, r30 + mr r5, r31 + + li r0, 0 +#elif defined(CONFIG_44x) && !defined(CONFIG_PPC_47x) + +/* + * Code for setting up 1:1 mapping for PPC440x for KEXEC + * + * We cannot switch off the MMU on PPC44x. + * So we: + * 1) Invalidate all the mappings except the one we are running from. + * 2) Create a tmp mapping for our code in the other address space(TS) and + * jump to it. Invalidate the entry we started in. + * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS. + * 4) Jump to the 1:1 mapping in original TS. + * 5) Invalidate the tmp mapping. + * + * - Based on the kexec support code for FSL BookE + * - Doesn't support 47x yet. + * + */ + /* Save our parameters */ + mr r29, r3 + mr r30, r4 + mr r31, r5 + + /* Load our MSR_IS and TID to MMUCR for TLB search */ + mfspr r3,SPRN_PID + mfmsr r4 + andi. 
r4,r4,MSR_IS@l + beq wmmucr + oris r3,r3,PPC44x_MMUCR_STS@h +wmmucr: + mtspr SPRN_MMUCR,r3 + sync + + /* + * Invalidate all the TLB entries except the current entry + * where we are running from + */ + bl 0f /* Find our address */ +0: mflr r5 /* Make it accessible */ + tlbsx r23,0,r5 /* Find entry we are in */ + li r4,0 /* Start at TLB entry 0 */ + li r3,0 /* Set PAGEID inval value */ +1: cmpw r23,r4 /* Is this our entry? */ + beq skip /* If so, skip the inval */ + tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */ +skip: + addi r4,r4,1 /* Increment */ + cmpwi r4,64 /* Are we done? */ + bne 1b /* If not, repeat */ + isync + + /* Create a temp mapping and jump to it */ + andi. r6, r23, 1 /* Find the index to use */ + addi r24, r6, 1 /* r24 will contain 1 or 2 */ + + mfmsr r9 /* get the MSR */ + rlwinm r5, r9, 27, 31, 31 /* Extract the MSR[IS] */ + xori r7, r5, 1 /* Use the other address space */ + + /* Read the current mapping entries */ + tlbre r3, r23, PPC44x_TLB_PAGEID + tlbre r4, r23, PPC44x_TLB_XLAT + tlbre r5, r23, PPC44x_TLB_ATTRIB + + /* Save our current XLAT entry */ + mr r25, r4 + + /* Extract the TLB PageSize */ + li r10, 1 /* r10 will hold PageSize */ + rlwinm r11, r3, 0, 24, 27 /* bits 24-27 */ + + /* XXX: As of now we use 256M, 4K pages */ + cmpwi r11, PPC44x_TLB_256M + bne tlb_4k + rotlwi r10, r10, 28 /* r10 = 256M */ + b write_out +tlb_4k: + cmpwi r11, PPC44x_TLB_4K + bne default + rotlwi r10, r10, 12 /* r10 = 4K */ + b write_out +default: + rotlwi r10, r10, 10 /* r10 = 1K */ + +write_out: + /* + * Write out the tmp 1:1 mapping for this code in other address space + * Fixup EPN = RPN , TS=other address space + */ + insrwi r3, r7, 1, 23 /* Bit 23 is TS for PAGEID field */ + + /* Write out the tmp mapping entries */ + tlbwe r3, r24, PPC44x_TLB_PAGEID + tlbwe r4, r24, PPC44x_TLB_XLAT + tlbwe r5, r24, PPC44x_TLB_ATTRIB + + subi r11, r10, 1 /* PageOffset Mask = PageSize - 1 */ + not r10, r11 /* Mask for PageNum */ + + /* Switch to other address space in MSR */ + insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */ + + bl 1f +1: mflr r8 + addi r8, r8, (2f-1b) /* Find the target offset */ + + /* Jump to the tmp mapping */ + mtspr SPRN_SRR0, r8 + mtspr SPRN_SRR1, r9 + rfi + +2: + /* Invalidate the entry we were executing from */ + li r3, 0 + tlbwe r3, r23, PPC44x_TLB_PAGEID + + /* attribute fields. rwx for SUPERVISOR mode */ + li r5, 0 + ori r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G) + + /* Create 1:1 mapping in 256M pages */ + xori r7, r7, 1 /* Revert back to Original TS */ + + li r8, 0 /* PageNumber */ + li r6, 3 /* TLB Index, start at 3 */ + +next_tlb: + rotlwi r3, r8, 28 /* Create EPN (bits 0-3) */ + mr r4, r3 /* RPN = EPN */ + ori r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */ + insrwi r3, r7, 1, 23 /* Set TS from r7 */ + + tlbwe r3, r6, PPC44x_TLB_PAGEID /* PageID field : EPN, V, SIZE */ + tlbwe r4, r6, PPC44x_TLB_XLAT /* Address translation : RPN */ + tlbwe r5, r6, PPC44x_TLB_ATTRIB /* Attributes */ + + addi r8, r8, 1 /* Increment PN */ + addi r6, r6, 1 /* Increment TLB Index */ + cmpwi r8, 8 /* Are we done ? 
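(8 entries x 256M = 2GiB)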
*/ + bne next_tlb + isync + + /* Jump to the new mapping 1:1 */ + li r9,0 + insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */ + + bl 1f +1: mflr r8 + and r8, r8, r11 /* Get our offset within page */ + addi r8, r8, (2f-1b) + + and r5, r25, r10 /* Get our target PageNum */ + or r8, r8, r5 /* Target jump address */ + + mtspr SPRN_SRR0, r8 + mtspr SPRN_SRR1, r9 + rfi +2: + /* Invalidate the tmp entry we used */ + li r3, 0 + tlbwe r3, r24, PPC44x_TLB_PAGEID + sync + + /* Restore the parameters */ + mr r3, r29 + mr r4, r30 + mr r5, r31 + + li r0, 0 +#else + li r0, 0 + + /* + * Set Machine Status Register to a known status, + * switch the MMU off and jump to 1: in a single step. + */ + + mr r8, r0 + ori r8, r8, MSR_RI|MSR_ME + mtspr SPRN_SRR1, r8 + addi r8, r4, 1f - relocate_new_kernel + mtspr SPRN_SRR0, r8 + sync + rfi + +1: +#endif + /* from this point address translation is turned off */ + /* and interrupts are disabled */ + + /* set a new stack at the bottom of our page... */ + /* (not really needed now) */ + addi r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */ + stw r0, 0(r1) + + /* Do the copies */ + li r6, 0 /* checksum */ + mr r0, r3 + b 1f + +0: /* top, read another word for the indirection page */ + lwzu r0, 4(r3) + +1: + /* is it a destination page? (r8) */ + rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */ + beq 2f + + rlwinm r8, r0, 0, 0, 19 /* clear kexec flags, page align */ + b 0b + +2: /* is it an indirection page? (r3) */ + rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */ + beq 2f + + rlwinm r3, r0, 0, 0, 19 /* clear kexec flags, page align */ + subi r3, r3, 4 + b 0b + +2: /* are we done? */ + rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */ + beq 2f + b 3f + +2: /* is it a source page? (r9) */ + rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */ + beq 0b + + rlwinm r9, r0, 0, 0, 19 /* clear kexec flags, page align */ + + li r7, PAGE_SIZE / 4 + mtctr r7 + subi r9, r9, 4 + subi r8, r8, 4 +9: + lwzu r0, 4(r9) /* do the copy */ + xor r6, r6, r0 + stwu r0, 4(r8) + dcbst 0, r8 + sync + icbi 0, r8 + bdnz 9b + + addi r9, r9, 4 + addi r8, r8, 4 + b 0b + +3: + + /* To be certain of avoiding problems with self-modifying code + * execute a serializing instruction here. + */ + isync + sync + + mfspr r3, SPRN_PIR /* current core we are running on */ + mr r4, r5 /* load physical address of chunk called */ + + /* jump to the entry point, usually the setup routine */ + mtlr r5 + blrl + +1: b 1b + +relocate_new_kernel_end: + + .globl relocate_new_kernel_size +relocate_new_kernel_size: + .long relocate_new_kernel_end - relocate_new_kernel +#endif diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S new file mode 100644 index 00000000..616921ef --- /dev/null +++ b/arch/powerpc/kernel/misc_64.S @@ -0,0 +1,633 @@ +/* + * This file contains miscellaneous low-level functions. + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras. + * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com) + * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include <linux/sys.h> +#include <asm/unistd.h> +#include <asm/errno.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cache.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/kexec.h> +#include <asm/ptrace.h> + + .text + +_GLOBAL(call_do_softirq) + mflr r0 + std r0,16(r1) + stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) + mr r1,r3 + bl .__do_softirq + ld r1,0(r1) + ld r0,16(r1) + mtlr r0 + blr + +_GLOBAL(call_handle_irq) + ld r8,0(r6) + mflr r0 + std r0,16(r1) + mtctr r8 + stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) + mr r1,r5 + bctrl + ld r1,0(r1) + ld r0,16(r1) + mtlr r0 + blr + + .section ".toc","aw" +PPC64_CACHES: + .tc ppc64_caches[TC],ppc64_caches + .section ".text" + +/* + * Write any modified data cache blocks out to memory + * and invalidate the corresponding instruction cache blocks. + * + * flush_icache_range(unsigned long start, unsigned long stop) + * + * flush all bytes from start through stop-1 inclusive + */ + +_KPROBE(__flush_icache_range) + +/* + * Flush the data cache to memory + * + * Different systems have different cache line sizes + * and in some cases i-cache and d-cache line sizes differ from + * each other. + */ + ld r10,PPC64_CACHES@toc(r2) + lwz r7,DCACHEL1LINESIZE(r10)/* Get cache line size */ + addi r5,r7,-1 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of cache line size */ + srw. r8,r8,r9 /* compute line count */ + beqlr /* nothing to do? */ + mtctr r8 +1: dcbst 0,r6 + add r6,r6,r7 + bdnz 1b + sync + +/* Now invalidate the instruction cache */ + + lwz r7,ICACHEL1LINESIZE(r10) /* Get Icache line size */ + addi r5,r7,-1 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 + lwz r9,ICACHEL1LOGLINESIZE(r10) /* Get log-2 of Icache line size */ + srw. r8,r8,r9 /* compute line count */ + beqlr /* nothing to do? */ + mtctr r8 +2: icbi 0,r6 + add r6,r6,r7 + bdnz 2b + isync + blr + .previous .text +/* + * Like above, but only do the D-cache. + * + * flush_dcache_range(unsigned long start, unsigned long stop) + * + * flush all bytes from start to stop-1 inclusive + */ +_GLOBAL(flush_dcache_range) + +/* + * Flush the data cache to memory + * + * Different systems have different cache line sizes + */ + ld r10,PPC64_CACHES@toc(r2) + lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + addi r5,r7,-1 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ + srw. r8,r8,r9 /* compute line count */ + beqlr /* nothing to do? */ + mtctr r8 +0: dcbst 0,r6 + add r6,r6,r7 + bdnz 0b + sync + blr + +/* + * Like above, but works on non-mapped physical addresses. + * Use only for non-LPAR setups ! It also assumes real mode + * is cacheable. Used for flushing out the DART before using + * it as uncacheable memory + * + * flush_dcache_phys_range(unsigned long start, unsigned long stop) + * + * flush all bytes from start to stop-1 inclusive + */ +_GLOBAL(flush_dcache_phys_range) + ld r10,PPC64_CACHES@toc(r2) + lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + addi r5,r7,-1 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ + srw. 
r8,r8,r9 /* compute line count */ + beqlr /* nothing to do? */ + mfmsr r5 /* Disable MMU Data Relocation */ + ori r0,r5,MSR_DR + xori r0,r0,MSR_DR + sync + mtmsr r0 + sync + isync + mtctr r8 +0: dcbst 0,r6 + add r6,r6,r7 + bdnz 0b + sync + isync + mtmsr r5 /* Re-enable MMU Data Relocation */ + sync + isync + blr + +_GLOBAL(flush_inval_dcache_range) + ld r10,PPC64_CACHES@toc(r2) + lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + addi r5,r7,-1 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + lwz r9,DCACHEL1LOGLINESIZE(r10)/* Get log-2 of dcache line size */ + srw. r8,r8,r9 /* compute line count */ + beqlr /* nothing to do? */ + sync + isync + mtctr r8 +0: dcbf 0,r6 + add r6,r6,r7 + bdnz 0b + sync + isync + blr + + +/* + * Flush a particular page from the data cache to RAM. + * Note: this is necessary because the instruction cache does *not* + * snoop from the data cache. + * + * void __flush_dcache_icache(void *page) + */ +_GLOBAL(__flush_dcache_icache) +/* + * Flush the data cache to memory + * + * Different systems have different cache line sizes + */ + +/* Flush the dcache */ + ld r7,PPC64_CACHES@toc(r2) + clrrdi r3,r3,PAGE_SHIFT /* Page align */ + lwz r4,DCACHEL1LINESPERPAGE(r7) /* Get # dcache lines per page */ + lwz r5,DCACHEL1LINESIZE(r7) /* Get dcache line size */ + mr r6,r3 + mtctr r4 +0: dcbst 0,r6 + add r6,r6,r5 + bdnz 0b + sync + +/* Now invalidate the icache */ + + lwz r4,ICACHEL1LINESPERPAGE(r7) /* Get # icache lines per page */ + lwz r5,ICACHEL1LINESIZE(r7) /* Get icache line size */ + mtctr r4 +1: icbi 0,r3 + add r3,r3,r5 + bdnz 1b + isync + blr + + +#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) +/* + * Do an IO access in real mode + */ +_GLOBAL(real_readb) + mfmsr r7 + ori r0,r7,MSR_DR + xori r0,r0,MSR_DR + sync + mtmsrd r0 + sync + isync + mfspr r6,SPRN_HID4 + rldicl r5,r6,32,0 + ori r5,r5,0x100 + rldicl r5,r5,32,0 + sync + mtspr SPRN_HID4,r5 + isync + slbia + isync + lbz r3,0(r3) + sync + mtspr SPRN_HID4,r6 + isync + slbia + isync + mtmsrd r7 + sync + isync + blr + + /* + * Do an IO access in real mode + */ +_GLOBAL(real_writeb) + mfmsr r7 + ori r0,r7,MSR_DR + xori r0,r0,MSR_DR + sync + mtmsrd r0 + sync + isync + mfspr r6,SPRN_HID4 + rldicl r5,r6,32,0 + ori r5,r5,0x100 + rldicl r5,r5,32,0 + sync + mtspr SPRN_HID4,r5 + isync + slbia + isync + stb r3,0(r4) + sync + mtspr SPRN_HID4,r6 + isync + slbia + isync + mtmsrd r7 + sync + isync + blr +#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */ + +#ifdef CONFIG_PPC_PASEMI + +/* No support in all binutils for these yet, so use defines */ +#define LBZCIX(RT,RA,RB) .long (0x7c0006aa|(RT<<21)|(RA<<16)|(RB << 11)) +#define STBCIX(RS,RA,RB) .long (0x7c0007aa|(RS<<21)|(RA<<16)|(RB << 11)) + + +_GLOBAL(real_205_readb) + mfmsr r7 + ori r0,r7,MSR_DR + xori r0,r0,MSR_DR + sync + mtmsrd r0 + sync + isync + LBZCIX(r3,0,r3) + isync + mtmsrd r7 + sync + isync + blr + +_GLOBAL(real_205_writeb) + mfmsr r7 + ori r0,r7,MSR_DR + xori r0,r0,MSR_DR + sync + mtmsrd r0 + sync + isync + STBCIX(r3,0,r4) + isync + mtmsrd r7 + sync + isync + blr + +#endif /* CONFIG_PPC_PASEMI */ + + +#if defined(CONFIG_CPU_FREQ_PMAC64) || defined(CONFIG_CPU_FREQ_MAPLE) +/* + * SCOM access functions for 970 (FX only for now) + * + * unsigned long scom970_read(unsigned int address); + * void scom970_write(unsigned int address, unsigned long value); + * + * The address passed in is the 24 bits register address. 
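It is shifted left 8 bits into SPRN_SCOMC format, with the low (parity) bits cleared, before use.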
This code + * is 970 specific and will not check the status bits, so you should + * know what you are doing. + */ +_GLOBAL(scom970_read) + /* interrupts off */ + mfmsr r4 + ori r0,r4,MSR_EE + xori r0,r0,MSR_EE + mtmsrd r0,1 + + /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + * (including parity). On current CPUs they must be 0'd, + * and finally or in RW bit + */ + rlwinm r3,r3,8,0,15 + ori r3,r3,0x8000 + + /* do the actual scom read */ + sync + mtspr SPRN_SCOMC,r3 + isync + mfspr r3,SPRN_SCOMD + isync + mfspr r0,SPRN_SCOMC + isync + + /* XXX: fixup result on some buggy 970's (ouch ! we lost a bit, bah + * that's the best we can do). Not implemented yet as we don't use + * the scom on any of the bogus CPUs yet, but may have to be done + * ultimately + */ + + /* restore interrupts */ + mtmsrd r4,1 + blr + + +_GLOBAL(scom970_write) + /* interrupts off */ + mfmsr r5 + ori r0,r5,MSR_EE + xori r0,r0,MSR_EE + mtmsrd r0,1 + + /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + * (including parity). On current CPUs they must be 0'd. + */ + + rlwinm r3,r3,8,0,15 + + sync + mtspr SPRN_SCOMD,r4 /* write data */ + isync + mtspr SPRN_SCOMC,r3 /* write command */ + isync + mfspr 3,SPRN_SCOMC + isync + + /* restore interrupts */ + mtmsrd r5,1 + blr +#endif /* CONFIG_CPU_FREQ_PMAC64 || CONFIG_CPU_FREQ_MAPLE */ + + +/* + * Create a kernel thread + * kernel_thread(fn, arg, flags) + */ +_GLOBAL(kernel_thread) + std r29,-24(r1) + std r30,-16(r1) + stdu r1,-STACK_FRAME_OVERHEAD(r1) + mr r29,r3 + mr r30,r4 + ori r3,r5,CLONE_VM /* flags */ + oris r3,r3,(CLONE_UNTRACED>>16) + li r4,0 /* new sp (unused) */ + li r0,__NR_clone + sc + bns+ 1f /* did system call indicate error? */ + neg r3,r3 /* if so, make return code negative */ +1: cmpdi 0,r3,0 /* parent or child? */ + bne 2f /* return if parent */ + li r0,0 + stdu r0,-STACK_FRAME_OVERHEAD(r1) + ld r2,8(r29) + ld r29,0(r29) + mtlr r29 /* fn addr in lr */ + mr r3,r30 /* load arg and call fn */ + blrl + li r0,__NR_exit /* exit after child exits */ + li r3,0 + sc +2: addi r1,r1,STACK_FRAME_OVERHEAD + ld r29,-24(r1) + ld r30,-16(r1) + blr + +/* + * disable_kernel_fp() + * Disable the FPU. + */ +_GLOBAL(disable_kernel_fp) + mfmsr r3 + rldicl r0,r3,(63-MSR_FP_LG),1 + rldicl r3,r0,(MSR_FP_LG+1),0 + mtmsrd r3 /* disable use of fpu now */ + isync + blr + +/* kexec_wait(phys_cpu) + * + * wait for the flag to change, indicating this kernel is going away but + * the slave code for the next one is at addresses 0 to 100. + * + * This is used by all slaves, even those that did not find a matching + * paca in the secondary startup code. + * + * Physical (hardware) cpu id should be in r3. 
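+ * Roughly: spin at low priority until kexec_flag is set, then branch to the
+ * slave entry at absolute address 0x60.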
+ */ +_GLOBAL(kexec_wait) + bl 1f +1: mflr r5 + addi r5,r5,kexec_flag-1b + +99: HMT_LOW +#ifdef CONFIG_KEXEC /* use no memory without kexec */ + lwz r4,0(r5) + cmpwi 0,r4,0 + bnea 0x60 +#endif + b 99b + +/* this can be in text because we won't change it until we are + * running in real anyways + */ +kexec_flag: + .long 0 + + +#ifdef CONFIG_KEXEC + +/* kexec_smp_wait(void) + * + * call with interrupts off + * note: this is a terminal routine, it does not save lr + * + * get phys id from paca + * switch to real mode + * mark the paca as no longer used + * join other cpus in kexec_wait(phys_id) + */ +_GLOBAL(kexec_smp_wait) + lhz r3,PACAHWCPUID(r13) + bl real_mode + + li r4,KEXEC_STATE_REAL_MODE + stb r4,PACAKEXECSTATE(r13) + SYNC + + b .kexec_wait + +/* + * switch to real mode (turn mmu off) + * we use the early kernel trick that the hardware ignores bits + * 0 and 1 (big endian) of the effective address in real mode + * + * don't overwrite r3 here, it is live for kexec_wait above. + */ +real_mode: /* assume normal blr return */ +1: li r9,MSR_RI + li r10,MSR_DR|MSR_IR + mflr r11 /* return address to SRR0 */ + mfmsr r12 + andc r9,r12,r9 + andc r10,r12,r10 + + mtmsrd r9,1 + mtspr SPRN_SRR1,r10 + mtspr SPRN_SRR0,r11 + rfid + + +/* + * kexec_sequence(newstack, start, image, control, clear_all()) + * + * does the grungy work with stack switching and real mode switches + * also does simple calls to other code + */ + +_GLOBAL(kexec_sequence) + mflr r0 + std r0,16(r1) + + /* switch stacks to newstack -- &kexec_stack.stack */ + stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) + mr r1,r3 + + li r0,0 + std r0,16(r1) + + /* save regs for local vars on new stack. + * yes, we won't go back, but ... + */ + std r31,-8(r1) + std r30,-16(r1) + std r29,-24(r1) + std r28,-32(r1) + std r27,-40(r1) + std r26,-48(r1) + std r25,-56(r1) + + stdu r1,-STACK_FRAME_OVERHEAD-64(r1) + + /* save args into preserved regs */ + mr r31,r3 /* newstack (both) */ + mr r30,r4 /* start (real) */ + mr r29,r5 /* image (virt) */ + mr r28,r6 /* control, unused */ + mr r27,r7 /* clear_all() fn desc */ + mr r26,r8 /* spare */ + lhz r25,PACAHWCPUID(r13) /* get our phys cpu from paca */ + + /* disable interrupts, we are overwriting kernel data next */ + mfmsr r3 + rlwinm r3,r3,0,17,15 + mtmsrd r3,1 + + /* copy dest pages, flush whole dest image */ + mr r3,r29 + bl .kexec_copy_flush /* (image) */ + + /* turn off mmu */ + bl real_mode + + /* copy 0x100 bytes starting at start to 0 */ + li r3,0 + mr r4,r30 /* start, aka phys mem offset */ + li r5,0x100 + li r6,0 + bl .copy_and_flush /* (dest, src, copy limit, start offset) */ +1: /* assume normal blr return */ + + /* release other cpus to the new kernel secondary start at 0x60 */ + mflr r5 + li r6,1 + stw r6,kexec_flag-1b(5) + + /* clear out hardware hash page table and tlb */ + ld r5,0(r27) /* deref function descriptor */ + mtctr r5 + bctrl /* ppc_md.hpte_clear_all(void); */ + +/* + * kexec image calling is: + * the first 0x100 bytes of the entry point are copied to 0 + * + * all slaves branch to slave = 0x60 (absolute) + * slave(phys_cpu_id); + * + * master goes to start = entry point + * start(phys_cpu_id, start, 0); + * + * + * a wrapper is needed to call existing kernels, here is an approximate + * description of one method: + * + * v2: (2.6.10) + * start will be near the boot_block (maybe 0x100 bytes before it?) 
+ * it will have a 0x60, which will b to boot_block, where it will wait + * and 0 will store phys into struct boot-block and load r3 from there, + * copy kernel 0-0x100 and tell slaves to back down to 0x60 again + * + * v1: (2.6.9) + * boot block will have all cpus scanning device tree to see if they + * are the boot cpu ????? + * other device tree differences (prop sizes, va vs pa, etc)... + */ + mr r3,r25 # my phys cpu + mr r4,r30 # start, aka phys mem offset + mtlr 4 + li r5,0 + blr /* image->start(physid, image->start, 0); */ +#endif /* CONFIG_KEXEC */ diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c new file mode 100644 index 00000000..2d275707 --- /dev/null +++ b/arch/powerpc/kernel/module.c @@ -0,0 +1,80 @@ +/* Kernel module help for powerpc. + Copyright (C) 2001, 2003 Rusty Russell IBM Corporation. + Copyright (C) 2008 Freescale Semiconductor, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include <linux/elf.h> +#include <linux/moduleloader.h> +#include <linux/err.h> +#include <linux/vmalloc.h> +#include <linux/bug.h> +#include <asm/module.h> +#include <asm/uaccess.h> +#include <asm/firmware.h> +#include <linux/sort.h> + +#include "setup.h" + +LIST_HEAD(module_bug_list); + +static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *name) +{ + char *secstrings; + unsigned int i; + + secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + for (i = 1; i < hdr->e_shnum; i++) + if (strcmp(secstrings+sechdrs[i].sh_name, name) == 0) + return &sechdrs[i]; + return NULL; +} + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, struct module *me) +{ + const Elf_Shdr *sect; + + /* Apply feature fixups */ + sect = find_section(hdr, sechdrs, "__ftr_fixup"); + if (sect != NULL) + do_feature_fixups(cur_cpu_spec->cpu_features, + (void *)sect->sh_addr, + (void *)sect->sh_addr + sect->sh_size); + + sect = find_section(hdr, sechdrs, "__mmu_ftr_fixup"); + if (sect != NULL) + do_feature_fixups(cur_cpu_spec->mmu_features, + (void *)sect->sh_addr, + (void *)sect->sh_addr + sect->sh_size); + +#ifdef CONFIG_PPC64 + sect = find_section(hdr, sechdrs, "__fw_ftr_fixup"); + if (sect != NULL) + do_feature_fixups(powerpc_firmware_features, + (void *)sect->sh_addr, + (void *)sect->sh_addr + sect->sh_size); +#endif + + sect = find_section(hdr, sechdrs, "__lwsync_fixup"); + if (sect != NULL) + do_lwsync_fixups(cur_cpu_spec->cpu_features, + (void *)sect->sh_addr, + (void *)sect->sh_addr + sect->sh_size); + + return 0; +} diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c new file mode 100644 index 00000000..2e3200ca --- /dev/null +++ b/arch/powerpc/kernel/module_32.c @@ -0,0 +1,308 @@ +/* Kernel module help for PPC. + Copyright (C) 2001 Rusty Russell. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include <linux/module.h> +#include <linux/moduleloader.h> +#include <linux/elf.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/ftrace.h> +#include <linux/cache.h> +#include <linux/bug.h> +#include <linux/sort.h> + +#include "setup.h" + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(fmt , ...) +#endif + +/* Count how many different relocations (different symbol, different + addend) */ +static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num) +{ + unsigned int i, r_info, r_addend, _count_relocs; + + _count_relocs = 0; + r_info = 0; + r_addend = 0; + for (i = 0; i < num; i++) + /* Only count 24-bit relocs, others don't need stubs */ + if (ELF32_R_TYPE(rela[i].r_info) == R_PPC_REL24 && + (r_info != ELF32_R_SYM(rela[i].r_info) || + r_addend != rela[i].r_addend)) { + _count_relocs++; + r_info = ELF32_R_SYM(rela[i].r_info); + r_addend = rela[i].r_addend; + } + +#ifdef CONFIG_DYNAMIC_FTRACE + _count_relocs++; /* add one for ftrace_caller */ +#endif + return _count_relocs; +} + +static int relacmp(const void *_x, const void *_y) +{ + const Elf32_Rela *x, *y; + + y = (Elf32_Rela *)_x; + x = (Elf32_Rela *)_y; + + /* Compare the entire r_info (as opposed to ELF32_R_SYM(r_info) only) to + * make the comparison cheaper/faster. It won't affect the sorting or + * the counting algorithms' performance + */ + if (x->r_info < y->r_info) + return -1; + else if (x->r_info > y->r_info) + return 1; + else if (x->r_addend < y->r_addend) + return -1; + else if (x->r_addend > y->r_addend) + return 1; + else + return 0; +} + +static void relaswap(void *_x, void *_y, int size) +{ + uint32_t *x, *y, tmp; + int i; + + y = (uint32_t *)_x; + x = (uint32_t *)_y; + + for (i = 0; i < sizeof(Elf32_Rela) / sizeof(uint32_t); i++) { + tmp = x[i]; + x[i] = y[i]; + y[i] = tmp; + } +} + +/* Get the potential trampolines size required of the init and + non-init sections */ +static unsigned long get_plt_size(const Elf32_Ehdr *hdr, + const Elf32_Shdr *sechdrs, + const char *secstrings, + int is_init) +{ + unsigned long ret = 0; + unsigned i; + + /* Everything marked ALLOC (this includes the exported + symbols) */ + for (i = 1; i < hdr->e_shnum; i++) { + /* If it's called *.init*, and we're not init, we're + not interested */ + if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0) + != is_init) + continue; + + /* We don't want to look at debug sections. */ + if (strstr(secstrings + sechdrs[i].sh_name, ".debug") != 0) + continue; + + if (sechdrs[i].sh_type == SHT_RELA) { + DEBUGP("Found relocations in section %u\n", i); + DEBUGP("Ptr: %p. Number: %u\n", + (void *)hdr + sechdrs[i].sh_offset, + sechdrs[i].sh_size / sizeof(Elf32_Rela)); + + /* Sort the relocation information based on a symbol and + * addend key. 
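(relacmp() compares the whole r_info first, then r_addend.)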
This is a stable O(n*log n) complexity + * alogrithm but it will reduce the complexity of + * count_relocs() to linear complexity O(n) + */ + sort((void *)hdr + sechdrs[i].sh_offset, + sechdrs[i].sh_size / sizeof(Elf32_Rela), + sizeof(Elf32_Rela), relacmp, relaswap); + + ret += count_relocs((void *)hdr + + sechdrs[i].sh_offset, + sechdrs[i].sh_size + / sizeof(Elf32_Rela)) + * sizeof(struct ppc_plt_entry); + } + } + + return ret; +} + +int module_frob_arch_sections(Elf32_Ehdr *hdr, + Elf32_Shdr *sechdrs, + char *secstrings, + struct module *me) +{ + unsigned int i; + + /* Find .plt and .init.plt sections */ + for (i = 0; i < hdr->e_shnum; i++) { + if (strcmp(secstrings + sechdrs[i].sh_name, ".init.plt") == 0) + me->arch.init_plt_section = i; + else if (strcmp(secstrings + sechdrs[i].sh_name, ".plt") == 0) + me->arch.core_plt_section = i; + } + if (!me->arch.core_plt_section || !me->arch.init_plt_section) { + printk("Module doesn't contain .plt or .init.plt sections.\n"); + return -ENOEXEC; + } + + /* Override their sizes */ + sechdrs[me->arch.core_plt_section].sh_size + = get_plt_size(hdr, sechdrs, secstrings, 0); + sechdrs[me->arch.init_plt_section].sh_size + = get_plt_size(hdr, sechdrs, secstrings, 1); + return 0; +} + +static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) +{ + if (entry->jump[0] == 0x3d800000 + ((val + 0x8000) >> 16) + && entry->jump[1] == 0x398c0000 + (val & 0xffff)) + return 1; + return 0; +} + +/* Set up a trampoline in the PLT to bounce us to the distant function */ +static uint32_t do_plt_call(void *location, + Elf32_Addr val, + Elf32_Shdr *sechdrs, + struct module *mod) +{ + struct ppc_plt_entry *entry; + + DEBUGP("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); + /* Init, or core PLT? */ + if (location >= mod->module_core + && location < mod->module_core + mod->core_size) + entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; + else + entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; + + /* Find this entry, or if that fails, the next avail. entry */ + while (entry->jump[0]) { + if (entry_matches(entry, val)) return (uint32_t)entry; + entry++; + } + + entry->jump[0] = 0x3d800000+((val+0x8000)>>16); /* lis r12,sym@ha */ + entry->jump[1] = 0x398c0000 + (val&0xffff); /* addi r12,r12,sym@l*/ + entry->jump[2] = 0x7d8903a6; /* mtctr r12 */ + entry->jump[3] = 0x4e800420; /* bctr */ + + DEBUGP("Initialized plt for 0x%x at %p\n", val, entry); + return (uint32_t)entry; +} + +int apply_relocate_add(Elf32_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *module) +{ + unsigned int i; + Elf32_Rela *rela = (void *)sechdrs[relsec].sh_addr; + Elf32_Sym *sym; + uint32_t *location; + uint32_t value; + + DEBUGP("Applying ADD relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) { + /* This is where to make the change */ + location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + + rela[i].r_offset; + /* This is the symbol it is referring to. Note that all + undefined symbols have been resolved. */ + sym = (Elf32_Sym *)sechdrs[symindex].sh_addr + + ELF32_R_SYM(rela[i].r_info); + /* `Everything is relative'. 
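(ELF Rela: value = S + A; the REL-type cases below then subtract the location P.)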
*/ + value = sym->st_value + rela[i].r_addend; + + switch (ELF32_R_TYPE(rela[i].r_info)) { + case R_PPC_ADDR32: + /* Simply set it */ + *(uint32_t *)location = value; + break; + + case R_PPC_ADDR16_LO: + /* Low half of the symbol */ + *(uint16_t *)location = value; + break; + + case R_PPC_ADDR16_HI: + /* Higher half of the symbol */ + *(uint16_t *)location = (value >> 16); + break; + + case R_PPC_ADDR16_HA: + /* Sign-adjusted lower 16 bits: PPC ELF ABI says: + (((x >> 16) + ((x & 0x8000) ? 1 : 0))) & 0xFFFF. + This is the same, only sane. + */ + *(uint16_t *)location = (value + 0x8000) >> 16; + break; + + case R_PPC_REL24: + if ((int)(value - (uint32_t)location) < -0x02000000 + || (int)(value - (uint32_t)location) >= 0x02000000) + value = do_plt_call(location, value, + sechdrs, module); + + /* Only replace bits 2 through 26 */ + DEBUGP("REL24 value = %08X. location = %08X\n", + value, (uint32_t)location); + DEBUGP("Location before: %08X.\n", + *(uint32_t *)location); + *(uint32_t *)location + = (*(uint32_t *)location & ~0x03fffffc) + | ((value - (uint32_t)location) + & 0x03fffffc); + DEBUGP("Location after: %08X.\n", + *(uint32_t *)location); + DEBUGP("ie. jump to %08X+%08X = %08X\n", + *(uint32_t *)location & 0x03fffffc, + (uint32_t)location, + (*(uint32_t *)location & 0x03fffffc) + + (uint32_t)location); + break; + + case R_PPC_REL32: + /* 32-bit relative jump. */ + *(uint32_t *)location = value - (uint32_t)location; + break; + + default: + printk("%s: unknown ADD relocation: %u\n", + module->name, + ELF32_R_TYPE(rela[i].r_info)); + return -ENOEXEC; + } + } +#ifdef CONFIG_DYNAMIC_FTRACE + module->arch.tramp = + do_plt_call(module->module_core, + (unsigned long)ftrace_caller, + sechdrs, module); +#endif + return 0; +} diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c new file mode 100644 index 00000000..9f44a775 --- /dev/null +++ b/arch/powerpc/kernel/module_64.c @@ -0,0 +1,448 @@ +/* Kernel module help for PPC64. + Copyright (C) 2001, 2003 Rusty Russell IBM Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include <linux/module.h> +#include <linux/elf.h> +#include <linux/moduleloader.h> +#include <linux/err.h> +#include <linux/vmalloc.h> +#include <linux/ftrace.h> +#include <linux/bug.h> +#include <asm/module.h> +#include <asm/firmware.h> +#include <asm/code-patching.h> +#include <linux/sort.h> + +#include "setup.h" + +/* FIXME: We don't do .init separately. To do this, we'd need to have + a separate r2 value in the init and core section, and stub between + them, too. + + Using a magic allocator which places modules within 32MB solves + this, and makes other things simpler. Anton? + --RR. */ +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(fmt , ...) +#endif + +/* Like PPC32, we need little trampolines to do > 24-bit jumps (into + the kernel itself). 
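(A 24-bit relative branch only reaches +/- 32MB.)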
But on PPC64, these need to be used for every + jump, actually, to reset r2 (TOC+0x8000). */ +struct ppc64_stub_entry +{ + /* 28 byte jump instruction sequence (7 instructions) */ + unsigned char jump[28]; + unsigned char unused[4]; + /* Data for the above code */ + struct ppc64_opd_entry opd; +}; + +/* We use a stub to fix up r2 (TOC ptr) and to jump to the (external) + function which may be more than 24-bits away. We could simply + patch the new r2 value and function pointer into the stub, but it's + significantly shorter to put these values at the end of the stub + code, and patch the stub address (32-bits relative to the TOC ptr, + r2) into the stub. */ +static struct ppc64_stub_entry ppc64_stub = +{ .jump = { + 0x3d, 0x82, 0x00, 0x00, /* addis r12,r2, <high> */ + 0x39, 0x8c, 0x00, 0x00, /* addi r12,r12, <low> */ + /* Save current r2 value in magic place on the stack. */ + 0xf8, 0x41, 0x00, 0x28, /* std r2,40(r1) */ + 0xe9, 0x6c, 0x00, 0x20, /* ld r11,32(r12) */ + 0xe8, 0x4c, 0x00, 0x28, /* ld r2,40(r12) */ + 0x7d, 0x69, 0x03, 0xa6, /* mtctr r11 */ + 0x4e, 0x80, 0x04, 0x20 /* bctr */ +} }; + +/* Count how many different 24-bit relocations (different symbol, + different addend) */ +static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num) +{ + unsigned int i, r_info, r_addend, _count_relocs; + + /* FIXME: Only count external ones --RR */ + _count_relocs = 0; + r_info = 0; + r_addend = 0; + for (i = 0; i < num; i++) + /* Only count 24-bit relocs, others don't need stubs */ + if (ELF64_R_TYPE(rela[i].r_info) == R_PPC_REL24 && + (r_info != ELF64_R_SYM(rela[i].r_info) || + r_addend != rela[i].r_addend)) { + _count_relocs++; + r_info = ELF64_R_SYM(rela[i].r_info); + r_addend = rela[i].r_addend; + } + + return _count_relocs; +} + +static int relacmp(const void *_x, const void *_y) +{ + const Elf64_Rela *x, *y; + + y = (Elf64_Rela *)_x; + x = (Elf64_Rela *)_y; + + /* Compare the entire r_info (as opposed to ELF64_R_SYM(r_info) only) to + * make the comparison cheaper/faster. It won't affect the sorting or + * the counting algorithms' performance + */ + if (x->r_info < y->r_info) + return -1; + else if (x->r_info > y->r_info) + return 1; + else if (x->r_addend < y->r_addend) + return -1; + else if (x->r_addend > y->r_addend) + return 1; + else + return 0; +} + +static void relaswap(void *_x, void *_y, int size) +{ + uint64_t *x, *y, tmp; + int i; + + y = (uint64_t *)_x; + x = (uint64_t *)_y; + + for (i = 0; i < sizeof(Elf64_Rela) / sizeof(uint64_t); i++) { + tmp = x[i]; + x[i] = y[i]; + y[i] = tmp; + } +} + +/* Get size of potential trampolines required. */ +static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, + const Elf64_Shdr *sechdrs) +{ + /* One extra reloc so it's always 0-funcaddr terminated */ + unsigned long relocs = 1; + unsigned i; + + /* Every relocated section... */ + for (i = 1; i < hdr->e_shnum; i++) { + if (sechdrs[i].sh_type == SHT_RELA) { + DEBUGP("Found relocations in section %u\n", i); + DEBUGP("Ptr: %p. Number: %lu\n", + (void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size / sizeof(Elf64_Rela)); + + /* Sort the relocation information based on a symbol and + * addend key. 
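(Same r_info-then-r_addend key as the 32-bit loader.)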
+			 * This is a stable O(n*log n) sort, and it reduces
+			 * count_relocs() to a single linear O(n) pass.
+			 */
+			sort((void *)sechdrs[i].sh_addr,
+			     sechdrs[i].sh_size / sizeof(Elf64_Rela),
+			     sizeof(Elf64_Rela), relacmp, relaswap);
+
+			relocs += count_relocs((void *)sechdrs[i].sh_addr,
+					       sechdrs[i].sh_size
+					       / sizeof(Elf64_Rela));
+		}
+	}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+	/* make the trampoline to the ftrace_caller */
+	relocs++;
+#endif
+
+	DEBUGP("Looks like a total of %lu stubs, max\n", relocs);
+	return relocs * sizeof(struct ppc64_stub_entry);
+}
+
+static void dedotify_versions(struct modversion_info *vers,
+			      unsigned long size)
+{
+	struct modversion_info *end;
+
+	for (end = (void *)vers + size; vers < end; vers++)
+		if (vers->name[0] == '.')
+			memmove(vers->name, vers->name+1, strlen(vers->name));
+}
+
+/* Undefined symbols which refer to .funcname, hack to funcname */
+static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
+{
+	unsigned int i;
+
+	for (i = 1; i < numsyms; i++) {
+		if (syms[i].st_shndx == SHN_UNDEF) {
+			char *name = strtab + syms[i].st_name;
+			if (name[0] == '.')
+				memmove(name, name+1, strlen(name));
+		}
+	}
+}
+
+int module_frob_arch_sections(Elf64_Ehdr *hdr,
+			      Elf64_Shdr *sechdrs,
+			      char *secstrings,
+			      struct module *me)
+{
+	unsigned int i;
+
+	/* Find .toc and .stubs sections, symtab and strtab */
+	for (i = 1; i < hdr->e_shnum; i++) {
+		char *p;
+		if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
+			me->arch.stubs_section = i;
+		else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0)
+			me->arch.toc_section = i;
+		else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
+			dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
+					  sechdrs[i].sh_size);
+
+		/* We don't handle .init for the moment: rename to _init */
+		while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init")))
+			p[0] = '_';
+
+		if (sechdrs[i].sh_type == SHT_SYMTAB)
+			dedotify((void *)hdr + sechdrs[i].sh_offset,
+				 sechdrs[i].sh_size / sizeof(Elf64_Sym),
+				 (void *)hdr
+				 + sechdrs[sechdrs[i].sh_link].sh_offset);
+	}
+
+	if (!me->arch.stubs_section) {
+		printk("%s: doesn't contain .stubs.\n", me->name);
+		return -ENOEXEC;
+	}
+
+	/* If we don't have a .toc, just use .stubs.  We need to set r2
+	   to some reasonable value in case the module calls out to
+	   other functions via a stub, or if a function pointer escapes
+	   the module by some means. */
+	if (!me->arch.toc_section)
+		me->arch.toc_section = me->arch.stubs_section;
+
+	/* Override the stubs size */
+	sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs);
+	return 0;
+}
+
+/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
+   gives the offset its maximum span in an instruction which uses a
+   signed offset) */
+static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me)
+{
+	return sechdrs[me->arch.toc_section].sh_addr + 0x8000;
+}
+
+/* Both low and high 16 bits are added as SIGNED additions, so if the
+   low 16 bits have the high bit set, the high 16 bits must be
+   adjusted.  These macros do that (stolen from binutils). */
+#define PPC_LO(v) ((v) & 0xffff)
+#define PPC_HI(v) (((v) >> 16) & 0xffff)
+#define PPC_HA(v) PPC_HI ((v) + 0x8000)
+
+/* Patch stub to reference function and correct r2 value.
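+ *
+ * (Editorial illustration, not part of the original commit.)  The
+ * patched addis/addi pair at the top of the stub recomputes the
+ * stub's own address from r2:
+ *
+ *	addis r12,r2,PPC_HA(reladdr)	; r12 = r2 + (ha << 16)
+ *	addi  r12,r12,PPC_LO(reladdr)	; r12 += signed lo -> &stub
+ *
+ * with reladdr = (stub address) - my_r2(), so the ld instructions
+ * that follow can fetch the target address and the new r2 value from
+ * the opd copy at the end of the stub.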
*/ +static inline int create_stub(Elf64_Shdr *sechdrs, + struct ppc64_stub_entry *entry, + struct ppc64_opd_entry *opd, + struct module *me) +{ + Elf64_Half *loc1, *loc2; + long reladdr; + + *entry = ppc64_stub; + + loc1 = (Elf64_Half *)&entry->jump[2]; + loc2 = (Elf64_Half *)&entry->jump[6]; + + /* Stub uses address relative to r2. */ + reladdr = (unsigned long)entry - my_r2(sechdrs, me); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + printk("%s: Address %p of stub out of range of %p.\n", + me->name, (void *)reladdr, (void *)my_r2); + return 0; + } + DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr); + + *loc1 = PPC_HA(reladdr); + *loc2 = PPC_LO(reladdr); + entry->opd.funcaddr = opd->funcaddr; + entry->opd.r2 = opd->r2; + return 1; +} + +/* Create stub to jump to function described in this OPD: we need the + stub to set up the TOC ptr (r2) for the function. */ +static unsigned long stub_for_addr(Elf64_Shdr *sechdrs, + unsigned long opdaddr, + struct module *me) +{ + struct ppc64_stub_entry *stubs; + struct ppc64_opd_entry *opd = (void *)opdaddr; + unsigned int i, num_stubs; + + num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs); + + /* Find this stub, or if that fails, the next avail. entry */ + stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr; + for (i = 0; stubs[i].opd.funcaddr; i++) { + BUG_ON(i >= num_stubs); + + if (stubs[i].opd.funcaddr == opd->funcaddr) + return (unsigned long)&stubs[i]; + } + + if (!create_stub(sechdrs, &stubs[i], opd, me)) + return 0; + + return (unsigned long)&stubs[i]; +} + +/* We expect a noop next: if it is, replace it with instruction to + restore r2. */ +static int restore_r2(u32 *instruction, struct module *me) +{ + if (*instruction != PPC_INST_NOP) { + printk("%s: Expect noop after relocate, got %08x\n", + me->name, *instruction); + return 0; + } + *instruction = 0xe8410028; /* ld r2,40(r1) */ + return 1; +} + +int apply_relocate_add(Elf64_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + unsigned int i; + Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr; + Elf64_Sym *sym; + unsigned long *location; + unsigned long value; + + DEBUGP("Applying ADD relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) { + /* This is where to make the change */ + location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + + rela[i].r_offset; + /* This is the symbol it is referring to */ + sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + + ELF64_R_SYM(rela[i].r_info); + + DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n", + location, (long)ELF64_R_TYPE(rela[i].r_info), + strtab + sym->st_name, (unsigned long)sym->st_value, + (long)rela[i].r_addend); + + /* `Everything is relative'. 
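+		 *
+		 * (Editorial note, not part of the original commit.)  The
+		 * range checks in the switch below all use the
+		 * unsigned-compare idiom for "does v fit a signed n-bit
+		 * field?": for n = 16, (value + 0x8000 > 0xffff) rejects
+		 * anything outside [-0x8000, 0x7fff] in one comparison,
+		 * and the REL24 test (value + 0x2000000 > 0x3ffffff) does
+		 * the same for the signed 26-bit branch displacement.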
*/ + value = sym->st_value + rela[i].r_addend; + + switch (ELF64_R_TYPE(rela[i].r_info)) { + case R_PPC64_ADDR32: + /* Simply set it */ + *(u32 *)location = value; + break; + + case R_PPC64_ADDR64: + /* Simply set it */ + *(unsigned long *)location = value; + break; + + case R_PPC64_TOC: + *(unsigned long *)location = my_r2(sechdrs, me); + break; + + case R_PPC64_TOC16: + /* Subtract TOC pointer */ + value -= my_r2(sechdrs, me); + if (value + 0x8000 > 0xffff) { + printk("%s: bad TOC16 relocation (%lu)\n", + me->name, value); + return -ENOEXEC; + } + *((uint16_t *) location) + = (*((uint16_t *) location) & ~0xffff) + | (value & 0xffff); + break; + + case R_PPC64_TOC16_DS: + /* Subtract TOC pointer */ + value -= my_r2(sechdrs, me); + if ((value & 3) != 0 || value + 0x8000 > 0xffff) { + printk("%s: bad TOC16_DS relocation (%lu)\n", + me->name, value); + return -ENOEXEC; + } + *((uint16_t *) location) + = (*((uint16_t *) location) & ~0xfffc) + | (value & 0xfffc); + break; + + case R_PPC_REL24: + /* FIXME: Handle weak symbols here --RR */ + if (sym->st_shndx == SHN_UNDEF) { + /* External: go via stub */ + value = stub_for_addr(sechdrs, value, me); + if (!value) + return -ENOENT; + if (!restore_r2((u32 *)location + 1, me)) + return -ENOEXEC; + } + + /* Convert value to relative */ + value -= (unsigned long)location; + if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){ + printk("%s: REL24 %li out of range!\n", + me->name, (long int)value); + return -ENOEXEC; + } + + /* Only replace bits 2 through 26 */ + *(uint32_t *)location + = (*(uint32_t *)location & ~0x03fffffc) + | (value & 0x03fffffc); + break; + + case R_PPC64_REL64: + /* 64 bits relative (used by features fixups) */ + *location = value - (unsigned long)location; + break; + + default: + printk("%s: Unknown ADD relocation: %lu\n", + me->name, + (unsigned long)ELF64_R_TYPE(rela[i].r_info)); + return -ENOEXEC; + } + } + +#ifdef CONFIG_DYNAMIC_FTRACE + me->arch.toc = my_r2(sechdrs, me); + me->arch.tramp = stub_for_addr(sechdrs, + (unsigned long)ftrace_caller, + me); +#endif + + return 0; +} diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c new file mode 100644 index 00000000..8bbc12d2 --- /dev/null +++ b/arch/powerpc/kernel/msi.c @@ -0,0 +1,43 @@ +/* + * Copyright 2006-2007, Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/msi.h> +#include <linux/pci.h> + +#include <asm/machdep.h> + +int arch_msi_check_device(struct pci_dev* dev, int nvec, int type) +{ + if (!ppc_md.setup_msi_irqs || !ppc_md.teardown_msi_irqs) { + pr_debug("msi: Platform doesn't provide MSI callbacks.\n"); + return -ENOSYS; + } + + /* PowerPC doesn't support multiple MSI yet */ + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + + if (ppc_md.msi_check_device) { + pr_debug("msi: Using platform check routine.\n"); + return ppc_md.msi_check_device(dev, nvec, type); + } + + return 0; +} + +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + return ppc_md.setup_msi_irqs(dev, nvec, type); +} + +void arch_teardown_msi_irqs(struct pci_dev *dev) +{ + ppc_md.teardown_msi_irqs(dev); +} diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c new file mode 100644 index 00000000..bec1e930 --- /dev/null +++ b/arch/powerpc/kernel/nvram_64.c @@ -0,0 +1,564 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * /dev/nvram driver for PPC64 + * + * This perhaps should live in drivers/char + * + * TODO: Split the /dev/nvram part (that one can use + * drivers/char/generic_nvram.c) from the arch & partition + * parsing code. + */ + +#include <linux/module.h> + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <linux/fcntl.h> +#include <linux/nvram.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <asm/uaccess.h> +#include <asm/nvram.h> +#include <asm/rtas.h> +#include <asm/prom.h> +#include <asm/machdep.h> + +#undef DEBUG_NVRAM + +#define NVRAM_HEADER_LEN sizeof(struct nvram_header) +#define NVRAM_BLOCK_LEN NVRAM_HEADER_LEN + +/* If change this size, then change the size of NVNAME_LEN */ +struct nvram_header { + unsigned char signature; + unsigned char checksum; + unsigned short length; + /* Terminating null required only for names < 12 chars. 
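+	 * (Editorial note, not part of the original commit.)  The header
+	 * is exactly one block: 1 signature + 1 checksum + 2 length +
+	 * 12 name = 16 bytes = NVRAM_BLOCK_LEN, which is why nvram_init()
+	 * below can BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16) and why partition
+	 * lengths are counted in whole 16-byte blocks, header included.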
*/ + char name[12]; +}; + +struct nvram_partition { + struct list_head partition; + struct nvram_header header; + unsigned int index; +}; + +static LIST_HEAD(nvram_partitions); + +static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin) +{ + int size; + + if (ppc_md.nvram_size == NULL) + return -ENODEV; + size = ppc_md.nvram_size(); + + switch (origin) { + case 1: + offset += file->f_pos; + break; + case 2: + offset += size; + break; + } + if (offset < 0) + return -EINVAL; + file->f_pos = offset; + return file->f_pos; +} + + +static ssize_t dev_nvram_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + ssize_t ret; + char *tmp = NULL; + ssize_t size; + + ret = -ENODEV; + if (!ppc_md.nvram_size) + goto out; + + ret = 0; + size = ppc_md.nvram_size(); + if (*ppos >= size || size < 0) + goto out; + + count = min_t(size_t, count, size - *ppos); + count = min(count, PAGE_SIZE); + + ret = -ENOMEM; + tmp = kmalloc(count, GFP_KERNEL); + if (!tmp) + goto out; + + ret = ppc_md.nvram_read(tmp, count, ppos); + if (ret <= 0) + goto out; + + if (copy_to_user(buf, tmp, ret)) + ret = -EFAULT; + +out: + kfree(tmp); + return ret; + +} + +static ssize_t dev_nvram_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + ssize_t ret; + char *tmp = NULL; + ssize_t size; + + ret = -ENODEV; + if (!ppc_md.nvram_size) + goto out; + + ret = 0; + size = ppc_md.nvram_size(); + if (*ppos >= size || size < 0) + goto out; + + count = min_t(size_t, count, size - *ppos); + count = min(count, PAGE_SIZE); + + ret = -ENOMEM; + tmp = kmalloc(count, GFP_KERNEL); + if (!tmp) + goto out; + + ret = -EFAULT; + if (copy_from_user(tmp, buf, count)) + goto out; + + ret = ppc_md.nvram_write(tmp, count, ppos); + +out: + kfree(tmp); + return ret; + +} + +static long dev_nvram_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + switch(cmd) { +#ifdef CONFIG_PPC_PMAC + case OBSOLETE_PMAC_NVRAM_GET_OFFSET: + printk(KERN_WARNING "nvram: Using obsolete PMAC_NVRAM_GET_OFFSET ioctl\n"); + case IOC_NVRAM_GET_OFFSET: { + int part, offset; + + if (!machine_is(powermac)) + return -EINVAL; + if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0) + return -EFAULT; + if (part < pmac_nvram_OF || part > pmac_nvram_NR) + return -EINVAL; + offset = pmac_get_partition(part); + if (offset < 0) + return offset; + if (copy_to_user((void __user*)arg, &offset, sizeof(offset)) != 0) + return -EFAULT; + return 0; + } +#endif /* CONFIG_PPC_PMAC */ + default: + return -EINVAL; + } +} + +const struct file_operations nvram_fops = { + .owner = THIS_MODULE, + .llseek = dev_nvram_llseek, + .read = dev_nvram_read, + .write = dev_nvram_write, + .unlocked_ioctl = dev_nvram_ioctl, +}; + +static struct miscdevice nvram_dev = { + NVRAM_MINOR, + "nvram", + &nvram_fops +}; + + +#ifdef DEBUG_NVRAM +static void __init nvram_print_partitions(char * label) +{ + struct nvram_partition * tmp_part; + + printk(KERN_WARNING "--------%s---------\n", label); + printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n"); + list_for_each_entry(tmp_part, &nvram_partitions, partition) { + printk(KERN_WARNING "%4d \t%02x\t%02x\t%d\t%12s\n", + tmp_part->index, tmp_part->header.signature, + tmp_part->header.checksum, tmp_part->header.length, + tmp_part->header.name); + } +} +#endif + + +static int __init nvram_write_header(struct nvram_partition * part) +{ + loff_t tmp_index; + int rc; + + tmp_index = part->index; + rc = ppc_md.nvram_write((char *)&part->header, NVRAM_HEADER_LEN, &tmp_index); + + return rc; 
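+	/* NB: a throwaway tmp_index is handed to nvram_write() above
+	 * because ppc_md.nvram_write() advances the loff_t it is given
+	 * (compare the "nvram_read will advance us" note in
+	 * nvram_scan_partitions()); part->index itself must not move. */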
+} + + +static unsigned char __init nvram_checksum(struct nvram_header *p) +{ + unsigned int c_sum, c_sum2; + unsigned short *sp = (unsigned short *)p->name; /* assume 6 shorts */ + c_sum = p->signature + p->length + sp[0] + sp[1] + sp[2] + sp[3] + sp[4] + sp[5]; + + /* The sum may have spilled into the 3rd byte. Fold it back. */ + c_sum = ((c_sum & 0xffff) + (c_sum >> 16)) & 0xffff; + /* The sum cannot exceed 2 bytes. Fold it into a checksum */ + c_sum2 = (c_sum >> 8) + (c_sum << 8); + c_sum = ((c_sum + c_sum2) >> 8) & 0xff; + return c_sum; +} + +/* + * Per the criteria passed via nvram_remove_partition(), should this + * partition be removed? 1=remove, 0=keep + */ +static int nvram_can_remove_partition(struct nvram_partition *part, + const char *name, int sig, const char *exceptions[]) +{ + if (part->header.signature != sig) + return 0; + if (name) { + if (strncmp(name, part->header.name, 12)) + return 0; + } else if (exceptions) { + const char **except; + for (except = exceptions; *except; except++) { + if (!strncmp(*except, part->header.name, 12)) + return 0; + } + } + return 1; +} + +/** + * nvram_remove_partition - Remove one or more partitions in nvram + * @name: name of the partition to remove, or NULL for a + * signature only match + * @sig: signature of the partition(s) to remove + * @exceptions: When removing all partitions with a matching signature, + * leave these alone. + */ + +int __init nvram_remove_partition(const char *name, int sig, + const char *exceptions[]) +{ + struct nvram_partition *part, *prev, *tmp; + int rc; + + list_for_each_entry(part, &nvram_partitions, partition) { + if (!nvram_can_remove_partition(part, name, sig, exceptions)) + continue; + + /* Make partition a free partition */ + part->header.signature = NVRAM_SIG_FREE; + strncpy(part->header.name, "wwwwwwwwwwww", 12); + part->header.checksum = nvram_checksum(&part->header); + rc = nvram_write_header(part); + if (rc <= 0) { + printk(KERN_ERR "nvram_remove_partition: nvram_write failed (%d)\n", rc); + return rc; + } + } + + /* Merge contiguous ones */ + prev = NULL; + list_for_each_entry_safe(part, tmp, &nvram_partitions, partition) { + if (part->header.signature != NVRAM_SIG_FREE) { + prev = NULL; + continue; + } + if (prev) { + prev->header.length += part->header.length; + prev->header.checksum = nvram_checksum(&part->header); + rc = nvram_write_header(part); + if (rc <= 0) { + printk(KERN_ERR "nvram_remove_partition: nvram_write failed (%d)\n", rc); + return rc; + } + list_del(&part->partition); + kfree(part); + } else + prev = part; + } + + return 0; +} + +/** + * nvram_create_partition - Create a partition in nvram + * @name: name of the partition to create + * @sig: signature of the partition to create + * @req_size: size of data to allocate in bytes + * @min_size: minimum acceptable size (0 means req_size) + * + * Returns a negative error code or a positive nvram index + * of the beginning of the data area of the newly created + * partition. If you provided a min_size smaller than req_size + * you need to query for the actual size yourself after the + * call using nvram_partition_get_size(). 
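+ *
+ * (Editorial illustration, not part of the original commit; the name
+ * and sizes are invented.)  A typical boot-time caller:
+ *
+ *	loff_t idx = nvram_create_partition("my-log", NVRAM_SIG_OS,
+ *					    1024, 512);
+ *	if (idx >= 0)
+ *		actual_bytes = nvram_get_partition_size(idx);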
+ */ +loff_t __init nvram_create_partition(const char *name, int sig, + int req_size, int min_size) +{ + struct nvram_partition *part; + struct nvram_partition *new_part; + struct nvram_partition *free_part = NULL; + static char nv_init_vals[16]; + loff_t tmp_index; + long size = 0; + int rc; + + /* Convert sizes from bytes to blocks */ + req_size = _ALIGN_UP(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN; + min_size = _ALIGN_UP(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN; + + /* If no minimum size specified, make it the same as the + * requested size + */ + if (min_size == 0) + min_size = req_size; + if (min_size > req_size) + return -EINVAL; + + /* Now add one block to each for the header */ + req_size += 1; + min_size += 1; + + /* Find a free partition that will give us the maximum needed size + If can't find one that will give us the minimum size needed */ + list_for_each_entry(part, &nvram_partitions, partition) { + if (part->header.signature != NVRAM_SIG_FREE) + continue; + + if (part->header.length >= req_size) { + size = req_size; + free_part = part; + break; + } + if (part->header.length > size && + part->header.length >= min_size) { + size = part->header.length; + free_part = part; + } + } + if (!size) + return -ENOSPC; + + /* Create our OS partition */ + new_part = kmalloc(sizeof(*new_part), GFP_KERNEL); + if (!new_part) { + pr_err("nvram_create_os_partition: kmalloc failed\n"); + return -ENOMEM; + } + + new_part->index = free_part->index; + new_part->header.signature = sig; + new_part->header.length = size; + strncpy(new_part->header.name, name, 12); + new_part->header.checksum = nvram_checksum(&new_part->header); + + rc = nvram_write_header(new_part); + if (rc <= 0) { + pr_err("nvram_create_os_partition: nvram_write_header " + "failed (%d)\n", rc); + return rc; + } + list_add_tail(&new_part->partition, &free_part->partition); + + /* Adjust or remove the partition we stole the space from */ + if (free_part->header.length > size) { + free_part->index += size * NVRAM_BLOCK_LEN; + free_part->header.length -= size; + free_part->header.checksum = nvram_checksum(&free_part->header); + rc = nvram_write_header(free_part); + if (rc <= 0) { + pr_err("nvram_create_os_partition: nvram_write_header " + "failed (%d)\n", rc); + return rc; + } + } else { + list_del(&free_part->partition); + kfree(free_part); + } + + /* Clear the new partition */ + for (tmp_index = new_part->index + NVRAM_HEADER_LEN; + tmp_index < ((size - 1) * NVRAM_BLOCK_LEN); + tmp_index += NVRAM_BLOCK_LEN) { + rc = ppc_md.nvram_write(nv_init_vals, NVRAM_BLOCK_LEN, &tmp_index); + if (rc <= 0) { + pr_err("nvram_create_partition: nvram_write failed (%d)\n", rc); + return rc; + } + } + + return new_part->index + NVRAM_HEADER_LEN; +} + +/** + * nvram_get_partition_size - Get the data size of an nvram partition + * @data_index: This is the offset of the start of the data of + * the partition. The same value that is returned by + * nvram_create_partition(). 
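+ *
+ * (Editorial note, not part of the original commit.)  header.length
+ * counts 16-byte blocks *including* the header's own block, hence the
+ * (length - 1) * NVRAM_BLOCK_LEN below: a partition 4 blocks long
+ * reports 48 bytes of usable data.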
+ */ +int nvram_get_partition_size(loff_t data_index) +{ + struct nvram_partition *part; + + list_for_each_entry(part, &nvram_partitions, partition) { + if (part->index + NVRAM_HEADER_LEN == data_index) + return (part->header.length - 1) * NVRAM_BLOCK_LEN; + } + return -1; +} + + +/** + * nvram_find_partition - Find an nvram partition by signature and name + * @name: Name of the partition or NULL for any name + * @sig: Signature to test against + * @out_size: if non-NULL, returns the size of the data part of the partition + */ +loff_t nvram_find_partition(const char *name, int sig, int *out_size) +{ + struct nvram_partition *p; + + list_for_each_entry(p, &nvram_partitions, partition) { + if (p->header.signature == sig && + (!name || !strncmp(p->header.name, name, 12))) { + if (out_size) + *out_size = (p->header.length - 1) * + NVRAM_BLOCK_LEN; + return p->index + NVRAM_HEADER_LEN; + } + } + return 0; +} + +int __init nvram_scan_partitions(void) +{ + loff_t cur_index = 0; + struct nvram_header phead; + struct nvram_partition * tmp_part; + unsigned char c_sum; + char * header; + int total_size; + int err; + + if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0) + return -ENODEV; + total_size = ppc_md.nvram_size(); + + header = kmalloc(NVRAM_HEADER_LEN, GFP_KERNEL); + if (!header) { + printk(KERN_ERR "nvram_scan_partitions: Failed kmalloc\n"); + return -ENOMEM; + } + + while (cur_index < total_size) { + + err = ppc_md.nvram_read(header, NVRAM_HEADER_LEN, &cur_index); + if (err != NVRAM_HEADER_LEN) { + printk(KERN_ERR "nvram_scan_partitions: Error parsing " + "nvram partitions\n"); + goto out; + } + + cur_index -= NVRAM_HEADER_LEN; /* nvram_read will advance us */ + + memcpy(&phead, header, NVRAM_HEADER_LEN); + + err = 0; + c_sum = nvram_checksum(&phead); + if (c_sum != phead.checksum) { + printk(KERN_WARNING "WARNING: nvram partition checksum" + " was %02x, should be %02x!\n", + phead.checksum, c_sum); + printk(KERN_WARNING "Terminating nvram partition scan\n"); + goto out; + } + if (!phead.length) { + printk(KERN_WARNING "WARNING: nvram corruption " + "detected: 0-length partition\n"); + goto out; + } + tmp_part = (struct nvram_partition *) + kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); + err = -ENOMEM; + if (!tmp_part) { + printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n"); + goto out; + } + + memcpy(&tmp_part->header, &phead, NVRAM_HEADER_LEN); + tmp_part->index = cur_index; + list_add_tail(&tmp_part->partition, &nvram_partitions); + + cur_index += phead.length * NVRAM_BLOCK_LEN; + } + err = 0; + +#ifdef DEBUG_NVRAM + nvram_print_partitions("NVRAM Partitions"); +#endif + + out: + kfree(header); + return err; +} + +static int __init nvram_init(void) +{ + int rc; + + BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16); + + if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0) + return -ENODEV; + + rc = misc_register(&nvram_dev); + if (rc != 0) { + printk(KERN_ERR "nvram_init: failed to register device\n"); + return rc; + } + + return rc; +} + +void __exit nvram_cleanup(void) +{ + misc_deregister( &nvram_dev ); +} + +module_init(nvram_init); +module_exit(nvram_cleanup); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c new file mode 100644 index 00000000..2049f2d0 --- /dev/null +++ b/arch/powerpc/kernel/of_platform.c @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * and Arnd Bergmann, IBM Corp. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#undef DEBUG + +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/export.h> +#include <linux/mod_devicetable.h> +#include <linux/pci.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_platform.h> +#include <linux/atomic.h> + +#include <asm/errno.h> +#include <asm/topology.h> +#include <asm/pci-bridge.h> +#include <asm/ppc-pci.h> +#include <asm/eeh.h> + +#ifdef CONFIG_PPC_OF_PLATFORM_PCI + +/* The probing of PCI controllers from of_platform is currently + * 64 bits only, mostly due to gratuitous differences between + * the 32 and 64 bits PCI code on PowerPC and the 32 bits one + * lacking some bits needed here. + */ + +static int __devinit of_pci_phb_probe(struct platform_device *dev) +{ + struct pci_controller *phb; + + /* Check if we can do that ... */ + if (ppc_md.pci_setup_phb == NULL) + return -ENODEV; + + pr_info("Setting up PCI bus %s\n", dev->dev.of_node->full_name); + + /* Alloc and setup PHB data structure */ + phb = pcibios_alloc_controller(dev->dev.of_node); + if (!phb) + return -ENODEV; + + /* Setup parent in sysfs */ + phb->parent = &dev->dev; + + /* Setup the PHB using arch provided callback */ + if (ppc_md.pci_setup_phb(phb)) { + pcibios_free_controller(phb); + return -ENODEV; + } + + /* Process "ranges" property */ + pci_process_bridge_OF_ranges(phb, dev->dev.of_node, 0); + + /* Init pci_dn data structures */ + pci_devs_phb_init_dynamic(phb); + + /* Create EEH devices for the PHB */ + eeh_dev_phb_init_dynamic(phb); + + /* Register devices with EEH */ +#ifdef CONFIG_EEH + if (dev->dev.of_node->child) + eeh_add_device_tree_early(dev->dev.of_node); +#endif /* CONFIG_EEH */ + + /* Scan the bus */ + pcibios_scan_phb(phb); + if (phb->bus == NULL) + return -ENXIO; + + /* Claim resources. This might need some rework as well depending + * wether we are doing probe-only or not, like assigning unassigned + * resources etc... + */ + pcibios_claim_one_bus(phb->bus); + + /* Finish EEH setup */ +#ifdef CONFIG_EEH + eeh_add_device_tree_late(phb->bus); +#endif + + /* Add probed PCI devices to the device model */ + pci_bus_add_devices(phb->bus); + + return 0; +} + +static struct of_device_id of_pci_phb_ids[] = { + { .type = "pci", }, + { .type = "pcix", }, + { .type = "pcie", }, + { .type = "pciex", }, + { .type = "ht", }, + {} +}; + +static struct platform_driver of_pci_phb_driver = { + .probe = of_pci_phb_probe, + .driver = { + .name = "of-pci", + .owner = THIS_MODULE, + .of_match_table = of_pci_phb_ids, + }, +}; + +static __init int of_pci_phb_init(void) +{ + return platform_driver_register(&of_pci_phb_driver); +} + +device_initcall(of_pci_phb_init); + +#endif /* CONFIG_PPC_OF_PLATFORM_PCI */ diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c new file mode 100644 index 00000000..0bb1f986 --- /dev/null +++ b/arch/powerpc/kernel/paca.c @@ -0,0 +1,219 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/smp.h> +#include <linux/export.h> +#include <linux/memblock.h> + +#include <asm/lppaca.h> +#include <asm/paca.h> +#include <asm/sections.h> +#include <asm/pgtable.h> +#include <asm/kexec.h> + +/* This symbol is provided by the linker - let it fill in the paca + * field correctly */ +extern unsigned long __toc_start; + +#ifdef CONFIG_PPC_BOOK3S + +/* + * The structure which the hypervisor knows about - this structure + * should not cross a page boundary. The vpa_init/register_vpa call + * is now known to fail if the lppaca structure crosses a page + * boundary. The lppaca is also used on POWER5 pSeries boxes. + * The lppaca is 640 bytes long, and cannot readily + * change since the hypervisor knows its layout, so a 1kB alignment + * will suffice to ensure that it doesn't cross a page boundary. + */ +struct lppaca lppaca[] = { + [0 ... (NR_LPPACAS-1)] = { + .desc = 0xd397d781, /* "LpPa" */ + .size = sizeof(struct lppaca), + .dyn_proc_status = 2, + .decr_val = 0x00ff0000, + .fpregs_in_use = 1, + .end_of_quantum = 0xfffffffffffffffful, + .slb_count = 64, + .vmxregs_in_use = 0, + .page_ins = 0, + }, +}; + +static struct lppaca *extra_lppacas; +static long __initdata lppaca_size; + +static void allocate_lppacas(int nr_cpus, unsigned long limit) +{ + if (nr_cpus <= NR_LPPACAS) + return; + + lppaca_size = PAGE_ALIGN(sizeof(struct lppaca) * + (nr_cpus - NR_LPPACAS)); + extra_lppacas = __va(memblock_alloc_base(lppaca_size, + PAGE_SIZE, limit)); +} + +static struct lppaca *new_lppaca(int cpu) +{ + struct lppaca *lp; + + if (cpu < NR_LPPACAS) + return &lppaca[cpu]; + + lp = extra_lppacas + (cpu - NR_LPPACAS); + *lp = lppaca[0]; + + return lp; +} + +static void free_lppacas(void) +{ + long new_size = 0, nr; + + if (!lppaca_size) + return; + nr = num_possible_cpus() - NR_LPPACAS; + if (nr > 0) + new_size = PAGE_ALIGN(nr * sizeof(struct lppaca)); + if (new_size >= lppaca_size) + return; + + memblock_free(__pa(extra_lppacas) + new_size, lppaca_size - new_size); + lppaca_size = new_size; +} + +#else + +static inline void allocate_lppacas(int nr_cpus, unsigned long limit) { } +static inline void free_lppacas(void) { } + +#endif /* CONFIG_PPC_BOOK3S */ + +#ifdef CONFIG_PPC_STD_MMU_64 + +/* + * 3 persistent SLBs are registered here. The buffer will be zero + * initially, hence will all be invaild until we actually write them. + */ +struct slb_shadow slb_shadow[] __cacheline_aligned = { + [0 ... (NR_CPUS-1)] = { + .persistent = SLB_NUM_BOLTED, + .buffer_length = sizeof(struct slb_shadow), + }, +}; + +#endif /* CONFIG_PPC_STD_MMU_64 */ + +/* The Paca is an array with one entry per processor. Each contains an + * lppaca, which contains the information shared between the + * hypervisor and Linux. + * On systems with hardware multi-threading, there are two threads + * per processor. The Paca array must contain an entry for each thread. + * The VPD Areas will give a max logical processors = 2 * max physical + * processors. The processor VPD array needs one entry per physical + * processor (not thread). + */ +struct paca_struct *paca; +EXPORT_SYMBOL(paca); + +struct paca_struct boot_paca; + +void __init initialise_paca(struct paca_struct *new_paca, int cpu) +{ + /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB + * of the TOC can be addressed using a single machine instruction. 
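+	 * (Editorial note, not part of the original commit.)  A D-form
+	 * access such as "ld rN,D(r2)" has a signed 16-bit displacement
+	 * D in [-0x8000, 0x7fff]; biasing r2 by +0x8000 therefore makes
+	 * the reachable window exactly [__toc_start, __toc_start +
+	 * 0x10000).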
+ */ + unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL; + +#ifdef CONFIG_PPC_BOOK3S + new_paca->lppaca_ptr = new_lppaca(cpu); +#else + new_paca->kernel_pgd = swapper_pg_dir; +#endif + new_paca->lock_token = 0x8000; + new_paca->paca_index = cpu; + new_paca->kernel_toc = kernel_toc; + new_paca->kernelbase = (unsigned long) _stext; + new_paca->kernel_msr = MSR_KERNEL; + new_paca->hw_cpu_id = 0xffff; + new_paca->kexec_state = KEXEC_STATE_NONE; + new_paca->__current = &init_task; +#ifdef CONFIG_PPC_STD_MMU_64 + new_paca->slb_shadow_ptr = &slb_shadow[cpu]; +#endif /* CONFIG_PPC_STD_MMU_64 */ +} + +/* Put the paca pointer into r13 and SPRG_PACA */ +void setup_paca(struct paca_struct *new_paca) +{ + /* Setup r13 */ + local_paca = new_paca; + +#ifdef CONFIG_PPC_BOOK3E + /* On Book3E, initialize the TLB miss exception frames */ + mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); +#else + /* In HV mode, we setup both HPACA and PACA to avoid problems + * if we do a GET_PACA() before the feature fixups have been + * applied + */ + if (cpu_has_feature(CPU_FTR_HVMODE)) + mtspr(SPRN_SPRG_HPACA, local_paca); +#endif + mtspr(SPRN_SPRG_PACA, local_paca); + +} + +static int __initdata paca_size; + +void __init allocate_pacas(void) +{ + int cpu, limit; + + /* + * We can't take SLB misses on the paca, and we want to access them + * in real mode, so allocate them within the RMA and also within + * the first segment. + */ + limit = min(0x10000000ULL, ppc64_rma_size); + + paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); + + paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit)); + memset(paca, 0, paca_size); + + printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n", + paca_size, nr_cpu_ids, paca); + + allocate_lppacas(nr_cpu_ids, limit); + + /* Can't use for_each_*_cpu, as they aren't functional yet */ + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + initialise_paca(&paca[cpu], cpu); +} + +void __init free_unused_pacas(void) +{ + int new_size; + + new_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); + + if (new_size >= paca_size) + return; + + memblock_free(__pa(paca) + new_size, paca_size - new_size); + + printk(KERN_DEBUG "Freed %u bytes for unused pacas\n", + paca_size - new_size); + + paca_size = new_size; + + free_lppacas(); +} diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c new file mode 100644 index 00000000..8e78e93c --- /dev/null +++ b/arch/powerpc/kernel/pci-common.c @@ -0,0 +1,1702 @@ +/* + * Contains common pci routines for ALL ppc platform + * (based on pci_32.c and pci_64.c) + * + * Port for PPC64 David Engebretsen, IBM Corp. + * Contains common pci routines for ppc64 platform, pSeries and iSeries brands. + * + * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM + * Rework, based on alpha PCI code. + * + * Common pmac/prep/chrp pci routines. -- Cort + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/export.h> +#include <linux/of_address.h> +#include <linux/of_pci.h> +#include <linux/mm.h> +#include <linux/list.h> +#include <linux/syscalls.h> +#include <linux/irq.h> +#include <linux/vmalloc.h> +#include <linux/slab.h> + +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/byteorder.h> +#include <asm/machdep.h> +#include <asm/ppc-pci.h> +#include <asm/eeh.h> + +static DEFINE_SPINLOCK(hose_spinlock); +LIST_HEAD(hose_list); + +/* XXX kill that some day ... */ +static int global_phb_number; /* Global phb counter */ + +/* ISA Memory physical address */ +resource_size_t isa_mem_base; + + +static struct dma_map_ops *pci_dma_ops = &dma_direct_ops; + +void set_pci_dma_ops(struct dma_map_ops *dma_ops) +{ + pci_dma_ops = dma_ops; +} + +struct dma_map_ops *get_pci_dma_ops(void) +{ + return pci_dma_ops; +} +EXPORT_SYMBOL(get_pci_dma_ops); + +struct pci_controller *pcibios_alloc_controller(struct device_node *dev) +{ + struct pci_controller *phb; + + phb = zalloc_maybe_bootmem(sizeof(struct pci_controller), GFP_KERNEL); + if (phb == NULL) + return NULL; + spin_lock(&hose_spinlock); + phb->global_number = global_phb_number++; + list_add_tail(&phb->list_node, &hose_list); + spin_unlock(&hose_spinlock); + phb->dn = dev; + phb->is_dynamic = mem_init_done; +#ifdef CONFIG_PPC64 + if (dev) { + int nid = of_node_to_nid(dev); + + if (nid < 0 || !node_online(nid)) + nid = -1; + + PHB_SET_NODE(phb, nid); + } +#endif + return phb; +} + +void pcibios_free_controller(struct pci_controller *phb) +{ + spin_lock(&hose_spinlock); + list_del(&phb->list_node); + spin_unlock(&hose_spinlock); + + if (phb->is_dynamic) + kfree(phb); +} + +static resource_size_t pcibios_io_size(const struct pci_controller *hose) +{ +#ifdef CONFIG_PPC64 + return hose->pci_io_size; +#else + return resource_size(&hose->io_resource); +#endif +} + +int pcibios_vaddr_is_ioport(void __iomem *address) +{ + int ret = 0; + struct pci_controller *hose; + resource_size_t size; + + spin_lock(&hose_spinlock); + list_for_each_entry(hose, &hose_list, list_node) { + size = pcibios_io_size(hose); + if (address >= hose->io_base_virt && + address < (hose->io_base_virt + size)) { + ret = 1; + break; + } + } + spin_unlock(&hose_spinlock); + return ret; +} + +unsigned long pci_address_to_pio(phys_addr_t address) +{ + struct pci_controller *hose; + resource_size_t size; + unsigned long ret = ~0; + + spin_lock(&hose_spinlock); + list_for_each_entry(hose, &hose_list, list_node) { + size = pcibios_io_size(hose); + if (address >= hose->io_base_phys && + address < (hose->io_base_phys + size)) { + unsigned long base = + (unsigned long)hose->io_base_virt - _IO_BASE; + ret = base + (address - hose->io_base_phys); + break; + } + } + spin_unlock(&hose_spinlock); + + return ret; +} +EXPORT_SYMBOL_GPL(pci_address_to_pio); + +/* + * Return the domain number for this bus. + */ +int pci_domain_nr(struct pci_bus *bus) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + + return hose->global_number; +} +EXPORT_SYMBOL(pci_domain_nr); + +/* This routine is meant to be used early during boot, when the + * PCI bus numbers have not yet been assigned, and you need to + * issue PCI config cycles to an OF device. + * It could also be used to "fix" RTAS config cycles if you want + * to set pci_assign_all_buses to 1 and still use RTAS for PCI + * config cycles. 
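+ *
+ * (Editorial illustration, not part of the original commit; the
+ * variables are made up.)  A typical early-boot caller:
+ *
+ *	hose = pci_find_hose_for_OF_device(np);
+ *	if (hose)
+ *		early_read_config_word(hose, bus, devfn,
+ *				       PCI_VENDOR_ID, &vendor);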
+ */ +struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node) +{ + while(node) { + struct pci_controller *hose, *tmp; + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) + if (hose->dn == node) + return hose; + node = node->parent; + } + return NULL; +} + +static ssize_t pci_show_devspec(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev; + struct device_node *np; + + pdev = to_pci_dev (dev); + np = pci_device_to_OF_node(pdev); + if (np == NULL || np->full_name == NULL) + return 0; + return sprintf(buf, "%s", np->full_name); +} +static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL); + +/* Add sysfs properties */ +int pcibios_add_platform_entries(struct pci_dev *pdev) +{ + return device_create_file(&pdev->dev, &dev_attr_devspec); +} + +char __devinit *pcibios_setup(char *str) +{ + return str; +} + +/* + * Reads the interrupt pin to determine if interrupt is use by card. + * If the interrupt is used, then gets the interrupt line from the + * openfirmware and sets it in the pci_dev and pci_config line. + */ +static int pci_read_irq_line(struct pci_dev *pci_dev) +{ + struct of_irq oirq; + unsigned int virq; + + pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev)); + +#ifdef DEBUG + memset(&oirq, 0xff, sizeof(oirq)); +#endif + /* Try to get a mapping from the device-tree */ + if (of_irq_map_pci(pci_dev, &oirq)) { + u8 line, pin; + + /* If that fails, lets fallback to what is in the config + * space and map that through the default controller. We + * also set the type to level low since that's what PCI + * interrupts are. If your platform does differently, then + * either provide a proper interrupt tree or don't use this + * function. + */ + if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin)) + return -1; + if (pin == 0) + return -1; + if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) || + line == 0xff || line == 0) { + return -1; + } + pr_debug(" No map ! Using line %d (pin %d) from PCI config\n", + line, pin); + + virq = irq_create_mapping(NULL, line); + if (virq != NO_IRQ) + irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW); + } else { + pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n", + oirq.size, oirq.specifier[0], oirq.specifier[1], + oirq.controller ? oirq.controller->full_name : + "<default>"); + + virq = irq_create_of_mapping(oirq.controller, oirq.specifier, + oirq.size); + } + if(virq == NO_IRQ) { + pr_debug(" Failed to map !\n"); + return -1; + } + + pr_debug(" Mapped to linux irq %d\n", virq); + + pci_dev->irq = virq; + + return 0; +} + +/* + * Platform support for /proc/bus/pci/X/Y mmap()s, + * modelled on the sparc64 implementation by Dave Miller. + * -- paulus. + */ + +/* + * Adjust vm_pgoff of VMA such that it is the physical page offset + * corresponding to the 32-bit pci bus offset for DEV requested by the user. + * + * Basically, the user finds the base address for his device which he wishes + * to mmap. They read the 32-bit value from the config space base register, + * add whatever PAGE_SIZE multiple offset they wish, and feed this into the + * offset parameter of mmap on /proc/bus/pci/XXX for that device. + * + * Returns negative error code on failure, zero on success. 
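+ *
+ * (Editorial illustration, not part of the original commit; the path
+ * and sizes are made up.)  In userland that recipe reads roughly:
+ *
+ *	fd = open("/proc/bus/pci/00/02.0", O_RDWR);
+ *	pread(fd, &bar, 4, PCI_BASE_ADDRESS_0);
+ *	off = (bar & PCI_BASE_ADDRESS_MEM_MASK) + n * PAGE_SIZE;
+ *	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, off);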
+ */ +static struct resource *__pci_mmap_make_offset(struct pci_dev *dev, + resource_size_t *offset, + enum pci_mmap_state mmap_state) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + unsigned long io_offset = 0; + int i, res_bit; + + if (hose == 0) + return NULL; /* should never happen */ + + /* If memory, add on the PCI bridge address offset */ + if (mmap_state == pci_mmap_mem) { +#if 0 /* See comment in pci_resource_to_user() for why this is disabled */ + *offset += hose->pci_mem_offset; +#endif + res_bit = IORESOURCE_MEM; + } else { + io_offset = (unsigned long)hose->io_base_virt - _IO_BASE; + *offset += io_offset; + res_bit = IORESOURCE_IO; + } + + /* + * Check that the offset requested corresponds to one of the + * resources of the device. + */ + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { + struct resource *rp = &dev->resource[i]; + int flags = rp->flags; + + /* treat ROM as memory (should be already) */ + if (i == PCI_ROM_RESOURCE) + flags |= IORESOURCE_MEM; + + /* Active and same type? */ + if ((flags & res_bit) == 0) + continue; + + /* In the range of this resource? */ + if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end) + continue; + + /* found it! construct the final physical address */ + if (mmap_state == pci_mmap_io) + *offset += hose->io_base_phys - io_offset; + return rp; + } + + return NULL; +} + +/* + * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci + * device mapping. + */ +static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, + pgprot_t protection, + enum pci_mmap_state mmap_state, + int write_combine) +{ + unsigned long prot = pgprot_val(protection); + + /* Write combine is always 0 on non-memory space mappings. On + * memory space, if the user didn't pass 1, we check for a + * "prefetchable" resource. This is a bit hackish, but we use + * this to workaround the inability of /sysfs to provide a write + * combine bit + */ + if (mmap_state != pci_mmap_mem) + write_combine = 0; + else if (write_combine == 0) { + if (rp->flags & IORESOURCE_PREFETCH) + write_combine = 1; + } + + /* XXX would be nice to have a way to ask for write-through */ + if (write_combine) + return pgprot_noncached_wc(prot); + else + return pgprot_noncached(prot); +} + +/* + * This one is used by /dev/mem and fbdev who have no clue about the + * PCI device, it tries to find the PCI device first and calls the + * above routine + */ +pgprot_t pci_phys_mem_access_prot(struct file *file, + unsigned long pfn, + unsigned long size, + pgprot_t prot) +{ + struct pci_dev *pdev = NULL; + struct resource *found = NULL; + resource_size_t offset = ((resource_size_t)pfn) << PAGE_SHIFT; + int i; + + if (page_is_ram(pfn)) + return prot; + + prot = pgprot_noncached(prot); + for_each_pci_dev(pdev) { + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { + struct resource *rp = &pdev->resource[i]; + int flags = rp->flags; + + /* Active and same type? */ + if ((flags & IORESOURCE_MEM) == 0) + continue; + /* In the range of this resource? */ + if (offset < (rp->start & PAGE_MASK) || + offset > rp->end) + continue; + found = rp; + break; + } + if (found) + break; + } + if (found) { + if (found->flags & IORESOURCE_PREFETCH) + prot = pgprot_noncached_wc(prot); + pci_dev_put(pdev); + } + + pr_debug("PCI: Non-PCI map for %llx, prot: %lx\n", + (unsigned long long)offset, pgprot_val(prot)); + + return prot; +} + + +/* + * Perform the actual remap of the pages for a PCI device mapping, as + * appropriate for this architecture. 
The region in the process to map + * is described by vm_start and vm_end members of VMA, the base physical + * address is found in vm_pgoff. + * The pci device structure is provided so that architectures may make mapping + * decisions on a per-device or per-bus basis. + * + * Returns a negative error code on failure, zero on success. + */ +int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine) +{ + resource_size_t offset = + ((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT; + struct resource *rp; + int ret; + + rp = __pci_mmap_make_offset(dev, &offset, mmap_state); + if (rp == NULL) + return -EINVAL; + + vma->vm_pgoff = offset >> PAGE_SHIFT; + vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp, + vma->vm_page_prot, + mmap_state, write_combine); + + ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, vma->vm_page_prot); + + return ret; +} + +/* This provides legacy IO read access on a bus */ +int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size) +{ + unsigned long offset; + struct pci_controller *hose = pci_bus_to_host(bus); + struct resource *rp = &hose->io_resource; + void __iomem *addr; + + /* Check if port can be supported by that bus. We only check + * the ranges of the PHB though, not the bus itself as the rules + * for forwarding legacy cycles down bridges are not our problem + * here. So if the host bridge supports it, we do it. + */ + offset = (unsigned long)hose->io_base_virt - _IO_BASE; + offset += port; + + if (!(rp->flags & IORESOURCE_IO)) + return -ENXIO; + if (offset < rp->start || (offset + size) > rp->end) + return -ENXIO; + addr = hose->io_base_virt + port; + + switch(size) { + case 1: + *((u8 *)val) = in_8(addr); + return 1; + case 2: + if (port & 1) + return -EINVAL; + *((u16 *)val) = in_le16(addr); + return 2; + case 4: + if (port & 3) + return -EINVAL; + *((u32 *)val) = in_le32(addr); + return 4; + } + return -EINVAL; +} + +/* This provides legacy IO write access on a bus */ +int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size) +{ + unsigned long offset; + struct pci_controller *hose = pci_bus_to_host(bus); + struct resource *rp = &hose->io_resource; + void __iomem *addr; + + /* Check if port can be supported by that bus. We only check + * the ranges of the PHB though, not the bus itself as the rules + * for forwarding legacy cycles down bridges are not our problem + * here. So if the host bridge supports it, we do it. + */ + offset = (unsigned long)hose->io_base_virt - _IO_BASE; + offset += port; + + if (!(rp->flags & IORESOURCE_IO)) + return -ENXIO; + if (offset < rp->start || (offset + size) > rp->end) + return -ENXIO; + addr = hose->io_base_virt + port; + + /* WARNING: The generic code is idiotic. 
It gets passed a pointer + * to what can be a 1, 2 or 4 byte quantity and always reads that + * as a u32, which means that we have to correct the location of + * the data read within those 32 bits for size 1 and 2 + */ + switch(size) { + case 1: + out_8(addr, val >> 24); + return 1; + case 2: + if (port & 1) + return -EINVAL; + out_le16(addr, val >> 16); + return 2; + case 4: + if (port & 3) + return -EINVAL; + out_le32(addr, val); + return 4; + } + return -EINVAL; +} + +/* This provides legacy IO or memory mmap access on a bus */ +int pci_mmap_legacy_page_range(struct pci_bus *bus, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_state) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + resource_size_t offset = + ((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT; + resource_size_t size = vma->vm_end - vma->vm_start; + struct resource *rp; + + pr_debug("pci_mmap_legacy_page_range(%04x:%02x, %s @%llx..%llx)\n", + pci_domain_nr(bus), bus->number, + mmap_state == pci_mmap_mem ? "MEM" : "IO", + (unsigned long long)offset, + (unsigned long long)(offset + size - 1)); + + if (mmap_state == pci_mmap_mem) { + /* Hack alert ! + * + * Because X is lame and can fail starting if it gets an error trying + * to mmap legacy_mem (instead of just moving on without legacy memory + * access) we fake it here by giving it anonymous memory, effectively + * behaving just like /dev/zero + */ + if ((offset + size) > hose->isa_mem_size) { + printk(KERN_DEBUG + "Process %s (pid:%d) mapped non-existing PCI legacy memory for 0%04x:%02x\n", + current->comm, current->pid, pci_domain_nr(bus), bus->number); + if (vma->vm_flags & VM_SHARED) + return shmem_zero_setup(vma); + return 0; + } + offset += hose->isa_mem_phys; + } else { + unsigned long io_offset = (unsigned long)hose->io_base_virt - _IO_BASE; + unsigned long roffset = offset + io_offset; + rp = &hose->io_resource; + if (!(rp->flags & IORESOURCE_IO)) + return -ENXIO; + if (roffset < rp->start || (roffset + size) > rp->end) + return -ENXIO; + offset += hose->io_base_phys; + } + pr_debug(" -> mapping phys %llx\n", (unsigned long long)offset); + + vma->vm_pgoff = offset >> PAGE_SHIFT; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + +void pci_resource_to_user(const struct pci_dev *dev, int bar, + const struct resource *rsrc, + resource_size_t *start, resource_size_t *end) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + resource_size_t offset = 0; + + if (hose == NULL) + return; + + if (rsrc->flags & IORESOURCE_IO) + offset = (unsigned long)hose->io_base_virt - _IO_BASE; + + /* We pass a fully fixed up address to userland for MMIO instead of + * a BAR value because X is lame and expects to be able to use that + * to pass to /dev/mem ! + * + * That means that we'll have potentially 64 bits values where some + * userland apps only expect 32 (like X itself since it thinks only + * Sparc has 64 bits MMIO) but if we don't do that, we break it on + * 32 bits CHRPs :-( + * + * Hopefully, the sysfs insterface is immune to that gunk. Once X + * has been fixed (and the fix spread enough), we can re-enable the + * 2 lines below and pass down a BAR value to userland. In that case + * we'll also have to re-enable the matching code in + * __pci_mmap_make_offset(). + * + * BenH. 
+ */ +#if 0 + else if (rsrc->flags & IORESOURCE_MEM) + offset = hose->pci_mem_offset; +#endif + + *start = rsrc->start - offset; + *end = rsrc->end - offset; +} + +/** + * pci_process_bridge_OF_ranges - Parse PCI bridge resources from device tree + * @hose: newly allocated pci_controller to be setup + * @dev: device node of the host bridge + * @primary: set if primary bus (32 bits only, soon to be deprecated) + * + * This function will parse the "ranges" property of a PCI host bridge device + * node and setup the resource mapping of a pci controller based on its + * content. + * + * Life would be boring if it wasn't for a few issues that we have to deal + * with here: + * + * - We can only cope with one IO space range and up to 3 Memory space + * ranges. However, some machines (thanks Apple !) tend to split their + * space into lots of small contiguous ranges. So we have to coalesce. + * + * - We can only cope with all memory ranges having the same offset + * between CPU addresses and PCI addresses. Unfortunately, some bridges + * are setup for a large 1:1 mapping along with a small "window" which + * maps PCI address 0 to some arbitrary high address of the CPU space in + * order to give access to the ISA memory hole. + * The way out of here that I've chosen for now is to always set the + * offset based on the first resource found, then override it if we + * have a different offset and the previous was set by an ISA hole. + * + * - Some busses have IO space not starting at 0, which causes trouble with + * the way we do our IO resource renumbering. The code somewhat deals with + * it for 64 bits but I would expect problems on 32 bits. + * + * - Some 32 bits platforms such as 4xx can have physical space larger than + * 32 bits so we need to use 64 bits values for the parsing + */ +void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose, + struct device_node *dev, + int primary) +{ + const u32 *ranges; + int rlen; + int pna = of_n_addr_cells(dev); + int np = pna + 5; + int memno = 0, isa_hole = -1; + u32 pci_space; + unsigned long long pci_addr, cpu_addr, pci_next, cpu_next, size; + unsigned long long isa_mb = 0; + struct resource *res; + + printk(KERN_INFO "PCI host bridge %s %s ranges:\n", + dev->full_name, primary ? 
"(primary)" : ""); + + /* Get ranges property */ + ranges = of_get_property(dev, "ranges", &rlen); + if (ranges == NULL) + return; + + /* Parse it */ + while ((rlen -= np * 4) >= 0) { + /* Read next ranges element */ + pci_space = ranges[0]; + pci_addr = of_read_number(ranges + 1, 2); + cpu_addr = of_translate_address(dev, ranges + 3); + size = of_read_number(ranges + pna + 3, 2); + ranges += np; + + /* If we failed translation or got a zero-sized region + * (some FW try to feed us with non sensical zero sized regions + * such as power3 which look like some kind of attempt at exposing + * the VGA memory hole) + */ + if (cpu_addr == OF_BAD_ADDR || size == 0) + continue; + + /* Now consume following elements while they are contiguous */ + for (; rlen >= np * sizeof(u32); + ranges += np, rlen -= np * 4) { + if (ranges[0] != pci_space) + break; + pci_next = of_read_number(ranges + 1, 2); + cpu_next = of_translate_address(dev, ranges + 3); + if (pci_next != pci_addr + size || + cpu_next != cpu_addr + size) + break; + size += of_read_number(ranges + pna + 3, 2); + } + + /* Act based on address space type */ + res = NULL; + switch ((pci_space >> 24) & 0x3) { + case 1: /* PCI IO space */ + printk(KERN_INFO + " IO 0x%016llx..0x%016llx -> 0x%016llx\n", + cpu_addr, cpu_addr + size - 1, pci_addr); + + /* We support only one IO range */ + if (hose->pci_io_size) { + printk(KERN_INFO + " \\--> Skipped (too many) !\n"); + continue; + } +#ifdef CONFIG_PPC32 + /* On 32 bits, limit I/O space to 16MB */ + if (size > 0x01000000) + size = 0x01000000; + + /* 32 bits needs to map IOs here */ + hose->io_base_virt = ioremap(cpu_addr, size); + + /* Expect trouble if pci_addr is not 0 */ + if (primary) + isa_io_base = + (unsigned long)hose->io_base_virt; +#endif /* CONFIG_PPC32 */ + /* pci_io_size and io_base_phys always represent IO + * space starting at 0 so we factor in pci_addr + */ + hose->pci_io_size = pci_addr + size; + hose->io_base_phys = cpu_addr - pci_addr; + + /* Build resource */ + res = &hose->io_resource; + res->flags = IORESOURCE_IO; + res->start = pci_addr; + break; + case 2: /* PCI Memory space */ + case 3: /* PCI 64 bits Memory space */ + printk(KERN_INFO + " MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n", + cpu_addr, cpu_addr + size - 1, pci_addr, + (pci_space & 0x40000000) ? "Prefetch" : ""); + + /* We support only 3 memory ranges */ + if (memno >= 3) { + printk(KERN_INFO + " \\--> Skipped (too many) !\n"); + continue; + } + /* Handles ISA memory hole space here */ + if (pci_addr == 0) { + isa_mb = cpu_addr; + isa_hole = memno; + if (primary || isa_mem_base == 0) + isa_mem_base = cpu_addr; + hose->isa_mem_phys = cpu_addr; + hose->isa_mem_size = size; + } + + /* We get the PCI/Mem offset from the first range or + * the, current one if the offset came from an ISA + * hole. If they don't match, bugger. 
+ */
+			if (memno == 0 ||
+			    (isa_hole >= 0 && pci_addr != 0 &&
+			     hose->pci_mem_offset == isa_mb))
+				hose->pci_mem_offset = cpu_addr - pci_addr;
+			else if (pci_addr != 0 &&
+				 hose->pci_mem_offset != cpu_addr - pci_addr) {
+				printk(KERN_INFO
+				       " \\--> Skipped (offset mismatch) !\n");
+				continue;
+			}
+
+			/* Build resource */
+			res = &hose->mem_resources[memno++];
+			res->flags = IORESOURCE_MEM;
+			if (pci_space & 0x40000000)
+				res->flags |= IORESOURCE_PREFETCH;
+			res->start = cpu_addr;
+			break;
+		}
+		if (res != NULL) {
+			res->name = dev->full_name;
+			res->end = res->start + size - 1;
+			res->parent = NULL;
+			res->sibling = NULL;
+			res->child = NULL;
+		}
+	}
+
+	/* If there's an ISA hole and the pci_mem_offset is -not- matching
+	 * the ISA hole offset, then we need to remove the ISA hole from
+	 * the resource list for that bridge
+	 */
+	if (isa_hole >= 0 && hose->pci_mem_offset != isa_mb) {
+		unsigned int next = isa_hole + 1;
+		printk(KERN_INFO " Removing ISA hole at 0x%016llx\n", isa_mb);
+		if (next < memno)
+			memmove(&hose->mem_resources[isa_hole],
+				&hose->mem_resources[next],
+				sizeof(struct resource) * (memno - next));
+		hose->mem_resources[--memno].flags = 0;
+	}
+}
+
+/* Decide whether to display the domain number in /proc */
+int pci_proc_domain(struct pci_bus *bus)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+
+	if (!pci_has_flag(PCI_ENABLE_PROC_DOMAINS))
+		return 0;
+	if (pci_has_flag(PCI_COMPAT_DOMAIN_0))
+		return hose->global_number != 0;
+	return 1;
+}
+
+/* This header fixup will do the resource fixup for all devices as they are
+ * probed, but not for bridge ranges
+ */
+static void __devinit pcibios_fixup_resources(struct pci_dev *dev)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	int i;
+
+	if (!hose) {
+		printk(KERN_ERR "No host bridge for PCI dev %s !\n",
+		       pci_name(dev));
+		return;
+	}
+	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+		struct resource *res = dev->resource + i;
+		if (!res->flags)
+			continue;
+
+		/* If we're going to re-assign everything, we mark all resources
+		 * as unset (and 0-base them). In addition, we mark BARs starting
+		 * at 0 as unset as well, except if PCI_PROBE_ONLY is also set
+		 * since in that case, we don't want to re-assign anything
+		 */
+		if (pci_has_flag(PCI_REASSIGN_ALL_RSRC) ||
+		    (res->start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) {
+			/* Only print message if not re-assigning */
+			if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC))
+				pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] "
+					 "is unassigned\n",
+					 pci_name(dev), i,
+					 (unsigned long long)res->start,
+					 (unsigned long long)res->end,
+					 (unsigned int)res->flags);
+			res->end -= res->start;
+			res->start = 0;
+			res->flags |= IORESOURCE_UNSET;
+			continue;
+		}
+
+		pr_debug("PCI:%s Resource %d %016llx-%016llx [%x]\n",
+			 pci_name(dev), i,
+			 (unsigned long long)res->start,
+			 (unsigned long long)res->end,
+			 (unsigned int)res->flags);
+	}
+
+	/* Call machine specific resource fixup */
+	if (ppc_md.pcibios_fixup_resources)
+		ppc_md.pcibios_fixup_resources(dev);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources);
+
+/* This function tries to figure out if a bridge resource has been initialized
+ * by the firmware or not. It doesn't have to be absolutely bullet proof, but
+ * things go more smoothly when it gets it right.  It should cover cases such
It should cover cases such
+ * as Apple "closed" bridge resources and bare-metal pSeries unassigned bridges
+ */
+static int __devinit pcibios_uninitialized_bridge_resource(struct pci_bus *bus,
+							    struct resource *res)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct pci_dev *dev = bus->self;
+	resource_size_t offset;
+	u16 command;
+	int i;
+
+	/* We don't do anything if PCI_PROBE_ONLY is set */
+	if (pci_has_flag(PCI_PROBE_ONLY))
+		return 0;
+
+	/* Job is a bit different between memory and IO */
+	if (res->flags & IORESOURCE_MEM) {
+		/* If the BAR is non-0 (res != pci_mem_offset) then it's
+		 * probably been initialized by somebody
+		 */
+		if (res->start != hose->pci_mem_offset)
+			return 0;
+
+		/* The BAR is 0, let's check if memory decoding is enabled on
+		 * the bridge. If not, we consider it unassigned
+		 */
+		pci_read_config_word(dev, PCI_COMMAND, &command);
+		if ((command & PCI_COMMAND_MEMORY) == 0)
+			return 1;
+
+		/* Memory decoding is enabled and the BAR is 0. If any of the
+		 * bridge resources covers that starting address (0), then it's
+		 * good enough for us for memory
+		 */
+		for (i = 0; i < 3; i++) {
+			if ((hose->mem_resources[i].flags & IORESOURCE_MEM) &&
+			    hose->mem_resources[i].start == hose->pci_mem_offset)
+				return 0;
+		}
+
+		/* Well, it starts at 0 and we know it will collide so we may as
+		 * well consider it as unassigned. That covers the Apple case.
+		 */
+		return 1;
+	} else {
+		/* If the BAR is non-0, then we consider it assigned */
+		offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+		if (((res->start - offset) & 0xfffffffful) != 0)
+			return 0;
+
+		/* Here, we are a bit different than memory as typically IO
+		 * space starting at low addresses -is- valid. What we do
+		 * instead is that we consider as unassigned anything that
+		 * doesn't have IO enabled in the PCI command register,
+		 * and that's it.
+		 */
+		pci_read_config_word(dev, PCI_COMMAND, &command);
+		if (command & PCI_COMMAND_IO)
+			return 0;
+
+		/* It's starting at 0 and IO is disabled in the bridge,
+		 * consider it unassigned
+		 */
+		return 1;
+	}
+}
+
+/* Fixup resources of a PCI<->PCI bridge */
+static void __devinit pcibios_fixup_bridge(struct pci_bus *bus)
+{
+	struct resource *res;
+	int i;
+
+	struct pci_dev *dev = bus->self;
+
+	pci_bus_for_each_resource(bus, res, i) {
+		if (!res || !res->flags)
+			continue;
+		if (i >= 3 && bus->self->transparent)
+			continue;
+
+		/* If we are going to re-assign everything, mark the resource
+		 * as unset and move it down to 0
+		 */
+		if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) {
+			res->flags |= IORESOURCE_UNSET;
+			res->end -= res->start;
+			res->start = 0;
+			continue;
+		}
+
+		pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x]\n",
+			 pci_name(dev), i,
+			 (unsigned long long)res->start,
+			 (unsigned long long)res->end,
+			 (unsigned int)res->flags);
+
+		/* Try to detect uninitialized P2P bridge resources,
+		 * and clear them out so they get re-assigned later
+		 */
+		if (pcibios_uninitialized_bridge_resource(bus, res)) {
+			res->flags = 0;
+			pr_debug("PCI:%s (unassigned)\n", pci_name(dev));
+		}
+	}
+}
+
+void __devinit pcibios_setup_bus_self(struct pci_bus *bus)
+{
+	/* Fix up the bus resources for P2P bridges */
+	if (bus->self != NULL)
+		pcibios_fixup_bridge(bus);
+
+	/* Platform specific bus fixups.
This is currently only used
+	 * by fsl_pci and I'm hoping to get rid of it at some point
+	 */
+	if (ppc_md.pcibios_fixup_bus)
+		ppc_md.pcibios_fixup_bus(bus);
+
+	/* Setup bus DMA mappings */
+	if (ppc_md.pci_dma_bus_setup)
+		ppc_md.pci_dma_bus_setup(bus);
+}
+
+void __devinit pcibios_setup_bus_devices(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	pr_debug("PCI: Fixup bus devices %d (%s)\n",
+		 bus->number, bus->self ? pci_name(bus->self) : "PHB");
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		/* Cardbus can call us to add new devices to a bus, so ignore
+		 * those who are already fully discovered
+		 */
+		if (dev->is_added)
+			continue;
+
+		/* Fixup NUMA node as it may not be setup yet by the generic
+		 * code and is needed by the DMA init
+		 */
+		set_dev_node(&dev->dev, pcibus_to_node(dev->bus));
+
+		/* Hook up default DMA ops */
+		set_dma_ops(&dev->dev, pci_dma_ops);
+		set_dma_offset(&dev->dev, PCI_DRAM_OFFSET);
+
+		/* Additional platform DMA/iommu setup */
+		if (ppc_md.pci_dma_dev_setup)
+			ppc_md.pci_dma_dev_setup(dev);
+
+		/* Read default IRQs and fixup if necessary */
+		pci_read_irq_line(dev);
+		if (ppc_md.pci_irq_fixup)
+			ppc_md.pci_irq_fixup(dev);
+	}
+}
+
+void pcibios_set_master(struct pci_dev *dev)
+{
+	/* No special bus mastering setup handling */
+}
+
+void __devinit pcibios_fixup_bus(struct pci_bus *bus)
+{
+	/* When called from the generic PCI probe, read PCI<->PCI bridge
+	 * bases. This is -not- called when generating the PCI tree from
+	 * the OF device-tree.
+	 */
+	if (bus->self != NULL)
+		pci_read_bridge_bases(bus);
+
+	/* Now fix up the bus itself */
+	pcibios_setup_bus_self(bus);
+
+	/* Now fixup devices on that bus */
+	pcibios_setup_bus_devices(bus);
+}
+EXPORT_SYMBOL(pcibios_fixup_bus);
+
+void __devinit pci_fixup_cardbus(struct pci_bus *bus)
+{
+	/* Now fixup devices on that bus */
+	pcibios_setup_bus_devices(bus);
+}
+
+
+static int skip_isa_ioresource_align(struct pci_dev *dev)
+{
+	if (pci_has_flag(PCI_CAN_SKIP_ISA_ALIGN) &&
+	    !(dev->bus->bridge_ctl & PCI_BRIDGE_CTL_ISA))
+		return 1;
+	return 0;
+}
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ *
+ * Why? Because some silly external IO cards only decode
+ * the low 10 bits of the IO address. The 0x00-0xff region
+ * is reserved for motherboard devices that decode all 16
+ * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
+ * but we want to try to avoid allocating at 0x2900-0x2bff
+ * which might be mirrored at 0x0100-0x03ff.
+ */
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+				       resource_size_t size, resource_size_t align)
+{
+	struct pci_dev *dev = data;
+	resource_size_t start = res->start;
+
+	if (res->flags & IORESOURCE_IO) {
+		if (skip_isa_ioresource_align(dev))
+			return start;
+		if (start & 0x300)
+			start = (start + 0x3ff) & ~0x3ff;
+	}
+
+	return start;
+}
+EXPORT_SYMBOL(pcibios_align_resource);
+
+/*
+ * Reparent resource children of pr that conflict with res
+ * under res, and make res replace those children.
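+ *
+ * For illustration only (hypothetical values): if parent already has
+ * children A [0x100-0x1ff] and B [0x200-0x2ff], and res covers
+ * [0x100-0x2ff], then after this call res is a child of parent and
+ * A and B have been moved underneath res.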
+ */ +static int reparent_resources(struct resource *parent, + struct resource *res) +{ + struct resource *p, **pp; + struct resource **firstpp = NULL; + + for (pp = &parent->child; (p = *pp) != NULL; pp = &p->sibling) { + if (p->end < res->start) + continue; + if (res->end < p->start) + break; + if (p->start < res->start || p->end > res->end) + return -1; /* not completely contained */ + if (firstpp == NULL) + firstpp = pp; + } + if (firstpp == NULL) + return -1; /* didn't find any conflicting entries? */ + res->parent = parent; + res->child = *firstpp; + res->sibling = *pp; + *firstpp = res; + *pp = NULL; + for (p = res->child; p != NULL; p = p->sibling) { + p->parent = res; + pr_debug("PCI: Reparented %s [%llx..%llx] under %s\n", + p->name, + (unsigned long long)p->start, + (unsigned long long)p->end, res->name); + } + return 0; +} + +/* + * Handle resources of PCI devices. If the world were perfect, we could + * just allocate all the resource regions and do nothing more. It isn't. + * On the other hand, we cannot just re-allocate all devices, as it would + * require us to know lots of host bridge internals. So we attempt to + * keep as much of the original configuration as possible, but tweak it + * when it's found to be wrong. + * + * Known BIOS problems we have to work around: + * - I/O or memory regions not configured + * - regions configured, but not enabled in the command register + * - bogus I/O addresses above 64K used + * - expansion ROMs left enabled (this may sound harmless, but given + * the fact the PCI specs explicitly allow address decoders to be + * shared between expansion ROMs and other resource regions, it's + * at least dangerous) + * + * Our solution: + * (1) Allocate resources for all buses behind PCI-to-PCI bridges. + * This gives us fixed barriers on where we can allocate. + * (2) Allocate resources for all enabled devices. If there is + * a collision, just mark the resource as unallocated. Also + * disable expansion ROMs during this step. + * (3) Try to allocate resources for disabled devices. If the + * resources were assigned correctly, everything goes well, + * if they weren't, they won't disturb allocation of other + * resources. + * (4) Assign new addresses to resources which were either + * not configured at all or misconfigured. If explicitly + * requested by the user, configure expansion ROM address + * as well. + */ + +void pcibios_allocate_bus_resources(struct pci_bus *bus) +{ + struct pci_bus *b; + int i; + struct resource *res, *pr; + + pr_debug("PCI: Allocating bus resources for %04x:%02x...\n", + pci_domain_nr(bus), bus->number); + + pci_bus_for_each_resource(bus, res, i) { + if (!res || !res->flags || res->start > res->end || res->parent) + continue; + + /* If the resource was left unset at this point, we clear it */ + if (res->flags & IORESOURCE_UNSET) + goto clear_resource; + + if (bus->parent == NULL) + pr = (res->flags & IORESOURCE_IO) ? + &ioport_resource : &iomem_resource; + else { + pr = pci_find_parent_resource(bus->self, res); + if (pr == res) { + /* this happens when the generic PCI + * code (wrongly) decides that this + * bridge is transparent -- paulus + */ + continue; + } + } + + pr_debug("PCI: %s (bus %d) bridge rsrc %d: %016llx-%016llx " + "[0x%x], parent %p (%s)\n", + bus->self ? pci_name(bus->self) : "PHB", + bus->number, i, + (unsigned long long)res->start, + (unsigned long long)res->end, + (unsigned int)res->flags, + pr, (pr && pr->name) ? 
pr->name : "nil");
+
+		if (pr && !(pr->flags & IORESOURCE_UNSET)) {
+			if (request_resource(pr, res) == 0)
+				continue;
+			/*
+			 * Must be a conflict with an existing entry.
+			 * Move that entry (or entries) under the
+			 * bridge resource and try again.
+			 */
+			if (reparent_resources(pr, res) == 0)
+				continue;
+		}
+		pr_warning("PCI: Cannot allocate resource region "
+			   "%d of PCI bridge %d, will remap\n", i, bus->number);
+	clear_resource:
+		res->start = res->end = 0;
+		res->flags = 0;
+	}
+
+	list_for_each_entry(b, &bus->children, node)
+		pcibios_allocate_bus_resources(b);
+}
+
+static inline void __devinit alloc_resource(struct pci_dev *dev, int idx)
+{
+	struct resource *pr, *r = &dev->resource[idx];
+
+	pr_debug("PCI: Allocating %s: Resource %d: %016llx..%016llx [%x]\n",
+		 pci_name(dev), idx,
+		 (unsigned long long)r->start,
+		 (unsigned long long)r->end,
+		 (unsigned int)r->flags);
+
+	pr = pci_find_parent_resource(dev, r);
+	if (!pr || (pr->flags & IORESOURCE_UNSET) ||
+	    request_resource(pr, r) < 0) {
+		printk(KERN_WARNING "PCI: Cannot allocate resource region %d"
+		       " of device %s, will remap\n", idx, pci_name(dev));
+		if (pr)
+			pr_debug("PCI:  parent is %p: %016llx-%016llx [%x]\n",
+				 pr,
+				 (unsigned long long)pr->start,
+				 (unsigned long long)pr->end,
+				 (unsigned int)pr->flags);
+		/* We'll assign a new address later */
+		r->flags |= IORESOURCE_UNSET;
+		r->end -= r->start;
+		r->start = 0;
+	}
+}
+
+static void __init pcibios_allocate_resources(int pass)
+{
+	struct pci_dev *dev = NULL;
+	int idx, disabled;
+	u16 command;
+	struct resource *r;
+
+	for_each_pci_dev(dev) {
+		pci_read_config_word(dev, PCI_COMMAND, &command);
+		for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) {
+			r = &dev->resource[idx];
+			if (r->parent)		/* Already allocated */
+				continue;
+			if (!r->flags || (r->flags & IORESOURCE_UNSET))
+				continue;	/* Not assigned at all */
+			/* We only allocate ROMs on pass 1 just in case they
+			 * have been screwed up by firmware
+			 */
+			if (idx == PCI_ROM_RESOURCE)
+				disabled = 1;
+			else if (r->flags & IORESOURCE_IO)
+				disabled = !(command & PCI_COMMAND_IO);
+			else
+				disabled = !(command & PCI_COMMAND_MEMORY);
+			if (pass == disabled)
+				alloc_resource(dev, idx);
+		}
+		if (pass)
+			continue;
+		r = &dev->resource[PCI_ROM_RESOURCE];
+		if (r->flags) {
+			/* Turn the ROM off, leave the resource region,
+			 * but keep it unregistered.
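+			 *
+			 * For illustration only (hypothetical value): a ROM
+			 * BAR reading 0xfff00001 has PCI_ROM_ADDRESS_ENABLE
+			 * (bit 0) set; writing back 0xfff00000 turns decoding
+			 * off while preserving the assigned address.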
+			 */
+			u32 reg;
+			pci_read_config_dword(dev, dev->rom_base_reg, &reg);
+			if (reg & PCI_ROM_ADDRESS_ENABLE) {
+				pr_debug("PCI: Switching off ROM of %s\n",
+					 pci_name(dev));
+				r->flags &= ~IORESOURCE_ROM_ENABLE;
+				pci_write_config_dword(dev, dev->rom_base_reg,
+						       reg & ~PCI_ROM_ADDRESS_ENABLE);
+			}
+		}
+	}
+}
+
+static void __init pcibios_reserve_legacy_regions(struct pci_bus *bus)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	resource_size_t offset;
+	struct resource *res, *pres;
+	int i;
+
+	pr_debug("Reserving legacy ranges for domain %04x\n", pci_domain_nr(bus));
+
+	/* Check for IO */
+	if (!(hose->io_resource.flags & IORESOURCE_IO))
+		goto no_io;
+	offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+	BUG_ON(res == NULL);
+	res->name = "Legacy IO";
+	res->flags = IORESOURCE_IO;
+	res->start = offset;
+	res->end = (offset + 0xfff) & 0xfffffffful;
+	pr_debug("Candidate legacy IO: %pR\n", res);
+	if (request_resource(&hose->io_resource, res)) {
+		printk(KERN_DEBUG
+		       "PCI %04x:%02x Cannot reserve Legacy IO %pR\n",
+		       pci_domain_nr(bus), bus->number, res);
+		kfree(res);
+	}
+
+ no_io:
+	/* Check for memory */
+	offset = hose->pci_mem_offset;
+	pr_debug("hose mem offset: %016llx\n", (unsigned long long)offset);
+	for (i = 0; i < 3; i++) {
+		pres = &hose->mem_resources[i];
+		if (!(pres->flags & IORESOURCE_MEM))
+			continue;
+		pr_debug("hose mem res: %pR\n", pres);
+		if ((pres->start - offset) <= 0xa0000 &&
+		    (pres->end - offset) >= 0xbffff)
+			break;
+	}
+	if (i >= 3)
+		return;
+	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+	BUG_ON(res == NULL);
+	res->name = "Legacy VGA memory";
+	res->flags = IORESOURCE_MEM;
+	res->start = 0xa0000 + offset;
+	res->end = 0xbffff + offset;
+	pr_debug("Candidate VGA memory: %pR\n", res);
+	if (request_resource(pres, res)) {
+		printk(KERN_DEBUG
+		       "PCI %04x:%02x Cannot reserve VGA memory %pR\n",
+		       pci_domain_nr(bus), bus->number, res);
+		kfree(res);
+	}
+}
+
+void __init pcibios_resource_survey(void)
+{
+	struct pci_bus *b;
+
+	/* Allocate and assign resources */
+	list_for_each_entry(b, &pci_root_buses, node)
+		pcibios_allocate_bus_resources(b);
+	pcibios_allocate_resources(0);
+	pcibios_allocate_resources(1);
+
+	/* Before we start assigning unassigned resources, we try to reserve
+	 * the low IO area and the VGA memory area if they intersect the
+	 * bus available resources to avoid allocating things on top of them
+	 */
+	if (!pci_has_flag(PCI_PROBE_ONLY)) {
+		list_for_each_entry(b, &pci_root_buses, node)
+			pcibios_reserve_legacy_regions(b);
+	}
+
+	/* Now, if the platform didn't decide to blindly trust the firmware,
+	 * we proceed to assigning things that were left unassigned
+	 */
+	if (!pci_has_flag(PCI_PROBE_ONLY)) {
+		pr_debug("PCI: Assigning unassigned resources...\n");
+		pci_assign_unassigned_resources();
+	}
+
+	/* Call machine dependent fixup */
+	if (ppc_md.pcibios_fixup)
+		ppc_md.pcibios_fixup();
+}
+
+#ifdef CONFIG_HOTPLUG
+
+/* This is used by the PCI hotplug driver to allocate resources
+ * of newly plugged busses. We can try to consolidate with the
+ * rest of the code later, for now, keep it as-is as our main
+ * resource allocation function doesn't deal with sub-trees yet.
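+ *
+ * (Hotplug drivers normally reach this through
+ * pcibios_finish_adding_to_bus() below rather than calling it
+ * directly.)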
+ */
+void pcibios_claim_one_bus(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+	struct pci_bus *child_bus;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		int i;
+
+		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+			struct resource *r = &dev->resource[i];
+
+			if (r->parent || !r->start || !r->flags)
+				continue;
+
+			pr_debug("PCI: Claiming %s: "
+				 "Resource %d: %016llx..%016llx [%x]\n",
+				 pci_name(dev), i,
+				 (unsigned long long)r->start,
+				 (unsigned long long)r->end,
+				 (unsigned int)r->flags);
+
+			pci_claim_resource(dev, i);
+		}
+	}
+
+	list_for_each_entry(child_bus, &bus->children, node)
+		pcibios_claim_one_bus(child_bus);
+}
+
+
+/* pcibios_finish_adding_to_bus
+ *
+ * This is to be called by the hotplug code after devices have been
+ * added to a bus, this includes calling it for a PHB that is just
+ * being added
+ */
+void pcibios_finish_adding_to_bus(struct pci_bus *bus)
+{
+	pr_debug("PCI: Finishing adding to hotplug bus %04x:%02x\n",
+		 pci_domain_nr(bus), bus->number);
+
+	/* Allocate bus and devices resources */
+	pcibios_allocate_bus_resources(bus);
+	pcibios_claim_one_bus(bus);
+
+	/* Add new devices to global lists. Register in proc, sysfs. */
+	pci_bus_add_devices(bus);
+
+	/* Fixup EEH */
+	eeh_add_device_tree_late(bus);
+}
+EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus);
+
+#endif /* CONFIG_HOTPLUG */
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	if (ppc_md.pcibios_enable_device_hook)
+		if (ppc_md.pcibios_enable_device_hook(dev))
+			return -EINVAL;
+
+	return pci_enable_resources(dev, mask);
+}
+
+resource_size_t pcibios_io_space_offset(struct pci_controller *hose)
+{
+	return (unsigned long) hose->io_base_virt - _IO_BASE;
+}
+
+static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose,
+						  struct list_head *resources)
+{
+	struct resource *res;
+	int i;
+
+	/* Hookup PHB IO resource */
+	res = &hose->io_resource;
+
+	if (!res->flags) {
+		printk(KERN_WARNING "PCI: I/O resource not set for host"
+		       " bridge %s (domain %d)\n",
+		       hose->dn->full_name, hose->global_number);
+#ifdef CONFIG_PPC32
+		/* Workaround for lack of IO resource only on 32-bit */
+		res->start = (unsigned long)hose->io_base_virt - isa_io_base;
+		res->end = res->start + IO_SPACE_LIMIT;
+		res->flags = IORESOURCE_IO;
+#endif /* CONFIG_PPC32 */
+	}
+
+	pr_debug("PCI: PHB IO resource    = %016llx-%016llx [%lx]\n",
+		 (unsigned long long)res->start,
+		 (unsigned long long)res->end,
+		 (unsigned long)res->flags);
+	pci_add_resource_offset(resources, res, pcibios_io_space_offset(hose));
+
+	/* Hookup PHB Memory resources */
+	for (i = 0; i < 3; ++i) {
+		res = &hose->mem_resources[i];
+		if (!res->flags) {
+			if (i > 0)
+				continue;
+			printk(KERN_ERR "PCI: Memory resource 0 not set for "
+			       "host bridge %s (domain %d)\n",
+			       hose->dn->full_name, hose->global_number);
+#ifdef CONFIG_PPC32
+			/* Workaround for lack of MEM resource only on 32-bit */
+			res->start = hose->pci_mem_offset;
+			res->end = (resource_size_t)-1LL;
+			res->flags = IORESOURCE_MEM;
+#endif /* CONFIG_PPC32 */
+		}
+
+		pr_debug("PCI: PHB MEM resource %d = %016llx-%016llx [%lx]\n", i,
+			 (unsigned long long)res->start,
+			 (unsigned long long)res->end,
+			 (unsigned long)res->flags);
+		pci_add_resource_offset(resources, res, hose->pci_mem_offset);
+	}
+
+	pr_debug("PCI: PHB MEM offset     = %016llx\n",
+		 (unsigned long long)hose->pci_mem_offset);
+	pr_debug("PCI: PHB IO  offset     = %08lx\n",
+		 (unsigned long)hose->io_base_virt - _IO_BASE);
+
+}
+
+/*
+ * Null PCI config access functions, for the case when we can't
+ * find a hose.
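+ *
+ * With these in place, an early config access on a bus with no hose
+ * (fake_pci_bus(NULL, ...) below) fails cleanly with
+ * PCIBIOS_DEVICE_NOT_FOUND instead of dereferencing a NULL ops pointer.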
+ */ +#define NULL_PCI_OP(rw, size, type) \ +static int \ +null_##rw##_config_##size(struct pci_dev *dev, int offset, type val) \ +{ \ + return PCIBIOS_DEVICE_NOT_FOUND; \ +} + +static int +null_read_config(struct pci_bus *bus, unsigned int devfn, int offset, + int len, u32 *val) +{ + return PCIBIOS_DEVICE_NOT_FOUND; +} + +static int +null_write_config(struct pci_bus *bus, unsigned int devfn, int offset, + int len, u32 val) +{ + return PCIBIOS_DEVICE_NOT_FOUND; +} + +static struct pci_ops null_pci_ops = +{ + .read = null_read_config, + .write = null_write_config, +}; + +/* + * These functions are used early on before PCI scanning is done + * and all of the pci_dev and pci_bus structures have been created. + */ +static struct pci_bus * +fake_pci_bus(struct pci_controller *hose, int busnr) +{ + static struct pci_bus bus; + + if (hose == 0) { + printk(KERN_ERR "Can't find hose for PCI bus %d!\n", busnr); + } + bus.number = busnr; + bus.sysdata = hose; + bus.ops = hose? hose->ops: &null_pci_ops; + return &bus; +} + +#define EARLY_PCI_OP(rw, size, type) \ +int early_##rw##_config_##size(struct pci_controller *hose, int bus, \ + int devfn, int offset, type value) \ +{ \ + return pci_bus_##rw##_config_##size(fake_pci_bus(hose, bus), \ + devfn, offset, value); \ +} + +EARLY_PCI_OP(read, byte, u8 *) +EARLY_PCI_OP(read, word, u16 *) +EARLY_PCI_OP(read, dword, u32 *) +EARLY_PCI_OP(write, byte, u8) +EARLY_PCI_OP(write, word, u16) +EARLY_PCI_OP(write, dword, u32) + +extern int pci_bus_find_capability (struct pci_bus *bus, unsigned int devfn, int cap); +int early_find_capability(struct pci_controller *hose, int bus, int devfn, + int cap) +{ + return pci_bus_find_capability(fake_pci_bus(hose, bus), devfn, cap); +} + +struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus) +{ + struct pci_controller *hose = bus->sysdata; + + return of_node_get(hose->dn); +} + +/** + * pci_scan_phb - Given a pci_controller, setup and scan the PCI bus + * @hose: Pointer to the PCI host controller instance structure + */ +void __devinit pcibios_scan_phb(struct pci_controller *hose) +{ + LIST_HEAD(resources); + struct pci_bus *bus; + struct device_node *node = hose->dn; + int mode; + + pr_debug("PCI: Scanning PHB %s\n", + node ? 
node->full_name : "<NO NAME>"); + + /* Get some IO space for the new PHB */ + pcibios_setup_phb_io_space(hose); + + /* Wire up PHB bus resources */ + pcibios_setup_phb_resources(hose, &resources); + + /* Create an empty bus for the toplevel */ + bus = pci_create_root_bus(hose->parent, hose->first_busno, + hose->ops, hose, &resources); + if (bus == NULL) { + pr_err("Failed to create bus for PCI domain %04x\n", + hose->global_number); + pci_free_resource_list(&resources); + return; + } + bus->secondary = hose->first_busno; + hose->bus = bus; + + /* Get probe mode and perform scan */ + mode = PCI_PROBE_NORMAL; + if (node && ppc_md.pci_probe_mode) + mode = ppc_md.pci_probe_mode(bus); + pr_debug(" probe mode: %d\n", mode); + if (mode == PCI_PROBE_DEVTREE) { + bus->subordinate = hose->last_busno; + of_scan_bus(node, bus); + } + + if (mode == PCI_PROBE_NORMAL) + hose->last_busno = bus->subordinate = pci_scan_child_bus(bus); + + /* Platform gets a chance to do some global fixups before + * we proceed to resource allocation + */ + if (ppc_md.pcibios_fixup_phb) + ppc_md.pcibios_fixup_phb(hose); + + /* Configure PCI Express settings */ + if (bus && !pci_has_flag(PCI_PROBE_ONLY)) { + struct pci_bus *child; + list_for_each_entry(child, &bus->children, node) { + struct pci_dev *self = child->self; + if (!self) + continue; + pcie_bus_configure_settings(child, self->pcie_mpss); + } + } +} + +static void fixup_hide_host_resource_fsl(struct pci_dev *dev) +{ + int i, class = dev->class >> 8; + /* When configured as agent, programing interface = 1 */ + int prog_if = dev->class & 0xf; + + if ((class == PCI_CLASS_PROCESSOR_POWERPC || + class == PCI_CLASS_BRIDGE_OTHER) && + (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) && + (prog_if == 0) && + (dev->bus->parent == NULL)) { + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + dev->resource[i].start = 0; + dev->resource[i].end = 0; + dev->resource[i].flags = 0; + } + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl); diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c new file mode 100644 index 00000000..4b06ec5a --- /dev/null +++ b/arch/powerpc/kernel/pci_32.c @@ -0,0 +1,310 @@ +/* + * Common pmac/prep/chrp pci routines. -- Cort + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/capability.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/bootmem.h> +#include <linux/irq.h> +#include <linux/list.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/export.h> + +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/sections.h> +#include <asm/pci-bridge.h> +#include <asm/ppc-pci.h> +#include <asm/byteorder.h> +#include <asm/uaccess.h> +#include <asm/machdep.h> + +#undef DEBUG + +unsigned long isa_io_base = 0; +unsigned long pci_dram_offset = 0; +int pcibios_assign_bus_offset = 1; + +void pcibios_make_OF_bus_map(void); + +static void fixup_cpc710_pci64(struct pci_dev* dev); +static u8* pci_to_OF_bus_map; + +/* By default, we don't re-assign bus numbers. We do this only on + * some pmacs + */ +static int pci_assign_all_buses; + +static int pci_bus_count; + +/* This will remain NULL for now, until isa-bridge.c is made common + * to both 32-bit and 64-bit. 
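+ * (It is exported so that code elsewhere can test for the presence of
+ * a legacy ISA bridge without reaching into platform code.)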
+ */
+struct pci_dev *isa_bridge_pcidev;
+EXPORT_SYMBOL_GPL(isa_bridge_pcidev);
+
+static void
+fixup_cpc710_pci64(struct pci_dev* dev)
+{
+	/* Hide the PCI64 BARs from the kernel as their content doesn't
+	 * fit well in the resource management
+	 */
+	dev->resource[0].start = dev->resource[0].end = 0;
+	dev->resource[0].flags = 0;
+	dev->resource[1].start = dev->resource[1].end = 0;
+	dev->resource[1].flags = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CPC710_PCI64, fixup_cpc710_pci64);
+
+/*
+ * Functions below are used on OpenFirmware machines.
+ */
+static void
+make_one_node_map(struct device_node* node, u8 pci_bus)
+{
+	const int *bus_range;
+	int len;
+
+	if (pci_bus >= pci_bus_count)
+		return;
+	bus_range = of_get_property(node, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		printk(KERN_WARNING "Can't get bus-range for %s, "
+		       "assuming it starts at 0\n", node->full_name);
+		pci_to_OF_bus_map[pci_bus] = 0;
+	} else
+		pci_to_OF_bus_map[pci_bus] = bus_range[0];
+
+	for_each_child_of_node(node, node) {
+		struct pci_dev* dev;
+		const unsigned int *class_code, *reg;
+
+		class_code = of_get_property(node, "class-code", NULL);
+		if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
+			(*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS))
+			continue;
+		reg = of_get_property(node, "reg", NULL);
+		if (!reg)
+			continue;
+		dev = pci_get_bus_and_slot(pci_bus, ((reg[0] >> 8) & 0xff));
+		if (!dev || !dev->subordinate) {
+			pci_dev_put(dev);
+			continue;
+		}
+		make_one_node_map(node, dev->subordinate->number);
+		pci_dev_put(dev);
+	}
+}
+
+void
+pcibios_make_OF_bus_map(void)
+{
+	int i;
+	struct pci_controller *hose, *tmp;
+	struct property *map_prop;
+	struct device_node *dn;
+
+	pci_to_OF_bus_map = kmalloc(pci_bus_count, GFP_KERNEL);
+	if (!pci_to_OF_bus_map) {
+		printk(KERN_ERR "Can't allocate OF bus map !\n");
+		return;
+	}
+
+	/* We fill the bus map with invalid values, that helps
+	 * debugging.
+	 */
+	for (i=0; i<pci_bus_count; i++)
+		pci_to_OF_bus_map[i] = 0xff;
+
+	/* For each hose, we begin searching bridges */
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		struct device_node* node = hose->dn;
+
+		if (!node)
+			continue;
+		make_one_node_map(node, hose->first_busno);
+	}
+	dn = of_find_node_by_path("/");
+	map_prop = of_find_property(dn, "pci-OF-bus-map", NULL);
+	if (map_prop) {
+		BUG_ON(pci_bus_count > map_prop->length);
+		memcpy(map_prop->value, pci_to_OF_bus_map, pci_bus_count);
+	}
+	of_node_put(dn);
+#ifdef DEBUG
+	printk("PCI->OF bus map:\n");
+	for (i=0; i<pci_bus_count; i++) {
+		if (pci_to_OF_bus_map[i] == 0xff)
+			continue;
+		printk("%d -> %d\n", i, pci_to_OF_bus_map[i]);
+	}
+#endif
+}
+
+
+/*
+ * Returns the PCI device matching a given OF node
+ */
+int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn)
+{
+	struct pci_dev *dev = NULL;
+	const __be32 *reg;
+	int size;
+
+	/* Check if it might have a chance to be a PCI device */
+	if (!pci_find_hose_for_OF_device(node))
+		return -ENODEV;
+
+	reg = of_get_property(node, "reg", &size);
+	if (!reg || size < 5 * sizeof(u32))
+		return -ENODEV;
+
+	*bus = (be32_to_cpup(&reg[0]) >> 16) & 0xff;
+	*devfn = (be32_to_cpup(&reg[0]) >> 8) & 0xff;
+
+	/* Ok, here we need some tweak. If we have already renumbered
+	 * all busses, we can't rely on the OF bus number any more.
+	 * The pci_to_OF_bus_map is not enough as several PCI busses
+	 * may match the same OF bus number.
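+	 *
+	 * For illustration only (hypothetical layout): with two PHBs whose
+	 * OF bus-range both start at 0, kernel buses 0 and 1 both have
+	 * pci_to_OF_bus_map[] == 0, so the devfn comparison below is what
+	 * disambiguates them.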
+ */ + if (!pci_to_OF_bus_map) + return 0; + + for_each_pci_dev(dev) + if (pci_to_OF_bus_map[dev->bus->number] == *bus && + dev->devfn == *devfn) { + *bus = dev->bus->number; + pci_dev_put(dev); + return 0; + } + + return -ENODEV; +} +EXPORT_SYMBOL(pci_device_from_OF_node); + +/* We create the "pci-OF-bus-map" property now so it appears in the + * /proc device tree + */ +void __init +pci_create_OF_bus_map(void) +{ + struct property* of_prop; + struct device_node *dn; + + of_prop = (struct property*) alloc_bootmem(sizeof(struct property) + 256); + if (!of_prop) + return; + dn = of_find_node_by_path("/"); + if (dn) { + memset(of_prop, -1, sizeof(struct property) + 256); + of_prop->name = "pci-OF-bus-map"; + of_prop->length = 256; + of_prop->value = &of_prop[1]; + prom_add_property(dn, of_prop); + of_node_put(dn); + } +} + +void __devinit pcibios_setup_phb_io_space(struct pci_controller *hose) +{ + unsigned long io_offset; + struct resource *res = &hose->io_resource; + + /* Fixup IO space offset */ + io_offset = pcibios_io_space_offset(hose); + res->start += io_offset; + res->end += io_offset; +} + +static int __init pcibios_init(void) +{ + struct pci_controller *hose, *tmp; + int next_busno = 0; + + printk(KERN_INFO "PCI: Probing PCI hardware\n"); + + if (pci_has_flag(PCI_REASSIGN_ALL_BUS)) + pci_assign_all_buses = 1; + + /* Scan all of the recorded PCI controllers. */ + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + if (pci_assign_all_buses) + hose->first_busno = next_busno; + hose->last_busno = 0xff; + pcibios_scan_phb(hose); + pci_bus_add_devices(hose->bus); + if (pci_assign_all_buses || next_busno <= hose->last_busno) + next_busno = hose->last_busno + pcibios_assign_bus_offset; + } + pci_bus_count = next_busno; + + /* OpenFirmware based machines need a map of OF bus + * numbers vs. kernel bus numbers since we may have to + * remap them. + */ + if (pci_assign_all_buses) + pcibios_make_OF_bus_map(); + + /* Call common code to handle resource allocation */ + pcibios_resource_survey(); + + /* Call machine dependent post-init code */ + if (ppc_md.pcibios_after_init) + ppc_md.pcibios_after_init(); + + return 0; +} + +subsys_initcall(pcibios_init); + +static struct pci_controller* +pci_bus_to_hose(int bus) +{ + struct pci_controller *hose, *tmp; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) + if (bus >= hose->first_busno && bus <= hose->last_busno) + return hose; + return NULL; +} + +/* Provide information on locations of various I/O regions in physical + * memory. Do this on a per-card basis so that we choose the right + * root bridge. + * Note that the returned IO or memory base is a physical address + */ + +long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn) +{ + struct pci_controller* hose; + long result = -EOPNOTSUPP; + + hose = pci_bus_to_hose(bus); + if (!hose) + return -ENODEV; + + switch (which) { + case IOBASE_BRIDGE_NUMBER: + return (long)hose->first_busno; + case IOBASE_MEMORY: + return (long)hose->pci_mem_offset; + case IOBASE_IO: + return (long)hose->io_base_phys; + case IOBASE_ISA_IO: + return (long)isa_io_base; + case IOBASE_ISA_MEM: + return (long)isa_mem_base; + } + + return result; +} + + diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c new file mode 100644 index 00000000..94a54f61 --- /dev/null +++ b/arch/powerpc/kernel/pci_64.c @@ -0,0 +1,272 @@ +/* + * Port for PPC64 David Engebretsen, IBM Corp. + * Contains common pci routines for ppc64 platform, pSeries and iSeries brands. 
+ *
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Rework, based on alpha PCI code.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/syscalls.h>
+#include <linux/irq.h>
+#include <linux/vmalloc.h>
+
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/byteorder.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+
+/* pci_io_base -- the base address from which io bars are offsets.
+ * This is the lowest I/O base address (so bar values are always positive),
+ * and it *must* be the start of ISA space if an ISA bus exists because
+ * ISA drivers use hard coded offsets. If no ISA bus exists nothing
+ * is mapped on the first 64K of IO space
+ */
+unsigned long pci_io_base = ISA_IO_BASE;
+EXPORT_SYMBOL(pci_io_base);
+
+static int __init pcibios_init(void)
+{
+	struct pci_controller *hose, *tmp;
+
+	printk(KERN_INFO "PCI: Probing PCI hardware\n");
+
+	/* For now, override phys_mem_access_prot. If we need it later,
+	 * we may move that initialization to each ppc_md
+	 */
+	ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot;
+
+	/* On ppc64, we always enable PCI domains and we keep domain 0
+	 * backward compatible in /proc for video cards
+	 */
+	pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
+
+	/* Scan all of the recorded PCI controllers. */
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		pcibios_scan_phb(hose);
+		pci_bus_add_devices(hose->bus);
+	}
+
+	/* Call common code to handle resource allocation */
+	pcibios_resource_survey();
+
+	printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");
+
+	return 0;
+}
+
+subsys_initcall(pcibios_init);
+
+#ifdef CONFIG_HOTPLUG
+
+int pcibios_unmap_io_space(struct pci_bus *bus)
+{
+	struct pci_controller *hose;
+
+	WARN_ON(bus == NULL);
+
+	/* If this is not a PHB, we only flush the hash table over
+	 * the area mapped by this bridge.
We don't play with the PTE
+	 * mappings since we might have to deal with sub-page alignments
+	 * so flushing the hash table is the only sane way to make sure
+	 * that no hash entries are covering that removed bridge area
+	 * while still allowing other busses overlapping those pages
+	 *
+	 * Note: If we ever support P2P hotplug on Book3E, we'll have
+	 * to do an appropriate TLB flush here too
+	 */
+	if (bus->self) {
+#ifdef CONFIG_PPC_STD_MMU_64
+		struct resource *res = bus->resource[0];
+#endif
+
+		pr_debug("IO unmapping for PCI-PCI bridge %s\n",
+			 pci_name(bus->self));
+
+#ifdef CONFIG_PPC_STD_MMU_64
+		__flush_hash_table_range(&init_mm, res->start + _IO_BASE,
+					 res->end + _IO_BASE + 1);
+#endif
+		return 0;
+	}
+
+	/* Get the host bridge */
+	hose = pci_bus_to_host(bus);
+
+	/* Check if we have IOs allocated */
+	if (hose->io_base_alloc == 0)
+		return 0;
+
+	pr_debug("IO unmapping for PHB %s\n", hose->dn->full_name);
+	pr_debug("  alloc=0x%p\n", hose->io_base_alloc);
+
+	/* This is a PHB, we fully unmap the IO area */
+	vunmap(hose->io_base_alloc);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pcibios_unmap_io_space);
+
+#endif /* CONFIG_HOTPLUG */
+
+static int __devinit pcibios_map_phb_io_space(struct pci_controller *hose)
+{
+	struct vm_struct *area;
+	unsigned long phys_page;
+	unsigned long size_page;
+	unsigned long io_virt_offset;
+
+	phys_page = _ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE);
+	size_page = _ALIGN_UP(hose->pci_io_size, PAGE_SIZE);
+
+	/* Make sure IO area address is clear */
+	hose->io_base_alloc = NULL;
+
+	/* If there's no IO to map on that bus, we are done too */
+	if (hose->pci_io_size == 0 || hose->io_base_phys == 0)
+		return 0;
+
+	/* Let's allocate some IO space for that guy. We don't pass
+	 * VM_IOREMAP because we don't care about alignment tricks that
+	 * the core does in that case. Maybe we should, due to stupid cards
+	 * with incomplete address decoding, but I'd rather not deal with
+	 * those outside of the reserved 64K legacy region.
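+	 *
+	 * For illustration only (hypothetical values): with 4K pages and
+	 * io_base_phys = 0xf2000100, phys_page is 0xf2000000, so below
+	 * io_base_virt ends up at area->addr + 0x100.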
+	 */
+	area = __get_vm_area(size_page, 0, PHB_IO_BASE, PHB_IO_END);
+	if (area == NULL)
+		return -ENOMEM;
+	hose->io_base_alloc = area->addr;
+	hose->io_base_virt = (void __iomem *)(area->addr +
+					      hose->io_base_phys - phys_page);
+
+	pr_debug("IO mapping for PHB %s\n", hose->dn->full_name);
+	pr_debug("  phys=0x%016llx, virt=0x%p (alloc=0x%p)\n",
+		 hose->io_base_phys, hose->io_base_virt, hose->io_base_alloc);
+	pr_debug("  size=0x%016llx (alloc=0x%016lx)\n",
+		 hose->pci_io_size, size_page);
+
+	/* Establish the mapping */
+	if (__ioremap_at(phys_page, area->addr, size_page,
+			 _PAGE_NO_CACHE | _PAGE_GUARDED) == NULL)
+		return -ENOMEM;
+
+	/* Fixup hose IO resource */
+	io_virt_offset = pcibios_io_space_offset(hose);
+	hose->io_resource.start += io_virt_offset;
+	hose->io_resource.end += io_virt_offset;
+
+	pr_debug("  hose->io_resource=%pR\n", &hose->io_resource);
+
+	return 0;
+}
+
+int __devinit pcibios_map_io_space(struct pci_bus *bus)
+{
+	WARN_ON(bus == NULL);
+
+	/* If this is not a PHB, nothing to do, page tables still exist and
+	 * thus HPTEs will be faulted in when needed
+	 */
+	if (bus->self) {
+		pr_debug("IO mapping for PCI-PCI bridge %s\n",
+			 pci_name(bus->self));
+		pr_debug("  virt=0x%016llx...0x%016llx\n",
+			 bus->resource[0]->start + _IO_BASE,
+			 bus->resource[0]->end + _IO_BASE);
+		return 0;
+	}
+
+	return pcibios_map_phb_io_space(pci_bus_to_host(bus));
+}
+EXPORT_SYMBOL_GPL(pcibios_map_io_space);
+
+void __devinit pcibios_setup_phb_io_space(struct pci_controller *hose)
+{
+	pcibios_map_phb_io_space(hose);
+}
+
+#define IOBASE_BRIDGE_NUMBER	0
+#define IOBASE_MEMORY		1
+#define IOBASE_IO		2
+#define IOBASE_ISA_IO		3
+#define IOBASE_ISA_MEM		4
+
+long sys_pciconfig_iobase(long which, unsigned long in_bus,
+			  unsigned long in_devfn)
+{
+	struct pci_controller* hose;
+	struct list_head *ln;
+	struct pci_bus *bus = NULL;
+	struct device_node *hose_node;
+
+	/* Argh ! Please forgive me for that hack, but that's the
+	 * simplest way to get existing XFree to not lockup on some
+	 * G5 machines... So when something asks for bus 0 io base
+	 * (bus 0 is HT root), we return the AGP one instead.
+	 */
+	if (in_bus == 0 && of_machine_is_compatible("MacRISC4")) {
+		struct device_node *agp;
+
+		agp = of_find_compatible_node(NULL, NULL, "u3-agp");
+		if (agp)
+			in_bus = 0xf0;
+		of_node_put(agp);
+	}
+
+	/* That syscall isn't quite compatible with PCI domains, but it's
+	 * used on pre-domain setups.
We return the first match + */ + + for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) { + bus = pci_bus_b(ln); + if (in_bus >= bus->number && in_bus <= bus->subordinate) + break; + bus = NULL; + } + if (bus == NULL || bus->dev.of_node == NULL) + return -ENODEV; + + hose_node = bus->dev.of_node; + hose = PCI_DN(hose_node)->phb; + + switch (which) { + case IOBASE_BRIDGE_NUMBER: + return (long)hose->first_busno; + case IOBASE_MEMORY: + return (long)hose->pci_mem_offset; + case IOBASE_IO: + return (long)hose->io_base_phys; + case IOBASE_ISA_IO: + return (long)isa_io_base; + case IOBASE_ISA_MEM: + return -EINVAL; + } + + return -EOPNOTSUPP; +} + +#ifdef CONFIG_NUMA +int pcibus_to_node(struct pci_bus *bus) +{ + struct pci_controller *phb = pci_bus_to_host(bus); + return phb->node; +} +EXPORT_SYMBOL(pcibus_to_node); +#endif diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c new file mode 100644 index 00000000..dd9e4a04 --- /dev/null +++ b/arch/powerpc/kernel/pci_dn.c @@ -0,0 +1,165 @@ +/* + * pci_dn.c + * + * Copyright (C) 2001 Todd Inglett, IBM Corporation + * + * PCI manipulation via device_nodes. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/string.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/gfp.h> + +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/ppc-pci.h> +#include <asm/firmware.h> + +/* + * Traverse_func that inits the PCI fields of the device node. + * NOTE: this *must* be done before read/write config to the device. + */ +void * __devinit update_dn_pci_info(struct device_node *dn, void *data) +{ + struct pci_controller *phb = data; + const int *type = + of_get_property(dn, "ibm,pci-config-space-type", NULL); + const u32 *regs; + struct pci_dn *pdn; + + pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL); + if (pdn == NULL) + return NULL; + dn->data = pdn; + pdn->node = dn; + pdn->phb = phb; +#ifdef CONFIG_PPC_POWERNV + pdn->pe_number = IODA_INVALID_PE; +#endif + regs = of_get_property(dn, "reg", NULL); + if (regs) { + /* First register entry is addr (00BBSS00) */ + pdn->busno = (regs[0] >> 16) & 0xff; + pdn->devfn = (regs[0] >> 8) & 0xff; + } + + pdn->pci_ext_config_space = (type && *type == 1); + return NULL; +} + +/* + * Traverse a device tree stopping each PCI device in the tree. + * This is done depth first. As each node is processed, a "pre" + * function is called and the children are processed recursively. + * + * The "pre" func returns a value. If non-zero is returned from + * the "pre" func, the traversal stops and this value is returned. + * This return value is useful when using traverse as a method of + * finding a device. 
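+ *
+ * For illustration only (not part of this patch), a pre function that
+ * stops the walk at the first node whose pci_dn carries a given devfn
+ * might look like:
+ *
+ *	static void *find_by_devfn(struct device_node *dn, void *data)
+ *	{
+ *		struct pci_dn *pdn = dn->data;
+ *
+ *		return (pdn && pdn->devfn == *(int *)data) ? pdn : NULL;
+ *	}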
+ *
+ * NOTE: we do not run the func for devices that do not appear to
+ * be PCI except for the start node which we assume (this is good
+ * because the start node is often a phb which may be missing PCI
+ * properties).
+ * We use the class-code as an indicator. If we run into
+ * one of these nodes we also assume its siblings are non-pci for
+ * performance.
+ */
+void *traverse_pci_devices(struct device_node *start, traverse_func pre,
+			   void *data)
+{
+	struct device_node *dn, *nextdn;
+	void *ret;
+
+	/* We started with a phb, iterate over all children */
+	for (dn = start->child; dn; dn = nextdn) {
+		const u32 *classp;
+		u32 class;
+
+		nextdn = NULL;
+		classp = of_get_property(dn, "class-code", NULL);
+		class = classp ? *classp : 0;
+
+		if (pre && ((ret = pre(dn, data)) != NULL))
+			return ret;
+
+		/* If we are a PCI bridge, go down */
+		if (dn->child && ((class >> 8) == PCI_CLASS_BRIDGE_PCI ||
+				  (class >> 8) == PCI_CLASS_BRIDGE_CARDBUS))
+			/* Depth first...do children */
+			nextdn = dn->child;
+		else if (dn->sibling)
+			/* ok, try next sibling instead. */
+			nextdn = dn->sibling;
+		if (!nextdn) {
+			/* Walk up to next valid sibling. */
+			do {
+				dn = dn->parent;
+				if (dn == start)
+					return NULL;
+			} while (dn->sibling == NULL);
+			nextdn = dn->sibling;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * pci_devs_phb_init_dynamic - setup pci devices under this PHB
+ * @phb: pci-to-host bridge (top-level bridge connecting to cpu)
+ *
+ * This routine is called both during boot, (before the memory
+ * subsystem is set up, before kmalloc is valid) and during the
+ * dynamic lpar operation of adding a PHB to a running system.
+ */
+void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb)
+{
+	struct device_node *dn = phb->dn;
+	struct pci_dn *pdn;
+
+	/* PHB nodes themselves must not match */
+	update_dn_pci_info(dn, phb);
+	pdn = dn->data;
+	if (pdn) {
+		pdn->devfn = pdn->busno = -1;
+		pdn->phb = phb;
+	}
+
+	/* Update dn->phb ptrs for new phb and children devices */
+	traverse_pci_devices(dn, update_dn_pci_info, phb);
+}
+
+/**
+ * pci_devs_phb_init - Initialize phbs and pci devs under them.
+ *
+ * This routine walks over all phb's (pci-host bridges) on the
+ * system, and sets up assorted pci-related structures
+ * (including pci info in the device node structs) for each
+ * pci device found underneath. This routine runs once,
+ * early in the boot sequence.
+ */
+void __init pci_devs_phb_init(void)
+{
+	struct pci_controller *phb, *tmp;
+
+	/* This must be done first so the device nodes have valid pci info! */
+	list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
+		pci_devs_phb_init_dynamic(phb);
+}
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
new file mode 100644
index 00000000..89dde171
--- /dev/null
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -0,0 +1,373 @@
+/*
+ * Helper routines to scan the device tree for PCI devices and busses
+ *
+ * Migrated out of PowerPC architecture pci_64.c file by Grant Likely
+ * <grant.likely@secretlab.ca> so that these routines are available for
+ * 32 bit also.
+ *
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Rework, based on alpha PCI code.
+ * Copyright (c) 2009 Secret Lab Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/pci.h>
+#include <linux/export.h>
+#include <asm/pci-bridge.h>
+#include <asm/prom.h>
+
+/**
+ * get_int_prop - Decode a u32 from a device tree property
+ */
+static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
+{
+	const u32 *prop;
+	int len;
+
+	prop = of_get_property(np, name, &len);
+	if (prop && len >= 4)
+		return *prop;
+	return def;
+}
+
+/**
+ * pci_parse_of_flags - Parse the flags cell of a device tree PCI address
+ * @addr0: value of 1st cell of a device tree PCI address.
+ * @bridge: Set this flag if the address is from a bridge 'ranges' property
+ */
+unsigned int pci_parse_of_flags(u32 addr0, int bridge)
+{
+	unsigned int flags = 0;
+
+	if (addr0 & 0x02000000) {
+		flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
+		flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
+		flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
+		if (addr0 & 0x40000000)
+			flags |= IORESOURCE_PREFETCH
+				 | PCI_BASE_ADDRESS_MEM_PREFETCH;
+		/* Note: We don't know whether the ROM has been left enabled
+		 * by the firmware or not. We mark it as disabled (ie, we do
+		 * not set the IORESOURCE_ROM_ENABLE flag) for now rather than
+		 * do a config space read, it will be force-enabled if needed
+		 */
+		if (!bridge && (addr0 & 0xff) == 0x30)
+			flags |= IORESOURCE_READONLY;
+	} else if (addr0 & 0x01000000)
+		flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
+	if (flags)
+		flags |= IORESOURCE_SIZEALIGN;
+	return flags;
+}
+
+/**
+ * of_pci_parse_addrs - Parse PCI addresses assigned in the device tree node
+ * @node: device tree node for the PCI device
+ * @dev: pci_dev structure for the device
+ *
+ * This function parses the 'assigned-addresses' property of a PCI device's
+ * device tree node and writes them into the associated pci_dev structure.
+ */
+static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
+{
+	u64 base, size;
+	unsigned int flags;
+	struct pci_bus_region region;
+	struct resource *res;
+	const u32 *addrs;
+	u32 i;
+	int proplen;
+
+	addrs = of_get_property(node, "assigned-addresses", &proplen);
+	if (!addrs)
+		return;
+	pr_debug("    parse addresses (%d bytes) @ %p\n", proplen, addrs);
+	for (; proplen >= 20; proplen -= 20, addrs += 5) {
+		flags = pci_parse_of_flags(addrs[0], 0);
+		if (!flags)
+			continue;
+		base = of_read_number(&addrs[1], 2);
+		size = of_read_number(&addrs[3], 2);
+		if (!size)
+			continue;
+		i = addrs[0] & 0xff;
+		pr_debug("  base: %llx, size: %llx, i: %x\n",
+			 (unsigned long long)base,
+			 (unsigned long long)size, i);
+
+		if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) {
+			res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
+		} else if (i == dev->rom_base_reg) {
+			res = &dev->resource[PCI_ROM_RESOURCE];
+			flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+		} else {
+			printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
+			continue;
+		}
+		res->flags = flags;
+		res->name = pci_name(dev);
+		region.start = base;
+		region.end = base + size - 1;
+		pcibios_bus_to_resource(dev, res, &region);
+	}
+}
+
+/**
+ * of_create_pci_dev - Given a device tree node on a pci bus, create a pci_dev
+ * @node: device tree node pointer
+ * @bus: bus the device is sitting on
+ * @devfn: PCI function number, extracted from device tree by caller.
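+ *
+ * Returns the new, partially initialized pci_dev on success, or NULL
+ * if the allocation fails.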
+ */
+struct pci_dev *of_create_pci_dev(struct device_node *node,
+				  struct pci_bus *bus, int devfn)
+{
+	struct pci_dev *dev;
+	const char *type;
+	struct pci_slot *slot;
+
+	dev = alloc_pci_dev();
+	if (!dev)
+		return NULL;
+	type = of_get_property(node, "device_type", NULL);
+	if (type == NULL)
+		type = "";
+
+	pr_debug("    create device, devfn: %x, type: %s\n", devfn, type);
+
+	dev->bus = bus;
+	dev->dev.of_node = of_node_get(node);
+	dev->dev.parent = bus->bridge;
+	dev->dev.bus = &pci_bus_type;
+	dev->devfn = devfn;
+	dev->multifunction = 0;		/* maybe a lie? */
+	dev->needs_freset = 0;		/* pcie fundamental reset required */
+	set_pcie_port_type(dev);
+
+	list_for_each_entry(slot, &dev->bus->slots, list)
+		if (PCI_SLOT(dev->devfn) == slot->number)
+			dev->slot = slot;
+
+	dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
+	dev->device = get_int_prop(node, "device-id", 0xffff);
+	dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0);
+	dev->subsystem_device = get_int_prop(node, "subsystem-id", 0);
+
+	dev->cfg_size = pci_cfg_space_size(dev);
+
+	dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(bus),
+		     dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
+	dev->class = get_int_prop(node, "class-code", 0);
+	dev->revision = get_int_prop(node, "revision-id", 0);
+
+	pr_debug("    class: 0x%x\n", dev->class);
+	pr_debug("    revision: 0x%x\n", dev->revision);
+
+	dev->current_state = 4;		/* unknown power state */
+	dev->error_state = pci_channel_io_normal;
+	dev->dma_mask = 0xffffffff;
+
+	/* Early fixups, before probing the BARs */
+	pci_fixup_device(pci_fixup_early, dev);
+
+	if (!strcmp(type, "pci") || !strcmp(type, "pciex")) {
+		/* a PCI-PCI bridge */
+		dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
+		dev->rom_base_reg = PCI_ROM_ADDRESS1;
+		set_pcie_hotplug_bridge(dev);
+	} else if (!strcmp(type, "cardbus")) {
+		dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
+	} else {
+		dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
+		dev->rom_base_reg = PCI_ROM_ADDRESS;
+		/* Maybe do a default OF mapping here */
+		dev->irq = NO_IRQ;
+	}
+
+	of_pci_parse_addrs(node, dev);
+
+	pr_debug("    adding to system ...\n");
+
+	pci_device_add(dev, bus);
+
+	return dev;
+}
+EXPORT_SYMBOL(of_create_pci_dev);
+
+/**
+ * of_scan_pci_bridge - Set up a PCI bridge and scan for child nodes
+ * @dev: pci_dev structure for the bridge
+ *
+ * of_scan_bus() calls this routine for each PCI bridge that it finds, and
+ * this routine in turn calls of_scan_bus() recursively to scan for more
+ * child devices.
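+ *
+ * For example, a bridge node with bus-range = <1 3> yields a child bus
+ * numbered 1 whose subordinate bus is 3.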
+ */
+void __devinit of_scan_pci_bridge(struct pci_dev *dev)
+{
+	struct device_node *node = dev->dev.of_node;
+	struct pci_bus *bus;
+	const u32 *busrange, *ranges;
+	int len, i, mode;
+	struct pci_bus_region region;
+	struct resource *res;
+	unsigned int flags;
+	u64 size;
+
+	pr_debug("of_scan_pci_bridge(%s)\n", node->full_name);
+
+	/* parse bus-range property */
+	busrange = of_get_property(node, "bus-range", &len);
+	if (busrange == NULL || len != 8) {
+		printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n",
+		       node->full_name);
+		return;
+	}
+	ranges = of_get_property(node, "ranges", &len);
+	if (ranges == NULL) {
+		printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %s\n",
+		       node->full_name);
+		return;
+	}
+
+	bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+	if (!bus) {
+		printk(KERN_ERR "Failed to create pci bus for %s\n",
+		       node->full_name);
+		return;
+	}
+
+	bus->primary = dev->bus->number;
+	bus->subordinate = busrange[1];
+	bus->bridge_ctl = 0;
+
+	/* parse ranges property */
+	/* PCI #address-cells == 3 and #size-cells == 2 always */
+	res = &dev->resource[PCI_BRIDGE_RESOURCES];
+	for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) {
+		res->flags = 0;
+		bus->resource[i] = res;
+		++res;
+	}
+	i = 1;
+	for (; len >= 32; len -= 32, ranges += 8) {
+		flags = pci_parse_of_flags(ranges[0], 1);
+		size = of_read_number(&ranges[6], 2);
+		if (flags == 0 || size == 0)
+			continue;
+		if (flags & IORESOURCE_IO) {
+			res = bus->resource[0];
+			if (res->flags) {
+				printk(KERN_ERR "PCI: ignoring extra I/O range"
+				       " for bridge %s\n", node->full_name);
+				continue;
+			}
+		} else {
+			if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
+				printk(KERN_ERR "PCI: too many memory ranges"
+				       " for bridge %s\n", node->full_name);
+				continue;
+			}
+			res = bus->resource[i];
+			++i;
+		}
+		res->flags = flags;
+		region.start = of_read_number(&ranges[1], 2);
+		region.end = region.start + size - 1;
+		pcibios_bus_to_resource(dev, res, &region);
+	}
+	sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
+		bus->number);
+	pr_debug("    bus name: %s\n", bus->name);
+
+	mode = PCI_PROBE_NORMAL;
+	if (ppc_md.pci_probe_mode)
+		mode = ppc_md.pci_probe_mode(bus);
+	pr_debug("    probe mode: %d\n", mode);
+
+	if (mode == PCI_PROBE_DEVTREE)
+		of_scan_bus(node, bus);
+	else if (mode == PCI_PROBE_NORMAL)
+		pci_scan_child_bus(bus);
+}
+EXPORT_SYMBOL(of_scan_pci_bridge);
+
+/**
+ * __of_scan_bus - given a PCI bus node, setup bus and scan for child devices
+ * @node: device tree node for the PCI bus
+ * @bus: pci_bus structure for the PCI bus
+ * @rescan_existing: Flag indicating bus has already been set up
+ */
+static void __devinit __of_scan_bus(struct device_node *node,
+				    struct pci_bus *bus, int rescan_existing)
+{
+	struct device_node *child;
+	const u32 *reg;
+	int reglen, devfn;
+	struct pci_dev *dev;
+
+	pr_debug("of_scan_bus(%s) bus no %d...\n",
+		 node->full_name, bus->number);
+
+	/* Scan direct children */
+	for_each_child_of_node(node, child) {
+		pr_debug("  * %s\n", child->full_name);
+		if (!of_device_is_available(child))
+			continue;
+		reg = of_get_property(child, "reg", &reglen);
+		if (reg == NULL || reglen < 20)
+			continue;
+		devfn = (reg[0] >> 8) & 0xff;
+
+		/* create a new pci_dev for this device */
+		dev = of_create_pci_dev(child, bus, devfn);
+		if (!dev)
+			continue;
+		pr_debug("    dev header type: %x\n", dev->hdr_type);
+	}
+
+	/* Apply all fixups necessary.
We don't fixup the bus "self" + * for an existing bridge that is being rescanned + */ + if (!rescan_existing) + pcibios_setup_bus_self(bus); + pcibios_setup_bus_devices(bus); + + /* Now scan child busses */ + list_for_each_entry(dev, &bus->devices, bus_list) { + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || + dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) { + of_scan_pci_bridge(dev); + } + } +} + +/** + * of_scan_bus - given a PCI bus node, setup bus and scan for child devices + * @node: device tree node for the PCI bus + * @bus: pci_bus structure for the PCI bus + */ +void __devinit of_scan_bus(struct device_node *node, + struct pci_bus *bus) +{ + __of_scan_bus(node, bus, 0); +} +EXPORT_SYMBOL_GPL(of_scan_bus); + +/** + * of_rescan_bus - given a PCI bus node, scan for child devices + * @node: device tree node for the PCI bus + * @bus: pci_bus structure for the PCI bus + * + * Same as of_scan_bus, but for a pci_bus structure that has already been + * setup. + */ +void __devinit of_rescan_bus(struct device_node *node, + struct pci_bus *bus) +{ + __of_scan_bus(node, bus, 1); +} +EXPORT_SYMBOL_GPL(of_rescan_bus); + diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c new file mode 100644 index 00000000..58eaa3dd --- /dev/null +++ b/arch/powerpc/kernel/pmc.c @@ -0,0 +1,102 @@ +/* + * arch/powerpc/kernel/pmc.c + * + * Copyright (C) 2004 David Gibson, IBM Corporation. + * Includes code formerly from arch/ppc/kernel/perfmon.c: + * Author: Andy Fleming + * Copyright (c) 2004 Freescale Semiconductor, Inc + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/bug.h> +#include <linux/spinlock.h> +#include <linux/export.h> + +#include <asm/processor.h> +#include <asm/cputable.h> +#include <asm/pmc.h> + +#ifndef MMCR0_PMAO +#define MMCR0_PMAO 0 +#endif + +static void dummy_perf(struct pt_regs *regs) +{ +#if defined(CONFIG_FSL_EMB_PERFMON) + mtpmr(PMRN_PMGC0, mfpmr(PMRN_PMGC0) & ~PMGC0_PMIE); +#elif defined(CONFIG_PPC64) || defined(CONFIG_6xx) + if (cur_cpu_spec->pmc_type == PPC_PMC_IBM) + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~(MMCR0_PMXE|MMCR0_PMAO)); +#else + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMXE); +#endif +} + + +static DEFINE_RAW_SPINLOCK(pmc_owner_lock); +static void *pmc_owner_caller; /* mostly for debugging */ +perf_irq_t perf_irq = dummy_perf; + +int reserve_pmc_hardware(perf_irq_t new_perf_irq) +{ + int err = 0; + + raw_spin_lock(&pmc_owner_lock); + + if (pmc_owner_caller) { + printk(KERN_WARNING "reserve_pmc_hardware: " + "PMC hardware busy (reserved by caller %p)\n", + pmc_owner_caller); + err = -EBUSY; + goto out; + } + + pmc_owner_caller = __builtin_return_address(0); + perf_irq = new_perf_irq ? new_perf_irq : dummy_perf; + + out: + raw_spin_unlock(&pmc_owner_lock); + return err; +} +EXPORT_SYMBOL_GPL(reserve_pmc_hardware); + +void release_pmc_hardware(void) +{ + raw_spin_lock(&pmc_owner_lock); + + WARN_ON(! 
pmc_owner_caller); + + pmc_owner_caller = NULL; + perf_irq = dummy_perf; + + raw_spin_unlock(&pmc_owner_lock); +} +EXPORT_SYMBOL_GPL(release_pmc_hardware); + +#ifdef CONFIG_PPC64 +void power4_enable_pmcs(void) +{ + unsigned long hid0; + + hid0 = mfspr(SPRN_HID0); + hid0 |= 1UL << (63 - 20); + + /* POWER4 requires the following sequence */ + asm volatile( + "sync\n" + "mtspr %1, %0\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "isync" : "=&r" (hid0) : "i" (SPRN_HID0), "0" (hid0): + "memory"); +} +#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/kernel/ppc32.h new file mode 100644 index 00000000..dc16aefe --- /dev/null +++ b/arch/powerpc/kernel/ppc32.h @@ -0,0 +1,139 @@ +#ifndef _PPC64_PPC32_H +#define _PPC64_PPC32_H + +#include <linux/compat.h> +#include <asm/siginfo.h> +#include <asm/signal.h> + +/* + * Data types and macros for providing 32b PowerPC support. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* These are here to support 32-bit syscalls on a 64-bit kernel. */ + +typedef struct compat_siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[SI_PAD_SIZE32]; + + /* kill() */ + struct { + compat_pid_t _pid; /* sender's pid */ + compat_uid_t _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + compat_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + compat_sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ + } _timer; + + /* POSIX.1b signals */ + struct { + compat_pid_t _pid; /* sender's pid */ + compat_uid_t _uid; /* sender's uid */ + compat_sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + compat_pid_t _pid; /* which child */ + compat_uid_t _uid; /* sender's uid */ + int _status; /* exit code */ + compat_clock_t _utime; + compat_clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */ + struct { + unsigned int _addr; /* faulting insn/memory ref. 
*/ + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} compat_siginfo_t; + +#define __old_sigaction32 old_sigaction32 + +struct __old_sigaction32 { + compat_uptr_t sa_handler; + compat_old_sigset_t sa_mask; + unsigned int sa_flags; + compat_uptr_t sa_restorer; /* not used by Linux/SPARC yet */ +}; + + + +struct sigaction32 { + compat_uptr_t sa_handler; /* Really a pointer, but need to deal with 32 bits */ + unsigned int sa_flags; + compat_uptr_t sa_restorer; /* Another 32 bit pointer */ + compat_sigset_t sa_mask; /* A 32 bit mask */ +}; + +typedef struct sigaltstack_32 { + unsigned int ss_sp; + int ss_flags; + compat_size_t ss_size; +} stack_32_t; + +struct pt_regs32 { + unsigned int gpr[32]; + unsigned int nip; + unsigned int msr; + unsigned int orig_gpr3; /* Used for restarting system calls */ + unsigned int ctr; + unsigned int link; + unsigned int xer; + unsigned int ccr; + unsigned int mq; /* 601 only (not used at present) */ + unsigned int trap; /* Reason for being here */ + unsigned int dar; /* Fault registers */ + unsigned int dsisr; + unsigned int result; /* Result of a system call */ +}; + +struct sigcontext32 { + unsigned int _unused[4]; + int signal; + compat_uptr_t handler; + unsigned int oldmask; + compat_uptr_t regs; /* 4 byte pointer to the pt_regs32 structure. */ +}; + +struct mcontext32 { + elf_gregset_t32 mc_gregs; + elf_fpregset_t mc_fregs; + unsigned int mc_pad[2]; + elf_vrregset_t32 mc_vregs __attribute__((__aligned__(16))); + elf_vsrreghalf_t32 mc_vsregs __attribute__((__aligned__(16))); +}; + +struct ucontext32 { + unsigned int uc_flags; + unsigned int uc_link; + stack_32_t uc_stack; + int uc_pad[7]; + compat_uptr_t uc_regs; /* points to uc_mcontext field */ + compat_sigset_t uc_sigmask; /* mask last for extensibility */ + /* glibc has 1024-bit signal masks, ours are 64-bit */ + int uc_maskext[30]; + int uc_pad2[3]; + struct mcontext32 uc_mcontext; +}; + +#endif /* _PPC64_PPC32_H */ diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c new file mode 100644 index 00000000..786a2700 --- /dev/null +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -0,0 +1,192 @@ +#include <linux/export.h> +#include <linux/threads.h> +#include <linux/smp.h> +#include <linux/sched.h> +#include <linux/elfcore.h> +#include <linux/string.h> +#include <linux/interrupt.h> +#include <linux/screen_info.h> +#include <linux/vt_kern.h> +#include <linux/nvram.h> +#include <linux/irq.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/bitops.h> + +#include <asm/page.h> +#include <asm/processor.h> +#include <asm/cacheflush.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <linux/atomic.h> +#include <asm/checksum.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include <linux/adb.h> +#include <linux/cuda.h> +#include <linux/pmu.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/irq.h> +#include <asm/pmac_feature.h> +#include <asm/dma.h> +#include <asm/machdep.h> +#include <asm/hw_irq.h> +#include <asm/nvram.h> +#include <asm/mmu_context.h> +#include <asm/backlight.h> +#include <asm/time.h> +#include <asm/cputable.h> +#include <asm/btext.h> +#include <asm/div64.h> +#include <asm/signal.h> +#include <asm/dcr.h> +#include <asm/ftrace.h> +#include <asm/switch_to.h> + +#ifdef CONFIG_PPC32 +extern void transfer_to_handler(void); +extern void do_IRQ(struct pt_regs *regs); +extern void machine_check_exception(struct pt_regs *regs); +extern void 
alignment_exception(struct pt_regs *regs); +extern void program_check_exception(struct pt_regs *regs); +extern void single_step_exception(struct pt_regs *regs); +extern int sys_sigreturn(struct pt_regs *regs); + +EXPORT_SYMBOL(clear_pages); +EXPORT_SYMBOL(ISA_DMA_THRESHOLD); +EXPORT_SYMBOL(DMA_MODE_READ); +EXPORT_SYMBOL(DMA_MODE_WRITE); + +EXPORT_SYMBOL(transfer_to_handler); +EXPORT_SYMBOL(do_IRQ); +EXPORT_SYMBOL(machine_check_exception); +EXPORT_SYMBOL(alignment_exception); +EXPORT_SYMBOL(program_check_exception); +EXPORT_SYMBOL(single_step_exception); +EXPORT_SYMBOL(sys_sigreturn); +#endif + +#ifdef CONFIG_FUNCTION_TRACER +EXPORT_SYMBOL(_mcount); +#endif + +EXPORT_SYMBOL(strcpy); +EXPORT_SYMBOL(strncpy); +EXPORT_SYMBOL(strcat); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); + +EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(csum_partial_copy_generic); +EXPORT_SYMBOL(ip_fast_csum); +EXPORT_SYMBOL(csum_tcpudp_magic); + +EXPORT_SYMBOL(__copy_tofrom_user); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(__strnlen_user); +EXPORT_SYMBOL(copy_page); + +#if defined(CONFIG_PCI) && defined(CONFIG_PPC32) +EXPORT_SYMBOL(isa_io_base); +EXPORT_SYMBOL(isa_mem_base); +EXPORT_SYMBOL(pci_dram_offset); +#endif /* CONFIG_PCI */ + +EXPORT_SYMBOL(start_thread); +EXPORT_SYMBOL(kernel_thread); + +EXPORT_SYMBOL(giveup_fpu); +#ifdef CONFIG_ALTIVEC +EXPORT_SYMBOL(giveup_altivec); +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX +EXPORT_SYMBOL(giveup_vsx); +EXPORT_SYMBOL_GPL(__giveup_vsx); +#endif /* CONFIG_VSX */ +#ifdef CONFIG_SPE +EXPORT_SYMBOL(giveup_spe); +#endif /* CONFIG_SPE */ + +#ifndef CONFIG_PPC64 +EXPORT_SYMBOL(flush_instruction_cache); +#endif +EXPORT_SYMBOL(__flush_icache_range); +EXPORT_SYMBOL(flush_dcache_range); + +#ifdef CONFIG_SMP +#ifdef CONFIG_PPC32 +EXPORT_SYMBOL(smp_hw_index); +#endif +#endif + +#ifdef CONFIG_ADB +EXPORT_SYMBOL(adb_request); +EXPORT_SYMBOL(adb_register); +EXPORT_SYMBOL(adb_unregister); +EXPORT_SYMBOL(adb_poll); +EXPORT_SYMBOL(adb_try_handler_change); +#endif /* CONFIG_ADB */ +#ifdef CONFIG_ADB_CUDA +EXPORT_SYMBOL(cuda_request); +EXPORT_SYMBOL(cuda_poll); +#endif /* CONFIG_ADB_CUDA */ +EXPORT_SYMBOL(to_tm); + +#ifdef CONFIG_PPC32 +long long __ashrdi3(long long, int); +long long __ashldi3(long long, int); +long long __lshrdi3(long long, int); +EXPORT_SYMBOL(__ashrdi3); +EXPORT_SYMBOL(__ashldi3); +EXPORT_SYMBOL(__lshrdi3); +int __ucmpdi2(unsigned long long, unsigned long long); +EXPORT_SYMBOL(__ucmpdi2); +#endif + +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memcmp); +EXPORT_SYMBOL(memchr); + +#if defined(CONFIG_FB_VGA16_MODULE) +EXPORT_SYMBOL(screen_info); +#endif + +#ifdef CONFIG_PPC32 +EXPORT_SYMBOL(timer_interrupt); +EXPORT_SYMBOL(tb_ticks_per_jiffy); +EXPORT_SYMBOL(cacheable_memcpy); +EXPORT_SYMBOL(cacheable_memzero); +#endif + +#ifdef CONFIG_PPC32 +EXPORT_SYMBOL(switch_mmu_context); +#endif + +#ifdef CONFIG_PPC_STD_MMU_32 +extern long mol_trampoline; +EXPORT_SYMBOL(mol_trampoline); /* For MOL */ +EXPORT_SYMBOL(flush_hash_pages); /* For MOL */ +#ifdef CONFIG_SMP +extern int mmu_hash_lock; +EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */ +#endif /* CONFIG_SMP */ +extern long *intercept_table; +EXPORT_SYMBOL(intercept_table); +#endif /* CONFIG_PPC_STD_MMU_32 */ +#ifdef CONFIG_PPC_DCR_NATIVE +EXPORT_SYMBOL(__mtdcr); +EXPORT_SYMBOL(__mfdcr); +#endif +EXPORT_SYMBOL(empty_zero_page); + +#ifdef CONFIG_PPC64 +EXPORT_SYMBOL(__arch_hweight8); +EXPORT_SYMBOL(__arch_hweight16); 
+EXPORT_SYMBOL(__arch_hweight32); +EXPORT_SYMBOL(__arch_hweight64); +#endif diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S new file mode 100644 index 00000000..1b1787d5 --- /dev/null +++ b/arch/powerpc/kernel/ppc_save_regs.S @@ -0,0 +1,75 @@ +/* + * Copyright (C) 1996 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * NOTE: assert(sizeof(buf) > 23 * sizeof(long)) + */ +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ptrace.h> + +/* + * Grab the register values as they are now. + * This won't do a particularly good job because we really + * want our caller's caller's registers, and our caller has + * already executed its prologue. + * ToDo: We could reach back into the caller's save area to do + * a better job of representing the caller's state (note that + * that will be different for 32-bit and 64-bit, because of the + * different ABIs, though). + */ +_GLOBAL(ppc_save_regs) + PPC_STL r0,0*SZL(r3) + PPC_STL r2,2*SZL(r3) + PPC_STL r3,3*SZL(r3) + PPC_STL r4,4*SZL(r3) + PPC_STL r5,5*SZL(r3) + PPC_STL r6,6*SZL(r3) + PPC_STL r7,7*SZL(r3) + PPC_STL r8,8*SZL(r3) + PPC_STL r9,9*SZL(r3) + PPC_STL r10,10*SZL(r3) + PPC_STL r11,11*SZL(r3) + PPC_STL r12,12*SZL(r3) + PPC_STL r13,13*SZL(r3) + PPC_STL r14,14*SZL(r3) + PPC_STL r15,15*SZL(r3) + PPC_STL r16,16*SZL(r3) + PPC_STL r17,17*SZL(r3) + PPC_STL r18,18*SZL(r3) + PPC_STL r19,19*SZL(r3) + PPC_STL r20,20*SZL(r3) + PPC_STL r21,21*SZL(r3) + PPC_STL r22,22*SZL(r3) + PPC_STL r23,23*SZL(r3) + PPC_STL r24,24*SZL(r3) + PPC_STL r25,25*SZL(r3) + PPC_STL r26,26*SZL(r3) + PPC_STL r27,27*SZL(r3) + PPC_STL r28,28*SZL(r3) + PPC_STL r29,29*SZL(r3) + PPC_STL r30,30*SZL(r3) + PPC_STL r31,31*SZL(r3) + /* go up one stack frame for SP */ + PPC_LL r4,0(r1) + PPC_STL r4,1*SZL(r3) + /* get caller's LR */ + PPC_LL r0,LRSAVE(r4) + PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_LINK-STACK_FRAME_OVERHEAD(r3) + mfmsr r0 + PPC_STL r0,_MSR-STACK_FRAME_OVERHEAD(r3) + mfctr r0 + PPC_STL r0,_CTR-STACK_FRAME_OVERHEAD(r3) + mfxer r0 + PPC_STL r0,_XER-STACK_FRAME_OVERHEAD(r3) + mfcr r0 + PPC_STL r0,_CCR-STACK_FRAME_OVERHEAD(r3) + li r0,0 + PPC_STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3) + blr diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c new file mode 100644 index 00000000..c8ae3714 --- /dev/null +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -0,0 +1,125 @@ +/* + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/proc_fs.h> +#include <linux/kernel.h> + +#include <asm/machdep.h> +#include <asm/vdso_datapage.h> +#include <asm/rtas.h> +#include <asm/uaccess.h> +#include <asm/prom.h> + +#ifdef CONFIG_PPC64 + +static loff_t page_map_seek( struct file *file, loff_t off, int whence) +{ + loff_t new; + struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + + switch(whence) { + case 0: + new = off; + break; + case 1: + new = file->f_pos + off; + break; + case 2: + new = dp->size + off; + break; + default: + return -EINVAL; + } + if ( new < 0 || new > dp->size ) + return -EINVAL; + return (file->f_pos = new); +} + +static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes, + loff_t *ppos) +{ + struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + return simple_read_from_buffer(buf, nbytes, ppos, dp->data, dp->size); +} + +static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) +{ + struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + + if ((vma->vm_end - vma->vm_start) > dp->size) + return -EINVAL; + + remap_pfn_range(vma, vma->vm_start, __pa(dp->data) >> PAGE_SHIFT, + dp->size, vma->vm_page_prot); + return 0; +} + +static const struct file_operations page_map_fops = { + .llseek = page_map_seek, + .read = page_map_read, + .mmap = page_map_mmap +}; + + +static int __init proc_ppc64_init(void) +{ + struct proc_dir_entry *pde; + + pde = proc_create_data("powerpc/systemcfg", S_IFREG|S_IRUGO, NULL, + &page_map_fops, vdso_data); + if (!pde) + return 1; + pde->size = PAGE_SIZE; + + return 0; +} +__initcall(proc_ppc64_init); + +#endif /* CONFIG_PPC64 */ + +/* + * Create the ppc64 and ppc64/rtas directories early. This allows us to + * assume that they have been previously created in drivers. + */ +static int __init proc_ppc64_create(void) +{ + struct proc_dir_entry *root; + + root = proc_mkdir("powerpc", NULL); + if (!root) + return 1; + +#ifdef CONFIG_PPC64 + if (!proc_symlink("ppc64", NULL, "powerpc")) + pr_err("Failed to create link /proc/ppc64 -> /proc/powerpc\n"); +#endif + + if (!of_find_node_by_path("/rtas")) + return 0; + + if (!proc_mkdir("rtas", root)) + return 1; + + if (!proc_symlink("rtas", NULL, "powerpc/rtas")) + return 1; + + return 0; +} +core_initcall(proc_ppc64_create); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c new file mode 100644 index 00000000..4937c969 --- /dev/null +++ b/arch/powerpc/kernel/process.c @@ -0,0 +1,1339 @@ +/* + * Derived from "arch/i386/kernel/process.c" + * Copyright (C) 1995 Linus Torvalds + * + * Updated and modified by Cort Dougan (cort@cs.nmt.edu) and + * Paul Mackerras (paulus@cs.anu.edu.au) + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/elf.h> +#include <linux/init.h> +#include <linux/prctl.h> +#include <linux/init_task.h> +#include <linux/export.h> +#include <linux/kallsyms.h> +#include <linux/mqueue.h> +#include <linux/hardirq.h> +#include <linux/utsname.h> +#include <linux/ftrace.h> +#include <linux/kernel_stat.h> +#include <linux/personality.h> +#include <linux/random.h> +#include <linux/hw_breakpoint.h> + +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/processor.h> +#include <asm/mmu.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/time.h> +#include <asm/runlatch.h> +#include <asm/syscalls.h> +#include <asm/switch_to.h> +#include <asm/debug.h> +#ifdef CONFIG_PPC64 +#include <asm/firmware.h> +#endif +#include <linux/kprobes.h> +#include <linux/kdebug.h> + +extern unsigned long _get_SP(void); + +#ifndef CONFIG_SMP +struct task_struct *last_task_used_math = NULL; +struct task_struct *last_task_used_altivec = NULL; +struct task_struct *last_task_used_vsx = NULL; +struct task_struct *last_task_used_spe = NULL; +#endif + +/* + * Make sure the floating-point register state in the + * the thread_struct is up to date for task tsk. + */ +void flush_fp_to_thread(struct task_struct *tsk) +{ + if (tsk->thread.regs) { + /* + * We need to disable preemption here because if we didn't, + * another process could get scheduled after the regs->msr + * test but before we have finished saving the FP registers + * to the thread_struct. That process could take over the + * FPU, and then when we get scheduled again we would store + * bogus values for the remaining FP registers. + */ + preempt_disable(); + if (tsk->thread.regs->msr & MSR_FP) { +#ifdef CONFIG_SMP + /* + * This should only ever be called for current or + * for a stopped child process. Since we save away + * the FP register state on context switch on SMP, + * there is something wrong if a stopped child appears + * to still have its FP state in the CPU registers. + */ + BUG_ON(tsk != current); +#endif + giveup_fpu(tsk); + } + preempt_enable(); + } +} +EXPORT_SYMBOL_GPL(flush_fp_to_thread); + +void enable_kernel_fp(void) +{ + WARN_ON(preemptible()); + +#ifdef CONFIG_SMP + if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) + giveup_fpu(current); + else + giveup_fpu(NULL); /* just enables FP for kernel */ +#else + giveup_fpu(last_task_used_math); +#endif /* CONFIG_SMP */ +} +EXPORT_SYMBOL(enable_kernel_fp); + +#ifdef CONFIG_ALTIVEC +void enable_kernel_altivec(void) +{ + WARN_ON(preemptible()); + +#ifdef CONFIG_SMP + if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) + giveup_altivec(current); + else + giveup_altivec(NULL); /* just enable AltiVec for kernel - force */ +#else + giveup_altivec(last_task_used_altivec); +#endif /* CONFIG_SMP */ +} +EXPORT_SYMBOL(enable_kernel_altivec); + +/* + * Make sure the VMX/Altivec register state in the + * the thread_struct is up to date for task tsk. 
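+ * A typical reader of the saved state (a sketch with a hypothetical + * caller; ptrace-style code does the equivalent) is: + * + *	flush_altivec_to_thread(task); + *	memcpy(buf, task->thread.vr, sizeof(task->thread.vr));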
+ */ +void flush_altivec_to_thread(struct task_struct *tsk) +{ + if (tsk->thread.regs) { + preempt_disable(); + if (tsk->thread.regs->msr & MSR_VEC) { +#ifdef CONFIG_SMP + BUG_ON(tsk != current); +#endif + giveup_altivec(tsk); + } + preempt_enable(); + } +} +EXPORT_SYMBOL_GPL(flush_altivec_to_thread); +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_VSX +#if 0 +/* not currently used, but some crazy RAID module might want to later */ +void enable_kernel_vsx(void) +{ + WARN_ON(preemptible()); + +#ifdef CONFIG_SMP + if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) + giveup_vsx(current); + else + giveup_vsx(NULL); /* just enable vsx for kernel - force */ +#else + giveup_vsx(last_task_used_vsx); +#endif /* CONFIG_SMP */ +} +EXPORT_SYMBOL(enable_kernel_vsx); +#endif + +void giveup_vsx(struct task_struct *tsk) +{ + giveup_fpu(tsk); + giveup_altivec(tsk); + __giveup_vsx(tsk); +} + +void flush_vsx_to_thread(struct task_struct *tsk) +{ + if (tsk->thread.regs) { + preempt_disable(); + if (tsk->thread.regs->msr & MSR_VSX) { +#ifdef CONFIG_SMP + BUG_ON(tsk != current); +#endif + giveup_vsx(tsk); + } + preempt_enable(); + } +} +EXPORT_SYMBOL_GPL(flush_vsx_to_thread); +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_SPE + +void enable_kernel_spe(void) +{ + WARN_ON(preemptible()); + +#ifdef CONFIG_SMP + if (current->thread.regs && (current->thread.regs->msr & MSR_SPE)) + giveup_spe(current); + else + giveup_spe(NULL); /* just enable SPE for kernel - force */ +#else + giveup_spe(last_task_used_spe); +#endif /* __SMP __ */ +} +EXPORT_SYMBOL(enable_kernel_spe); + +void flush_spe_to_thread(struct task_struct *tsk) +{ + if (tsk->thread.regs) { + preempt_disable(); + if (tsk->thread.regs->msr & MSR_SPE) { +#ifdef CONFIG_SMP + BUG_ON(tsk != current); +#endif + tsk->thread.spefscr = mfspr(SPRN_SPEFSCR); + giveup_spe(tsk); + } + preempt_enable(); + } +} +#endif /* CONFIG_SPE */ + +#ifndef CONFIG_SMP +/* + * If we are doing lazy switching of CPU state (FP, altivec or SPE), + * and the current task has some state, discard it. 
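+ * Called below from exit_thread(), flush_thread() and start_thread(), + * all of which need the lazy-state pointers to stop referring to the + * current task.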
+ */ +void discard_lazy_cpu_state(void) +{ + preempt_disable(); + if (last_task_used_math == current) + last_task_used_math = NULL; +#ifdef CONFIG_ALTIVEC + if (last_task_used_altivec == current) + last_task_used_altivec = NULL; +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX + if (last_task_used_vsx == current) + last_task_used_vsx = NULL; +#endif /* CONFIG_VSX */ +#ifdef CONFIG_SPE + if (last_task_used_spe == current) + last_task_used_spe = NULL; +#endif + preempt_enable(); +} +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +void do_send_trap(struct pt_regs *regs, unsigned long address, + unsigned long error_code, int signal_code, int breakpt) +{ + siginfo_t info; + + if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, + 11, SIGSEGV) == NOTIFY_STOP) + return; + + /* Deliver the signal to userspace */ + info.si_signo = SIGTRAP; + info.si_errno = breakpt; /* breakpoint or watchpoint id */ + info.si_code = signal_code; + info.si_addr = (void __user *)address; + force_sig_info(SIGTRAP, &info, current); +} +#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ +void do_dabr(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + siginfo_t info; + + if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, + 11, SIGSEGV) == NOTIFY_STOP) + return; + + if (debugger_dabr_match(regs)) + return; + + /* Clear the DABR */ + set_dabr(0); + + /* Deliver the signal to userspace */ + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_HWBKPT; + info.si_addr = (void __user *)address; + force_sig_info(SIGTRAP, &info, current); +} +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ + +static DEFINE_PER_CPU(unsigned long, current_dabr); + +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +/* + * Set the debug registers back to their default "safe" values. + */ +static void set_debug_reg_defaults(struct thread_struct *thread) +{ + thread->iac1 = thread->iac2 = 0; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + thread->iac3 = thread->iac4 = 0; +#endif + thread->dac1 = thread->dac2 = 0; +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + thread->dvc1 = thread->dvc2 = 0; +#endif + thread->dbcr0 = 0; +#ifdef CONFIG_BOOKE + /* + * Force User/Supervisor bits to b11 (user-only MSR[PR]=1) + */ + thread->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | \ + DBCR1_IAC3US | DBCR1_IAC4US; + /* + * Force Data Address Compare User/Supervisor bits to be User-only + * (0b11 MSR[PR]=1) and set all other bits in DBCR2 register to be 0. + */ + thread->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; +#else + thread->dbcr1 = 0; +#endif +} + +static void prime_debug_regs(struct thread_struct *thread) +{ + mtspr(SPRN_IAC1, thread->iac1); + mtspr(SPRN_IAC2, thread->iac2); +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + mtspr(SPRN_IAC3, thread->iac3); + mtspr(SPRN_IAC4, thread->iac4); +#endif + mtspr(SPRN_DAC1, thread->dac1); + mtspr(SPRN_DAC2, thread->dac2); +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + mtspr(SPRN_DVC1, thread->dvc1); + mtspr(SPRN_DVC2, thread->dvc2); +#endif + mtspr(SPRN_DBCR0, thread->dbcr0); + mtspr(SPRN_DBCR1, thread->dbcr1); +#ifdef CONFIG_BOOKE + mtspr(SPRN_DBCR2, thread->dbcr2); +#endif +} +/* + * Unless neither the old or new thread are making use of the + * debug registers, set the debug registers from the values + * stored in the new thread. 
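+ * In other words, prime_debug_regs() is skipped only when DBCR0[IDM] + * is clear in both the outgoing and the incoming thread, as the check + * below shows.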
+ */ +static void switch_booke_debug_regs(struct thread_struct *new_thread) +{ + if ((current->thread.dbcr0 & DBCR0_IDM) + || (new_thread->dbcr0 & DBCR0_IDM)) + prime_debug_regs(new_thread); +} +#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ +#ifndef CONFIG_HAVE_HW_BREAKPOINT +static void set_debug_reg_defaults(struct thread_struct *thread) +{ + if (thread->dabr) { + thread->dabr = 0; + set_dabr(0); + } +} +#endif /* !CONFIG_HAVE_HW_BREAKPOINT */ +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ + +int set_dabr(unsigned long dabr) +{ + __get_cpu_var(current_dabr) = dabr; + + if (ppc_md.set_dabr) + return ppc_md.set_dabr(dabr); + + /* XXX should we have a CPU_FTR_HAS_DABR ? */ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + mtspr(SPRN_DAC1, dabr); +#ifdef CONFIG_PPC_47x + isync(); +#endif +#elif defined(CONFIG_PPC_BOOK3S) + mtspr(SPRN_DABR, dabr); +#endif + + + return 0; +} + +#ifdef CONFIG_PPC64 +DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array); +#endif + +struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *new) +{ + struct thread_struct *new_thread, *old_thread; + unsigned long flags; + struct task_struct *last; +#ifdef CONFIG_PPC_BOOK3S_64 + struct ppc64_tlb_batch *batch; +#endif + +#ifdef CONFIG_SMP + /* avoid complexity of lazy save/restore of fpu + * by just saving it every time we switch out if + * this task used the fpu during the last quantum. + * + * If it tries to use the fpu again, it'll trap and + * reload its fp regs. So we don't have to do a restore + * every switch, just a save. + * -- Cort + */ + if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP)) + giveup_fpu(prev); +#ifdef CONFIG_ALTIVEC + /* + * If the previous thread used altivec in the last quantum + * (thus changing altivec regs) then save them. + * We used to check the VRSAVE register but not all apps + * set it, so we don't rely on it now (and in fact we need + * to save & restore VSCR even if VRSAVE == 0). -- paulus + * + * On SMP we always save/restore altivec regs just to avoid the + * complexity of changing processors. + * -- Cort + */ + if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC)) + giveup_altivec(prev); +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX + if (prev->thread.regs && (prev->thread.regs->msr & MSR_VSX)) + /* VMX and FPU registers are already save here */ + __giveup_vsx(prev); +#endif /* CONFIG_VSX */ +#ifdef CONFIG_SPE + /* + * If the previous thread used spe in the last quantum + * (thus changing spe regs) then save them. + * + * On SMP we always save/restore spe regs just to avoid the + * complexity of changing processors. + */ + if ((prev->thread.regs && (prev->thread.regs->msr & MSR_SPE))) + giveup_spe(prev); +#endif /* CONFIG_SPE */ + +#else /* CONFIG_SMP */ +#ifdef CONFIG_ALTIVEC + /* Avoid the trap. On smp this this never happens since + * we don't set last_task_used_altivec -- Cort + */ + if (new->thread.regs && last_task_used_altivec == new) + new->thread.regs->msr |= MSR_VEC; +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX + if (new->thread.regs && last_task_used_vsx == new) + new->thread.regs->msr |= MSR_VSX; +#endif /* CONFIG_VSX */ +#ifdef CONFIG_SPE + /* Avoid the trap. 
On smp this this never happens since + * we don't set last_task_used_spe + */ + if (new->thread.regs && last_task_used_spe == new) + new->thread.regs->msr |= MSR_SPE; +#endif /* CONFIG_SPE */ + +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + switch_booke_debug_regs(&new->thread); +#else +/* + * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would + * schedule DABR + */ +#ifndef CONFIG_HAVE_HW_BREAKPOINT + if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) + set_dabr(new->thread.dabr); +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ +#endif + + + new_thread = &new->thread; + old_thread = ¤t->thread; + +#ifdef CONFIG_PPC64 + /* + * Collect processor utilization data per process + */ + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); + long unsigned start_tb, current_tb; + start_tb = old_thread->start_tb; + cu->current_tb = current_tb = mfspr(SPRN_PURR); + old_thread->accum_tb += (current_tb - start_tb); + new_thread->start_tb = current_tb; + } +#endif /* CONFIG_PPC64 */ + +#ifdef CONFIG_PPC_BOOK3S_64 + batch = &__get_cpu_var(ppc64_tlb_batch); + if (batch->active) { + current_thread_info()->local_flags |= _TLF_LAZY_MMU; + if (batch->index) + __flush_tlb_pending(batch); + batch->active = 0; + } +#endif /* CONFIG_PPC_BOOK3S_64 */ + + local_irq_save(flags); + + account_system_vtime(current); + account_process_vtime(current); + + /* + * We can't take a PMU exception inside _switch() since there is a + * window where the kernel stack SLB and the kernel stack are out + * of sync. Hard disable here. + */ + hard_irq_disable(); + last = _switch(old_thread, new_thread); + +#ifdef CONFIG_PPC_BOOK3S_64 + if (current_thread_info()->local_flags & _TLF_LAZY_MMU) { + current_thread_info()->local_flags &= ~_TLF_LAZY_MMU; + batch = &__get_cpu_var(ppc64_tlb_batch); + batch->active = 1; + } +#endif /* CONFIG_PPC_BOOK3S_64 */ + + local_irq_restore(flags); + + return last; +} + +static int instructions_to_print = 16; + +static void show_instructions(struct pt_regs *regs) +{ + int i; + unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 * + sizeof(int)); + + printk("Instruction dump:"); + + for (i = 0; i < instructions_to_print; i++) { + int instr; + + if (!(i % 8)) + printk("\n"); + +#if !defined(CONFIG_BOOKE) + /* If executing with the IMMU off, adjust pc rather + * than print XXXXXXXX. + */ + if (!(regs->msr & MSR_IR)) + pc = (unsigned long)phys_to_virt(pc); +#endif + + /* We use __get_user here *only* to avoid an OOPS on a + * bad address because the pc *should* only be a + * kernel address. 
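+ * If the address fails either check we print XXXXXXXX for that word + * instead, as seen below.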
+ */ + if (!__kernel_text_address(pc) || + __get_user(instr, (unsigned int __user *)pc)) { + printk(KERN_CONT "XXXXXXXX "); + } else { + if (regs->nip == pc) + printk(KERN_CONT "<%08x> ", instr); + else + printk(KERN_CONT "%08x ", instr); + } + + pc += sizeof(int); + } + + printk("\n"); +} + +static struct regbit { + unsigned long bit; + const char *name; +} msr_bits[] = { +#if defined(CONFIG_PPC64) && !defined(CONFIG_BOOKE) + {MSR_SF, "SF"}, + {MSR_HV, "HV"}, +#endif + {MSR_VEC, "VEC"}, + {MSR_VSX, "VSX"}, +#ifdef CONFIG_BOOKE + {MSR_CE, "CE"}, +#endif + {MSR_EE, "EE"}, + {MSR_PR, "PR"}, + {MSR_FP, "FP"}, + {MSR_ME, "ME"}, +#ifdef CONFIG_BOOKE + {MSR_DE, "DE"}, +#else + {MSR_SE, "SE"}, + {MSR_BE, "BE"}, +#endif + {MSR_IR, "IR"}, + {MSR_DR, "DR"}, + {MSR_PMM, "PMM"}, +#ifndef CONFIG_BOOKE + {MSR_RI, "RI"}, + {MSR_LE, "LE"}, +#endif + {0, NULL} +}; + +static void printbits(unsigned long val, struct regbit *bits) +{ + const char *sep = ""; + + printk("<"); + for (; bits->bit; ++bits) + if (val & bits->bit) { + printk("%s%s", sep, bits->name); + sep = ","; + } + printk(">"); +} + +#ifdef CONFIG_PPC64 +#define REG "%016lx" +#define REGS_PER_LINE 4 +#define LAST_VOLATILE 13 +#else +#define REG "%08lx" +#define REGS_PER_LINE 8 +#define LAST_VOLATILE 12 +#endif + +void show_regs(struct pt_regs * regs) +{ + int i, trap; + + printk("NIP: "REG" LR: "REG" CTR: "REG"\n", + regs->nip, regs->link, regs->ctr); + printk("REGS: %p TRAP: %04lx %s (%s)\n", + regs, regs->trap, print_tainted(), init_utsname()->release); + printk("MSR: "REG" ", regs->msr); + printbits(regs->msr, msr_bits); + printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); +#ifdef CONFIG_PPC64 + printk("SOFTE: %ld\n", regs->softe); +#endif + trap = TRAP(regs); + if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) + printk("CFAR: "REG"\n", regs->orig_gpr3); + if (trap == 0x300 || trap == 0x600) +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) + printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr); +#else + printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr); +#endif + printk("TASK = %p[%d] '%s' THREAD: %p", + current, task_pid_nr(current), current->comm, task_thread_info(current)); + +#ifdef CONFIG_SMP + printk(" CPU: %d", raw_smp_processor_id()); +#endif /* CONFIG_SMP */ + + for (i = 0; i < 32; i++) { + if ((i % REGS_PER_LINE) == 0) + printk("\nGPR%02d: ", i); + printk(REG " ", regs->gpr[i]); + if (i == LAST_VOLATILE && !FULL_REGS(regs)) + break; + } + printk("\n"); +#ifdef CONFIG_KALLSYMS + /* + * Lookup NIP late so we have the best change of getting the + * above info out without failing + */ + printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); + printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); +#endif + show_stack(current, (unsigned long *) regs->gpr[1]); + if (!user_mode(regs)) + show_instructions(regs); +} + +void exit_thread(void) +{ + discard_lazy_cpu_state(); +} + +void flush_thread(void) +{ + discard_lazy_cpu_state(); + +#ifdef CONFIG_HAVE_HW_BREAKPOINT + flush_ptrace_hw_breakpoint(current); +#else /* CONFIG_HAVE_HW_BREAKPOINT */ + set_debug_reg_defaults(¤t->thread); +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ +} + +void +release_thread(struct task_struct *t) +{ +} + +/* + * This gets called before we allocate a new thread and copy + * the current task into it. 
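+ * Flushing the live FP/VMX/VSX/SPE state into the thread_struct first + * ensures that copy_thread() copies up-to-date register values into + * the child.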
+ */ +void prepare_to_copy(struct task_struct *tsk) +{ + flush_fp_to_thread(current); + flush_altivec_to_thread(current); + flush_vsx_to_thread(current); + flush_spe_to_thread(current); +#ifdef CONFIG_HAVE_HW_BREAKPOINT + flush_ptrace_hw_breakpoint(tsk); +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ +} + +/* + * Copy a thread.. + */ +extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */ + +int copy_thread(unsigned long clone_flags, unsigned long usp, + unsigned long unused, struct task_struct *p, + struct pt_regs *regs) +{ + struct pt_regs *childregs, *kregs; + extern void ret_from_fork(void); + unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE; + + CHECK_FULL_REGS(regs); + /* Copy registers */ + sp -= sizeof(struct pt_regs); + childregs = (struct pt_regs *) sp; + *childregs = *regs; + if ((childregs->msr & MSR_PR) == 0) { + /* for kernel thread, set `current' and stackptr in new task */ + childregs->gpr[1] = sp + sizeof(struct pt_regs); +#ifdef CONFIG_PPC32 + childregs->gpr[2] = (unsigned long) p; +#else + clear_tsk_thread_flag(p, TIF_32BIT); +#endif + p->thread.regs = NULL; /* no user register state */ + } else { + childregs->gpr[1] = usp; + p->thread.regs = childregs; + if (clone_flags & CLONE_SETTLS) { +#ifdef CONFIG_PPC64 + if (!is_32bit_task()) + childregs->gpr[13] = childregs->gpr[6]; + else +#endif + childregs->gpr[2] = childregs->gpr[6]; + } + } + childregs->gpr[3] = 0; /* Result from fork() */ + sp -= STACK_FRAME_OVERHEAD; + + /* + * The way this works is that at some point in the future + * some task will call _switch to switch to the new task. + * That will pop off the stack frame created below and start + * the new task running at ret_from_fork. The new task will + * do some house keeping and then return from the fork or clone + * system call, using the stack frame created above. + */ + sp -= sizeof(struct pt_regs); + kregs = (struct pt_regs *) sp; + sp -= STACK_FRAME_OVERHEAD; + p->thread.ksp = sp; + p->thread.ksp_limit = (unsigned long)task_stack_page(p) + + _ALIGN_UP(sizeof(struct thread_info), 16); + +#ifdef CONFIG_PPC_STD_MMU_64 + if (mmu_has_feature(MMU_FTR_SLB)) { + unsigned long sp_vsid; + unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; + + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) + sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T) + << SLB_VSID_SHIFT_1T; + else + sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_256M) + << SLB_VSID_SHIFT; + sp_vsid |= SLB_VSID_KERNEL | llp; + p->thread.ksp_vsid = sp_vsid; + } +#endif /* CONFIG_PPC_STD_MMU_64 */ +#ifdef CONFIG_PPC64 + if (cpu_has_feature(CPU_FTR_DSCR)) { + if (current->thread.dscr_inherit) { + p->thread.dscr_inherit = 1; + p->thread.dscr = current->thread.dscr; + } else if (0 != dscr_default) { + p->thread.dscr_inherit = 1; + p->thread.dscr = dscr_default; + } else { + p->thread.dscr_inherit = 0; + p->thread.dscr = 0; + } + } +#endif + + /* + * The PPC64 ABI makes use of a TOC to contain function + * pointers. The function (ret_from_except) is actually a pointer + * to the TOC entry. The first entry is a pointer to the actual + * function. 
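+ * Hence the dereference below: on PPC64 the symbol ret_from_fork names + * such a descriptor, so *((unsigned long *)ret_from_fork) yields the + * real entry address, while on PPC32 the symbol is the entry address + * itself.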
+ */ +#ifdef CONFIG_PPC64 + kregs->nip = *((unsigned long *)ret_from_fork); +#else + kregs->nip = (unsigned long)ret_from_fork; +#endif + + return 0; +} + +/* + * Set up a thread for executing a new program + */ +void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) +{ +#ifdef CONFIG_PPC64 + unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */ +#endif + + /* + * If we exec out of a kernel thread then thread.regs will not be + * set. Do it now. + */ + if (!current->thread.regs) { + struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE; + current->thread.regs = regs - 1; + } + + memset(regs->gpr, 0, sizeof(regs->gpr)); + regs->ctr = 0; + regs->link = 0; + regs->xer = 0; + regs->ccr = 0; + regs->gpr[1] = sp; + + /* + * We have just cleared all the nonvolatile GPRs, so make + * FULL_REGS(regs) return true. This is necessary to allow + * ptrace to examine the thread immediately after exec. + */ + regs->trap &= ~1UL; + +#ifdef CONFIG_PPC32 + regs->mq = 0; + regs->nip = start; + regs->msr = MSR_USER; +#else + if (!is_32bit_task()) { + unsigned long entry, toc; + + /* start is a relocated pointer to the function descriptor for + * the elf _start routine. The first entry in the function + * descriptor is the entry address of _start and the second + * entry is the TOC value we need to use. + */ + __get_user(entry, (unsigned long __user *)start); + __get_user(toc, (unsigned long __user *)start+1); + + /* Check whether the e_entry function descriptor entries + * need to be relocated before we can use them. + */ + if (load_addr != 0) { + entry += load_addr; + toc += load_addr; + } + regs->nip = entry; + regs->gpr[2] = toc; + regs->msr = MSR_USER64; + } else { + regs->nip = start; + regs->gpr[2] = 0; + regs->msr = MSR_USER32; + } +#endif + + discard_lazy_cpu_state(); +#ifdef CONFIG_VSX + current->thread.used_vsr = 0; +#endif + memset(current->thread.fpr, 0, sizeof(current->thread.fpr)); + current->thread.fpscr.val = 0; +#ifdef CONFIG_ALTIVEC + memset(current->thread.vr, 0, sizeof(current->thread.vr)); + memset(¤t->thread.vscr, 0, sizeof(current->thread.vscr)); + current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */ + current->thread.vrsave = 0; + current->thread.used_vr = 0; +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_SPE + memset(current->thread.evr, 0, sizeof(current->thread.evr)); + current->thread.acc = 0; + current->thread.spefscr = 0; + current->thread.used_spe = 0; +#endif /* CONFIG_SPE */ +} + +#define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \ + | PR_FP_EXC_RES | PR_FP_EXC_INV) + +int set_fpexc_mode(struct task_struct *tsk, unsigned int val) +{ + struct pt_regs *regs = tsk->thread.regs; + + /* This is a bit hairy. If we are an SPE enabled processor + * (have embedded fp) we store the IEEE exception enable flags in + * fpexc_mode. fpexc_mode is also used for setting FP exception + * mode (asyn, precise, disabled) for 'Classic' FP. */ + if (val & PR_FP_EXC_SW_ENABLE) { +#ifdef CONFIG_SPE + if (cpu_has_feature(CPU_FTR_SPE)) { + tsk->thread.fpexc_mode = val & + (PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT); + return 0; + } else { + return -EINVAL; + } +#else + return -EINVAL; +#endif + } + + /* on a CONFIG_SPE this does not hurt us. The bits that + * __pack_fe01 use do not overlap with bits used for + * PR_FP_EXC_SW_ENABLE. 
Additionally, the MSR[FE0,FE1] bits + * on CONFIG_SPE implementations are reserved so writing to + * them does not change anything */ + if (val > PR_FP_EXC_PRECISE) + return -EINVAL; + tsk->thread.fpexc_mode = __pack_fe01(val); + if (regs != NULL && (regs->msr & MSR_FP) != 0) + regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1)) + | tsk->thread.fpexc_mode; + return 0; +} + +int get_fpexc_mode(struct task_struct *tsk, unsigned long adr) +{ + unsigned int val; + + if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) +#ifdef CONFIG_SPE + if (cpu_has_feature(CPU_FTR_SPE)) + val = tsk->thread.fpexc_mode; + else + return -EINVAL; +#else + return -EINVAL; +#endif + else + val = __unpack_fe01(tsk->thread.fpexc_mode); + return put_user(val, (unsigned int __user *) adr); +} + +int set_endian(struct task_struct *tsk, unsigned int val) +{ + struct pt_regs *regs = tsk->thread.regs; + + if ((val == PR_ENDIAN_LITTLE && !cpu_has_feature(CPU_FTR_REAL_LE)) || + (val == PR_ENDIAN_PPC_LITTLE && !cpu_has_feature(CPU_FTR_PPC_LE))) + return -EINVAL; + + if (regs == NULL) + return -EINVAL; + + if (val == PR_ENDIAN_BIG) + regs->msr &= ~MSR_LE; + else if (val == PR_ENDIAN_LITTLE || val == PR_ENDIAN_PPC_LITTLE) + regs->msr |= MSR_LE; + else + return -EINVAL; + + return 0; +} + +int get_endian(struct task_struct *tsk, unsigned long adr) +{ + struct pt_regs *regs = tsk->thread.regs; + unsigned int val; + + if (!cpu_has_feature(CPU_FTR_PPC_LE) && + !cpu_has_feature(CPU_FTR_REAL_LE)) + return -EINVAL; + + if (regs == NULL) + return -EINVAL; + + if (regs->msr & MSR_LE) { + if (cpu_has_feature(CPU_FTR_REAL_LE)) + val = PR_ENDIAN_LITTLE; + else + val = PR_ENDIAN_PPC_LITTLE; + } else + val = PR_ENDIAN_BIG; + + return put_user(val, (unsigned int __user *)adr); +} + +int set_unalign_ctl(struct task_struct *tsk, unsigned int val) +{ + tsk->thread.align_ctl = val; + return 0; +} + +int get_unalign_ctl(struct task_struct *tsk, unsigned long adr) +{ + return put_user(tsk->thread.align_ctl, (unsigned int __user *)adr); +} + +#define TRUNC_PTR(x) ((typeof(x))(((unsigned long)(x)) & 0xffffffff)) + +int sys_clone(unsigned long clone_flags, unsigned long usp, + int __user *parent_tidp, void __user *child_threadptr, + int __user *child_tidp, int p6, + struct pt_regs *regs) +{ + CHECK_FULL_REGS(regs); + if (usp == 0) + usp = regs->gpr[1]; /* stack pointer for child */ +#ifdef CONFIG_PPC64 + if (is_32bit_task()) { + parent_tidp = TRUNC_PTR(parent_tidp); + child_tidp = TRUNC_PTR(child_tidp); + } +#endif + return do_fork(clone_flags, usp, regs, 0, parent_tidp, child_tidp); +} + +int sys_fork(unsigned long p1, unsigned long p2, unsigned long p3, + unsigned long p4, unsigned long p5, unsigned long p6, + struct pt_regs *regs) +{ + CHECK_FULL_REGS(regs); + return do_fork(SIGCHLD, regs->gpr[1], regs, 0, NULL, NULL); +} + +int sys_vfork(unsigned long p1, unsigned long p2, unsigned long p3, + unsigned long p4, unsigned long p5, unsigned long p6, + struct pt_regs *regs) +{ + CHECK_FULL_REGS(regs); + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->gpr[1], + regs, 0, NULL, NULL); +} + +int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, + unsigned long a3, unsigned long a4, unsigned long a5, + struct pt_regs *regs) +{ + int error; + char *filename; + + filename = getname((const char __user *) a0); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + flush_fp_to_thread(current); + flush_altivec_to_thread(current); + flush_spe_to_thread(current); + error = do_execve(filename, + (const char __user *const __user *) a1, + 
(const char __user *const __user *) a2, regs); + putname(filename); +out: + return error; +} + +static inline int valid_irq_stack(unsigned long sp, struct task_struct *p, + unsigned long nbytes) +{ + unsigned long stack_page; + unsigned long cpu = task_cpu(p); + + /* + * Avoid crashing if the stack has overflowed and corrupted + * task_cpu(p), which is in the thread_info struct. + */ + if (cpu < NR_CPUS && cpu_possible(cpu)) { + stack_page = (unsigned long) hardirq_ctx[cpu]; + if (sp >= stack_page + sizeof(struct thread_struct) + && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; + + stack_page = (unsigned long) softirq_ctx[cpu]; + if (sp >= stack_page + sizeof(struct thread_struct) + && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; + } + return 0; +} + +int validate_sp(unsigned long sp, struct task_struct *p, + unsigned long nbytes) +{ + unsigned long stack_page = (unsigned long)task_stack_page(p); + + if (sp >= stack_page + sizeof(struct thread_struct) + && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; + + return valid_irq_stack(sp, p, nbytes); +} + +EXPORT_SYMBOL(validate_sp); + +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long ip, sp; + int count = 0; + + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + + sp = p->thread.ksp; + if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD)) + return 0; + + do { + sp = *(unsigned long *)sp; + if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD)) + return 0; + if (count > 0) { + ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE]; + if (!in_sched_functions(ip)) + return ip; + } + } while (count++ < 16); + return 0; +} + +static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH; + +void show_stack(struct task_struct *tsk, unsigned long *stack) +{ + unsigned long sp, ip, lr, newsp; + int count = 0; + int firstframe = 1; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + int curr_frame = current->curr_ret_stack; + extern void return_to_handler(void); + unsigned long rth = (unsigned long)return_to_handler; + unsigned long mrth = -1; +#ifdef CONFIG_PPC64 + extern void mod_return_to_handler(void); + rth = *(unsigned long *)rth; + mrth = (unsigned long)mod_return_to_handler; + mrth = *(unsigned long *)mrth; +#endif +#endif + + sp = (unsigned long) stack; + if (tsk == NULL) + tsk = current; + if (sp == 0) { + if (tsk == current) + asm("mr %0,1" : "=r" (sp)); + else + sp = tsk->thread.ksp; + } + + lr = 0; + printk("Call Trace:\n"); + do { + if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD)) + return; + + stack = (unsigned long *) sp; + newsp = stack[0]; + ip = stack[STACK_FRAME_LR_SAVE]; + if (!firstframe || ip != lr) { + printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + if ((ip == rth || ip == mrth) && curr_frame >= 0) { + printk(" (%pS)", + (void *)current->ret_stack[curr_frame].ret); + curr_frame--; + } +#endif + if (firstframe) + printk(" (unreliable)"); + printk("\n"); + } + firstframe = 0; + + /* + * See if this is an exception frame. + * We look for the "regshere" marker in the current frame. 
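+ * (STACK_FRAME_REGS_MARKER is the ASCII string "regshere"; the + * exception entry code stores it in the STACK_FRAME_MARKER slot when + * it saves a struct pt_regs above the frame header.)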
+ */ + if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE) + && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + struct pt_regs *regs = (struct pt_regs *) + (sp + STACK_FRAME_OVERHEAD); + lr = regs->link; + printk("--- Exception: %lx at %pS\n LR = %pS\n", + regs->trap, (void *)regs->nip, (void *)lr); + firstframe = 1; + } + + sp = newsp; + } while (count++ < kstack_depth_to_print); +} + +void dump_stack(void) +{ + show_stack(current, NULL); +} +EXPORT_SYMBOL(dump_stack); + +#ifdef CONFIG_PPC64 +/* Called with hard IRQs off */ +void __ppc64_runlatch_on(void) +{ + struct thread_info *ti = current_thread_info(); + unsigned long ctrl; + + ctrl = mfspr(SPRN_CTRLF); + ctrl |= CTRL_RUNLATCH; + mtspr(SPRN_CTRLT, ctrl); + + ti->local_flags |= _TLF_RUNLATCH; +} + +/* Called with hard IRQs off */ +void __ppc64_runlatch_off(void) +{ + struct thread_info *ti = current_thread_info(); + unsigned long ctrl; + + ti->local_flags &= ~_TLF_RUNLATCH; + + ctrl = mfspr(SPRN_CTRLF); + ctrl &= ~CTRL_RUNLATCH; + mtspr(SPRN_CTRLT, ctrl); +} +#endif /* CONFIG_PPC64 */ + +#if THREAD_SHIFT < PAGE_SHIFT + +static struct kmem_cache *thread_info_cache; + +struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node) +{ + struct thread_info *ti; + + ti = kmem_cache_alloc_node(thread_info_cache, GFP_KERNEL, node); + if (unlikely(ti == NULL)) + return NULL; +#ifdef CONFIG_DEBUG_STACK_USAGE + memset(ti, 0, THREAD_SIZE); +#endif + return ti; +} + +void free_thread_info(struct thread_info *ti) +{ + kmem_cache_free(thread_info_cache, ti); +} + +void thread_info_cache_init(void) +{ + thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE, + THREAD_SIZE, 0, NULL); + BUG_ON(thread_info_cache == NULL); +} + +#endif /* THREAD_SHIFT < PAGE_SHIFT */ + +unsigned long arch_align_stack(unsigned long sp) +{ + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + sp -= get_random_int() & ~PAGE_MASK; + return sp & ~0xf; +} + +static inline unsigned long brk_rnd(void) +{ + unsigned long rnd = 0; + + /* 8MB for 32bit, 1GB for 64bit */ + if (is_32bit_task()) + rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT))); + else + rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT))); + + return rnd << PAGE_SHIFT; +} + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + unsigned long base = mm->brk; + unsigned long ret; + +#ifdef CONFIG_PPC_STD_MMU_64 + /* + * If we are using 1TB segments and we are allowed to randomise + * the heap, we can put it above 1TB so it is backed by a 1TB + * segment. Otherwise the heap will be in the bottom 1TB + * which always uses 256MB segments and this may result in a + * performance penalty. + */ + if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T)) + base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T); +#endif + + ret = PAGE_ALIGN(base + brk_rnd()); + + if (ret < mm->brk) + return mm->brk; + + return ret; +} + +unsigned long randomize_et_dyn(unsigned long base) +{ + unsigned long ret = PAGE_ALIGN(base + brk_rnd()); + + if (ret < base) + return base; + + return ret; +} diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c new file mode 100644 index 00000000..f191bf02 --- /dev/null +++ b/arch/powerpc/kernel/prom.c @@ -0,0 +1,891 @@ +/* + * Procedures for creating, accessing and interpreting the device tree. + * + * Paul Mackerras August 1996. + * Copyright (C) 1996-2005 Paul Mackerras. + * + * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. 
+ * {engebret|bergner}@us.ibm.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <stdarg.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/threads.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/stringify.h> +#include <linux/delay.h> +#include <linux/initrd.h> +#include <linux/bitops.h> +#include <linux/export.h> +#include <linux/kexec.h> +#include <linux/debugfs.h> +#include <linux/irq.h> +#include <linux/memblock.h> + +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/page.h> +#include <asm/processor.h> +#include <asm/irq.h> +#include <asm/io.h> +#include <asm/kdump.h> +#include <asm/smp.h> +#include <asm/mmu.h> +#include <asm/paca.h> +#include <asm/pgtable.h> +#include <asm/pci.h> +#include <asm/iommu.h> +#include <asm/btext.h> +#include <asm/sections.h> +#include <asm/machdep.h> +#include <asm/pSeries_reconfig.h> +#include <asm/pci-bridge.h> +#include <asm/kexec.h> +#include <asm/opal.h> +#include <asm/fadump.h> + +#include <mm/mmu_decl.h> + +#ifdef DEBUG +#define DBG(fmt...) printk(KERN_ERR fmt) +#else +#define DBG(fmt...) +#endif + +#ifdef CONFIG_PPC64 +int __initdata iommu_is_off; +int __initdata iommu_force_on; +unsigned long tce_alloc_start, tce_alloc_end; +u64 ppc64_rma_size; +#endif +static phys_addr_t first_memblock_size; +static int __initdata boot_cpu_count; + +static int __init early_parse_mem(char *p) +{ + if (!p) + return 1; + + memory_limit = PAGE_ALIGN(memparse(p, &p)); + DBG("memory limit = 0x%llx\n", (unsigned long long)memory_limit); + + return 0; +} +early_param("mem", early_parse_mem); + +/* + * overlaps_initrd - check for overlap with page aligned extension of + * initrd. + */ +static inline int overlaps_initrd(unsigned long start, unsigned long size) +{ +#ifdef CONFIG_BLK_DEV_INITRD + if (!initrd_start) + return 0; + + return (start + size) > _ALIGN_DOWN(initrd_start, PAGE_SIZE) && + start <= _ALIGN_UP(initrd_end, PAGE_SIZE); +#else + return 0; +#endif +} + +/** + * move_device_tree - move tree to an unused area, if needed. + * + * The device tree may be allocated beyond our memory limit, or inside the + * crash kernel region for kdump, or within the page aligned range of initrd. + * If so, move it out of the way. + */ +static void __init move_device_tree(void) +{ + unsigned long start, size; + void *p; + + DBG("-> move_device_tree\n"); + + start = __pa(initial_boot_params); + size = be32_to_cpu(initial_boot_params->totalsize); + + if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) || + overlaps_crashkernel(start, size) || + overlaps_initrd(start, size)) { + p = __va(memblock_alloc(size, PAGE_SIZE)); + memcpy(p, initial_boot_params, size); + initial_boot_params = (struct boot_param_header *)p; + DBG("Moved device tree to 0x%p\n", p); + } + + DBG("<- move_device_tree\n"); +} + +/* + * ibm,pa-features is a per-cpu property that contains a string of + * attribute descriptors, each of which has a 2 byte header plus up + * to 254 bytes worth of processor attribute bits. First header + * byte specifies the number of bytes following the header. + * Second header byte is an "attribute-specifier" type, of which + * zero is the only currently-defined value. 
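+ * For example, a property beginning with the bytes 0x04 0x00 describes + * a type-0 descriptor carrying four attribute bytes after the two + * header bytes.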
+ * Implementation: Pass in the byte and bit offset for the feature + * that we are interested in. The function will return -1 if the + * pa-features property is missing, or a 1/0 to indicate if the feature + * is supported/not supported. Note that the bit numbers are + * big-endian to match the definition in PAPR. + */ +static struct ibm_pa_feature { + unsigned long cpu_features; /* CPU_FTR_xxx bit */ + unsigned long mmu_features; /* MMU_FTR_xxx bit */ + unsigned int cpu_user_ftrs; /* PPC_FEATURE_xxx bit */ + unsigned char pabyte; /* byte number in ibm,pa-features */ + unsigned char pabit; /* bit number (big-endian) */ + unsigned char invert; /* if 1, pa bit set => clear feature */ +} ibm_pa_features[] __initdata = { + {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0}, + {0, 0, PPC_FEATURE_HAS_FPU, 0, 1, 0}, + {0, MMU_FTR_SLB, 0, 0, 2, 0}, + {CPU_FTR_CTRL, 0, 0, 0, 3, 0}, + {CPU_FTR_NOEXECUTE, 0, 0, 0, 6, 0}, + {CPU_FTR_NODSISRALIGN, 0, 0, 1, 1, 1}, + {0, MMU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, + {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0}, +}; + +static void __init scan_features(unsigned long node, unsigned char *ftrs, + unsigned long tablelen, + struct ibm_pa_feature *fp, + unsigned long ft_size) +{ + unsigned long i, len, bit; + + /* find descriptor with type == 0 */ + for (;;) { + if (tablelen < 3) + return; + len = 2 + ftrs[0]; + if (tablelen < len) + return; /* descriptor 0 not found */ + if (ftrs[1] == 0) + break; + tablelen -= len; + ftrs += len; + } + + /* loop over bits we know about */ + for (i = 0; i < ft_size; ++i, ++fp) { + if (fp->pabyte >= ftrs[0]) + continue; + bit = (ftrs[2 + fp->pabyte] >> (7 - fp->pabit)) & 1; + if (bit ^ fp->invert) { + cur_cpu_spec->cpu_features |= fp->cpu_features; + cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs; + cur_cpu_spec->mmu_features |= fp->mmu_features; + } else { + cur_cpu_spec->cpu_features &= ~fp->cpu_features; + cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs; + cur_cpu_spec->mmu_features &= ~fp->mmu_features; + } + } +} + +static void __init check_cpu_pa_features(unsigned long node) +{ + unsigned char *pa_ftrs; + unsigned long tablelen; + + pa_ftrs = of_get_flat_dt_prop(node, "ibm,pa-features", &tablelen); + if (pa_ftrs == NULL) + return; + + scan_features(node, pa_ftrs, tablelen, + ibm_pa_features, ARRAY_SIZE(ibm_pa_features)); +} + +#ifdef CONFIG_PPC_STD_MMU_64 +static void __init check_cpu_slb_size(unsigned long node) +{ + u32 *slb_size_ptr; + + slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL); + if (slb_size_ptr != NULL) { + mmu_slb_size = *slb_size_ptr; + return; + } + slb_size_ptr = of_get_flat_dt_prop(node, "ibm,slb-size", NULL); + if (slb_size_ptr != NULL) { + mmu_slb_size = *slb_size_ptr; + } +} +#else +#define check_cpu_slb_size(node) do { } while(0) +#endif + +static struct feature_property { + const char *name; + u32 min_value; + unsigned long cpu_feature; + unsigned long cpu_user_ftr; +} feature_properties[] __initdata = { +#ifdef CONFIG_ALTIVEC + {"altivec", 0, CPU_FTR_ALTIVEC, PPC_FEATURE_HAS_ALTIVEC}, + {"ibm,vmx", 1, CPU_FTR_ALTIVEC, PPC_FEATURE_HAS_ALTIVEC}, +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX + /* Yes, this _really_ is ibm,vmx == 2 to enable VSX */ + {"ibm,vmx", 2, CPU_FTR_VSX, PPC_FEATURE_HAS_VSX}, +#endif /* CONFIG_VSX */ +#ifdef CONFIG_PPC64 + {"ibm,dfp", 1, 0, PPC_FEATURE_HAS_DFP}, + {"ibm,purr", 1, CPU_FTR_PURR, 0}, + {"ibm,spurr", 1, CPU_FTR_SPURR, 0}, +#endif /* CONFIG_PPC64 */ +}; + +#if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU) +static inline void identical_pvr_fixup(unsigned long 
+
+#ifdef CONFIG_PPC_STD_MMU_64
+static void __init check_cpu_slb_size(unsigned long node)
+{
+	u32 *slb_size_ptr;
+
+	slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL);
+	if (slb_size_ptr != NULL) {
+		mmu_slb_size = *slb_size_ptr;
+		return;
+	}
+	slb_size_ptr = of_get_flat_dt_prop(node, "ibm,slb-size", NULL);
+	if (slb_size_ptr != NULL) {
+		mmu_slb_size = *slb_size_ptr;
+	}
+}
+#else
+#define check_cpu_slb_size(node) do { } while(0)
+#endif
+
+static struct feature_property {
+	const char *name;
+	u32 min_value;
+	unsigned long cpu_feature;
+	unsigned long cpu_user_ftr;
+} feature_properties[] __initdata = {
+#ifdef CONFIG_ALTIVEC
+	{"altivec", 0, CPU_FTR_ALTIVEC, PPC_FEATURE_HAS_ALTIVEC},
+	{"ibm,vmx", 1, CPU_FTR_ALTIVEC, PPC_FEATURE_HAS_ALTIVEC},
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+	/* Yes, this _really_ is ibm,vmx == 2 to enable VSX */
+	{"ibm,vmx", 2, CPU_FTR_VSX, PPC_FEATURE_HAS_VSX},
+#endif /* CONFIG_VSX */
+#ifdef CONFIG_PPC64
+	{"ibm,dfp", 1, 0, PPC_FEATURE_HAS_DFP},
+	{"ibm,purr", 1, CPU_FTR_PURR, 0},
+	{"ibm,spurr", 1, CPU_FTR_SPURR, 0},
+#endif /* CONFIG_PPC64 */
+};
+
+#if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU)
+static inline void identical_pvr_fixup(unsigned long node)
+{
+	unsigned int pvr;
+	char *model = of_get_flat_dt_prop(node, "model", NULL);
+
+	/*
+	 * Since 440GR(x)/440EP(x) processors have the same pvr,
+	 * we check the node path and set bit 28 in the cur_cpu_spec
+	 * pvr for EP(x) processor version. This bit is always 0 in
+	 * the "real" pvr. Then we call identify_cpu again with
+	 * the new logical pvr to enable FPU support.
+	 */
+	if (model && strstr(model, "440EP")) {
+		pvr = cur_cpu_spec->pvr_value | 0x8;
+		identify_cpu(0, pvr);
+		DBG("Using logical pvr %x for %s\n", pvr, model);
+	}
+}
+#else
+#define identical_pvr_fixup(node) do { } while(0)
+#endif
+
+static void __init check_cpu_feature_properties(unsigned long node)
+{
+	unsigned long i;
+	struct feature_property *fp = feature_properties;
+	const u32 *prop;
+
+	for (i = 0; i < ARRAY_SIZE(feature_properties); ++i, ++fp) {
+		prop = of_get_flat_dt_prop(node, fp->name, NULL);
+		if (prop && *prop >= fp->min_value) {
+			cur_cpu_spec->cpu_features |= fp->cpu_feature;
+			cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftr;
+		}
+	}
+}
+
+static int __init early_init_dt_scan_cpus(unsigned long node,
+					  const char *uname, int depth,
+					  void *data)
+{
+	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+	const u32 *prop;
+	const u32 *intserv;
+	int i, nthreads;
+	unsigned long len;
+	int found = -1;
+	int found_thread = 0;
+
+	/* We are scanning "cpu" nodes only */
+	if (type == NULL || strcmp(type, "cpu") != 0)
+		return 0;
+
+	/* Get physical cpuid */
+	intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len);
+	if (intserv) {
+		nthreads = len / sizeof(int);
+	} else {
+		intserv = of_get_flat_dt_prop(node, "reg", NULL);
+		nthreads = 1;
+	}
+
+	/*
+	 * Now see if any of these threads match our boot cpu.
+	 * NOTE: This must match the parsing done in smp_setup_cpu_maps.
+	 */
+	for (i = 0; i < nthreads; i++) {
+		/*
+		 * version 2 of the kexec param format adds the phys cpuid of
+		 * booted proc.
+		 */
+		if (initial_boot_params->version >= 2) {
+			if (intserv[i] == initial_boot_params->boot_cpuid_phys) {
+				found = boot_cpu_count;
+				found_thread = i;
+			}
+		} else {
+			/*
+			 * Check if it's the boot-cpu, set its hw index now,
+			 * unfortunately this format did not support booting
+			 * off secondary threads.
+			 */
+			if (of_get_flat_dt_prop(node,
+					"linux,boot-cpu", NULL) != NULL)
+				found = boot_cpu_count;
+		}
+#ifdef CONFIG_SMP
+		/* logical cpu id is always 0 on UP kernels */
+		boot_cpu_count++;
+#endif
+	}
+
+	if (found >= 0) {
+		DBG("boot cpu: logical %d physical %d\n", found,
+			intserv[found_thread]);
+		boot_cpuid = found;
+		set_hard_smp_processor_id(found, intserv[found_thread]);
+
+		/*
+		 * PAPR defines "logical" PVR values for cpus that
+		 * meet various levels of the architecture:
+		 * 0x0f000001	Architecture version 2.04
+		 * 0x0f000002	Architecture version 2.05
+		 * If the cpu-version property in the cpu node contains
+		 * such a value, we call identify_cpu again with the
+		 * logical PVR value in order to use the cpu feature
+		 * bits appropriate for the architecture level.
+		 *
+		 * A POWER6 partition in "POWER6 architected" mode
+		 * uses the 0x0f000002 PVR value; in POWER5+ mode
+		 * it uses 0x0f000001.
+		 */
+		prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
+		if (prop && (*prop & 0xff000000) == 0x0f000000)
+			identify_cpu(0, *prop);
+
+		identical_pvr_fixup(node);
+	}
+
+	check_cpu_feature_properties(node);
+	check_cpu_pa_features(node);
+	check_cpu_slb_size(node);
+
+#ifdef CONFIG_PPC_PSERIES
+	if (nthreads > 1)
+		cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
+	else
+		cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
+#endif
+
+	return 0;
+}
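PAPR's logical PVR values all sit in the 0x0fxxxxxx family, which is why the scan above only checks the top byte before re-running identify_cpu(). A small stand-alone illustration, with hypothetical PVR values (not part of the patch):

#include <stdio.h>

/* A "logical" PVR advertises an architecture level rather than a
 * particular chip; the top byte is 0x0f for the whole family.
 */
static int is_logical_pvr(unsigned int pvr)
{
	return (pvr & 0xff000000) == 0x0f000000;
}

int main(void)
{
	printf("%d\n", is_logical_pvr(0x0f000002));	/* 1: arch 2.05 */
	printf("%d\n", is_logical_pvr(0x003f0201));	/* 0: a raw hardware PVR */
	return 0;
}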
+
+int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname,
+					 int depth, void *data)
+{
+	unsigned long *lprop;
+
+	/* Use common scan routine to determine if this is the chosen node */
+	if (early_init_dt_scan_chosen(node, uname, depth, data) == 0)
+		return 0;
+
+#ifdef CONFIG_PPC64
+	/* check if iommu is forced on or off */
+	if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
+		iommu_is_off = 1;
+	if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
+		iommu_force_on = 1;
+#endif
+
+	/* mem=x on the command line is the preferred mechanism */
+	lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL);
+	if (lprop)
+		memory_limit = *lprop;
+
+#ifdef CONFIG_PPC64
+	lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
+	if (lprop)
+		tce_alloc_start = *lprop;
+	lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
+	if (lprop)
+		tce_alloc_end = *lprop;
+#endif
+
+#ifdef CONFIG_KEXEC
+	lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL);
+	if (lprop)
+		crashk_res.start = *lprop;
+
+	lprop = of_get_flat_dt_prop(node, "linux,crashkernel-size", NULL);
+	if (lprop)
+		crashk_res.end = crashk_res.start + *lprop - 1;
+#endif
+
+	/* break now */
+	return 1;
+}
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Interpret the ibm,dynamic-memory property in the
+ * /ibm,dynamic-reconfiguration-memory node.
+ * This contains a list of memory blocks along with NUMA affinity
+ * information.
+ */
+static int __init early_init_dt_scan_drconf_memory(unsigned long node)
+{
+	__be32 *dm, *ls, *usm;
+	unsigned long l, n, flags;
+	u64 base, size, memblock_size;
+	unsigned int is_kexec_kdump = 0, rngs;
+
+	ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
+	if (ls == NULL || l < dt_root_size_cells * sizeof(__be32))
+		return 0;
+	memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls);
+
+	dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l);
+	if (dm == NULL || l < sizeof(__be32))
+		return 0;
+
+	n = *dm++;	/* number of entries */
+	if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(__be32))
+		return 0;
+
+	/* check if this is a kexec/kdump kernel. */
+	usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory",
+						 &l);
+	if (usm != NULL)
+		is_kexec_kdump = 1;
+
+	for (; n != 0; --n) {
+		base = dt_mem_next_cell(dt_root_addr_cells, &dm);
+		flags = dm[3];
+		/* skip DRC index, pad, assoc. list index, flags */
+		dm += 4;
+		/* skip this block if the reserved bit is set in flags (0x80)
+		   or if the block is not assigned to this partition (0x8) */
+		if ((flags & 0x80) || !(flags & 0x8))
+			continue;
+		size = memblock_size;
+		rngs = 1;
+		if (is_kexec_kdump) {
+			/*
+			 * For each memblock in ibm,dynamic-memory, a
+			 * corresponding entry in linux,drconf-usable-memory
+			 * property contains a counter 'p' followed by 'p'
+			 * (base, size) pairs.  Now read the counter from
+			 * the linux,drconf-usable-memory property.
+			 */
+			rngs = dt_mem_next_cell(dt_root_size_cells, &usm);
+			if (!rngs) /* there are no (base, size) pairs */
+				continue;
+		}
+		do {
+			if (is_kexec_kdump) {
+				base = dt_mem_next_cell(dt_root_addr_cells,
+							 &usm);
+				size = dt_mem_next_cell(dt_root_size_cells,
+							 &usm);
+			}
+			if (iommu_is_off) {
+				if (base >= 0x80000000ul)
+					continue;
+				if ((base + size) > 0x80000000ul)
+					size = 0x80000000ul - base;
+			}
+			memblock_add(base, size);
+		} while (--rngs);
+	}
+	memblock_dump_all();
+	return 0;
+}
+#else
+#define early_init_dt_scan_drconf_memory(node)	0
+#endif /* CONFIG_PPC_PSERIES */
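The linux,drconf-usable-memory walk above follows the "counter followed by that many (base, size) pairs" layout. A stand-alone sketch of the same walk, using host-endian uint64_t and made-up values purely to show the shape (real cells are big-endian and of variable width):

#include <stdio.h>
#include <stdint.h>

/* Walk a hypothetical usable-memory list: a count 'p', then 'p'
 * (base, size) pairs, as the comment above describes.
 */
static void walk_usable(const uint64_t *usm)
{
	uint64_t rngs = *usm++;

	while (rngs--) {
		uint64_t base = *usm++;
		uint64_t size = *usm++;

		printf("usable: base=0x%llx size=0x%llx\n",
		       (unsigned long long)base, (unsigned long long)size);
	}
}

int main(void)
{
	/* two usable ranges inside one 16MB LMB (invented numbers) */
	const uint64_t usm[] = { 2, 0x1000000, 0x400000, 0x1800000, 0x200000 };

	walk_usable(usm);
	return 0;
}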
+
+static int __init early_init_dt_scan_memory_ppc(unsigned long node,
+						const char *uname,
+						int depth, void *data)
+{
+	if (depth == 1 &&
+	    strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0)
+		return early_init_dt_scan_drconf_memory(node);
+
+	return early_init_dt_scan_memory(node, uname, depth, data);
+}
+
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+#ifdef CONFIG_PPC64
+	if (iommu_is_off) {
+		if (base >= 0x80000000ul)
+			return;
+		if ((base + size) > 0x80000000ul)
+			size = 0x80000000ul - base;
+	}
+#endif
+	/* Keep track of the beginning of memory -and- the size of
+	 * the very first block in the device-tree as it represents
+	 * the RMA on ppc64 server
+	 */
+	if (base < memstart_addr) {
+		memstart_addr = base;
+		first_memblock_size = size;
+	}
+
+	/* Add the chunk to the MEMBLOCK list */
+	memblock_add(base, size);
+}
+
+void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+	return __va(memblock_alloc(size, align));
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void __init early_init_dt_setup_initrd_arch(unsigned long start,
+		unsigned long end)
+{
+	initrd_start = (unsigned long)__va(start);
+	initrd_end = (unsigned long)__va(end);
+	initrd_below_start_ok = 1;
+}
+#endif
+
+static void __init early_reserve_mem(void)
+{
+	u64 base, size;
+	u64 *reserve_map;
+	unsigned long self_base;
+	unsigned long self_size;
+
+	reserve_map = (u64 *)(((unsigned long)initial_boot_params) +
+					initial_boot_params->off_mem_rsvmap);
+
+	/* before we do anything, let's reserve the dt blob */
+	self_base = __pa((unsigned long)initial_boot_params);
+	self_size = initial_boot_params->totalsize;
+	memblock_reserve(self_base, self_size);
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	/* then reserve the initrd, if any */
+	if (initrd_start && (initrd_end > initrd_start))
+		memblock_reserve(_ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE),
+			_ALIGN_UP(initrd_end, PAGE_SIZE) -
+			_ALIGN_DOWN(initrd_start, PAGE_SIZE));
+#endif /* CONFIG_BLK_DEV_INITRD */
+
+#ifdef CONFIG_PPC32
+	/*
+	 * Handle the case where we might be booting from an old kexec
+	 * image that set up the mem_rsvmap as pairs of 32-bit values
+	 */
+	if (*reserve_map > 0xffffffffull) {
+		u32 base_32, size_32;
+		u32 *reserve_map_32 = (u32 *)reserve_map;
+
+		while (1) {
+			base_32 = *(reserve_map_32++);
+			size_32 = *(reserve_map_32++);
+			if (size_32 == 0)
+				break;
+			/* skip if the reservation is for the blob */
+			if (base_32 == self_base && size_32 == self_size)
+				continue;
+			DBG("reserving: %x -> %x\n", base_32, size_32);
+			memblock_reserve(base_32, size_32);
+		}
+		return;
+	}
+#endif
+	while (1) {
+		base = *(reserve_map++);
+		size = *(reserve_map++);
+		if (size == 0)
+			break;
+		DBG("reserving: %llx -> %llx\n", base, size);
+		memblock_reserve(base, size);
+	}
+}
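early_reserve_mem() above consumes the flat-device-tree reserve map, a list of (base, size) u64 pairs terminated by a zero-size entry. A stand-alone sketch of that walk, with invented entries (host-endian uint64_t for simplicity; the real map is big-endian):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint64_t reserve_map[] = {
		0x01400000, 0x00100000,	/* e.g. a flattened device tree */
		0x02000000, 0x00800000,	/* e.g. an initrd */
		0, 0			/* terminator: size == 0 */
	};
	const uint64_t *p = reserve_map;

	for (;;) {
		uint64_t base = *p++;
		uint64_t size = *p++;

		if (size == 0)
			break;
		printf("reserving: %llx -> %llx\n",
		       (unsigned long long)base, (unsigned long long)size);
	}
	return 0;
}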
+
+void __init early_init_devtree(void *params)
+{
+	phys_addr_t limit;
+
+	DBG(" -> early_init_devtree(%p)\n", params);
+
+	/* Setup flat device-tree pointer */
+	initial_boot_params = params;
+
+#ifdef CONFIG_PPC_RTAS
+	/* Some machines might need RTAS info for debugging, grab it now. */
+	of_scan_flat_dt(early_init_dt_scan_rtas, NULL);
+#endif
+
+#ifdef CONFIG_PPC_POWERNV
+	/* Some machines might need OPAL info for debugging, grab it now. */
+	of_scan_flat_dt(early_init_dt_scan_opal, NULL);
+#endif
+
+#ifdef CONFIG_FA_DUMP
+	/* scan tree to see if dump is active during last boot */
+	of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
+#endif
+
+	/* Pre-initialize the cmd_line with the content of boot_command_line,
+	 * which will be empty except when the content of the variable has
+	 * been overridden by a bootloading mechanism. This happens typically
+	 * with HAL takeover
+	 */
+	strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE);
+
+	/* Retrieve various information from the /chosen node of the
+	 * device-tree, including the platform type, initrd location and
+	 * size, TCE reserve, and more ...
+	 */
+	of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line);
+
+	/* Scan memory nodes and rebuild MEMBLOCKs */
+	of_scan_flat_dt(early_init_dt_scan_root, NULL);
+	of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
+
+	/* Save command line for /proc/cmdline and then parse parameters */
+	strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
+	parse_early_param();
+
+	/* make sure we've parsed cmdline for mem= before this */
+	if (memory_limit)
+		first_memblock_size = min(first_memblock_size, memory_limit);
+	setup_initial_memory_limit(memstart_addr, first_memblock_size);
+	/* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */
+	memblock_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START);
+	/* If relocatable, reserve first 32k for interrupt vectors etc. */
+	if (PHYSICAL_START > MEMORY_START)
+		memblock_reserve(MEMORY_START, 0x8000);
+	reserve_kdump_trampoline();
+#ifdef CONFIG_FA_DUMP
+	/*
+	 * If we fail to reserve memory for firmware-assisted dump then
+	 * fall back to kexec-based kdump.
+	 */
+	if (fadump_reserve_mem() == 0)
+#endif
+		reserve_crashkernel();
+	early_reserve_mem();
+
+	/*
+	 * Ensure that total memory size is page-aligned, because otherwise
+	 * mark_bootmem() gets upset.
+	 */
+	limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE);
+	memblock_enforce_memory_limit(limit);
+
+	memblock_allow_resize();
+	memblock_dump_all();
+
+	DBG("Phys. mem: %llx\n", memblock_phys_mem_size());
+
+	/* We may need to relocate the flat tree, do it now.
+	 * FIXME .. and the initrd too? */
+	move_device_tree();
+
+	allocate_pacas();
+
+	DBG("Scanning CPUs ...\n");
+
+	/* Retrieve CPU-related information from the flat tree
+	 * (altivec support, boot CPU ID, ...)
+	 */
+	of_scan_flat_dt(early_init_dt_scan_cpus, NULL);
+
+#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
+	/* We'll later wait for secondaries to check in; there are
+	 * NCPUS-1 non-boot CPUs  :-)
+	 */
+	spinning_secondaries = boot_cpu_count - 1;
+#endif
+
+	DBG(" <- early_init_devtree()\n");
+}
+
+/*******
+ *
+ * New implementation of the OF "find" APIs, return a refcounted
+ * object, call of_node_put() when done.  The device tree and list
+ * are protected by a rw_lock.
+ *
+ * Note that property management will need some locking as well,
+ * this isn't dealt with yet.
+ *
+ *******/
+
+/**
+ * of_find_next_cache_node - Find a node's subsidiary cache
+ * @np:	node of type "cpu" or "cache"
+ *
+ * Returns a node pointer with refcount incremented, use
+ * of_node_put() on it when done.  Caller should hold a reference
+ * to np.
+ */ +struct device_node *of_find_next_cache_node(struct device_node *np) +{ + struct device_node *child; + const phandle *handle; + + handle = of_get_property(np, "l2-cache", NULL); + if (!handle) + handle = of_get_property(np, "next-level-cache", NULL); + + if (handle) + return of_find_node_by_phandle(*handle); + + /* OF on pmac has nodes instead of properties named "l2-cache" + * beneath CPU nodes. + */ + if (!strcmp(np->type, "cpu")) + for_each_child_of_node(np, child) + if (!strcmp(child->type, "cache")) + return child; + + return NULL; +} + +#ifdef CONFIG_PPC_PSERIES +/* + * Fix up the uninitialized fields in a new device node: + * name, type and pci-specific fields + */ + +static int of_finish_dynamic_node(struct device_node *node) +{ + struct device_node *parent = of_get_parent(node); + int err = 0; + const phandle *ibm_phandle; + + node->name = of_get_property(node, "name", NULL); + node->type = of_get_property(node, "device_type", NULL); + + if (!node->name) + node->name = "<NULL>"; + if (!node->type) + node->type = "<NULL>"; + + if (!parent) { + err = -ENODEV; + goto out; + } + + /* We don't support that function on PowerMac, at least + * not yet + */ + if (machine_is(powermac)) + return -ENODEV; + + /* fix up new node's phandle field */ + if ((ibm_phandle = of_get_property(node, "ibm,phandle", NULL))) + node->phandle = *ibm_phandle; + +out: + of_node_put(parent); + return err; +} + +static int prom_reconfig_notifier(struct notifier_block *nb, + unsigned long action, void *node) +{ + int err; + + switch (action) { + case PSERIES_RECONFIG_ADD: + err = of_finish_dynamic_node(node); + if (err < 0) + printk(KERN_ERR "finish_node returned %d\n", err); + break; + default: + err = 0; + break; + } + return notifier_from_errno(err); +} + +static struct notifier_block prom_reconfig_nb = { + .notifier_call = prom_reconfig_notifier, + .priority = 10, /* This one needs to run first */ +}; + +static int __init prom_reconfig_setup(void) +{ + return pSeries_reconfig_notifier_register(&prom_reconfig_nb); +} +__initcall(prom_reconfig_setup); +#endif + +/* Find the device node for a given logical cpu number, also returns the cpu + * local thread number (index in ibm,interrupt-server#s) if relevant and + * asked for (non NULL) + */ +struct device_node *of_get_cpu_node(int cpu, unsigned int *thread) +{ + int hardid; + struct device_node *np; + + hardid = get_hard_smp_processor_id(cpu); + + for_each_node_by_type(np, "cpu") { + const u32 *intserv; + unsigned int plen, t; + + /* Check for ibm,ppc-interrupt-server#s. 
If it doesn't exist + * fallback to "reg" property and assume no threads + */ + intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", + &plen); + if (intserv == NULL) { + const u32 *reg = of_get_property(np, "reg", NULL); + if (reg == NULL) + continue; + if (*reg == hardid) { + if (thread) + *thread = 0; + return np; + } + } else { + plen /= sizeof(u32); + for (t = 0; t < plen; t++) { + if (hardid == intserv[t]) { + if (thread) + *thread = t; + return np; + } + } + } + } + return NULL; +} +EXPORT_SYMBOL(of_get_cpu_node); + +#if defined(CONFIG_DEBUG_FS) && defined(DEBUG) +static struct debugfs_blob_wrapper flat_dt_blob; + +static int __init export_flat_device_tree(void) +{ + struct dentry *d; + + flat_dt_blob.data = initial_boot_params; + flat_dt_blob.size = initial_boot_params->totalsize; + + d = debugfs_create_blob("flat-device-tree", S_IFREG | S_IRUSR, + powerpc_debugfs_root, &flat_dt_blob); + if (!d) + return 1; + + return 0; +} +__initcall(export_flat_device_tree); +#endif diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c new file mode 100644 index 00000000..99860273 --- /dev/null +++ b/arch/powerpc/kernel/prom_init.c @@ -0,0 +1,2997 @@ +/* + * Procedures for interfacing to Open Firmware. + * + * Paul Mackerras August 1996. + * Copyright (C) 1996-2005 Paul Mackerras. + * + * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. + * {engebret|bergner}@us.ibm.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG_PROM + +#include <stdarg.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/threads.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/proc_fs.h> +#include <linux/stringify.h> +#include <linux/delay.h> +#include <linux/initrd.h> +#include <linux/bitops.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/page.h> +#include <asm/processor.h> +#include <asm/irq.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/pci.h> +#include <asm/iommu.h> +#include <asm/btext.h> +#include <asm/sections.h> +#include <asm/machdep.h> +#include <asm/opal.h> + +#include <linux/linux_logo.h> + +/* + * Eventually bump that one up + */ +#define DEVTREE_CHUNK_SIZE 0x100000 + +/* + * This is the size of the local memory reserve map that gets copied + * into the boot params passed to the kernel. That size is totally + * flexible as the kernel just reads the list until it encounters an + * entry with size 0, so it can be changed without breaking binary + * compatibility + */ +#define MEM_RESERVE_MAP_SIZE 8 + +/* + * prom_init() is called very early on, before the kernel text + * and data have been mapped to KERNELBASE. At this point the code + * is running at whatever address it has been loaded at. + * On ppc32 we compile with -mrelocatable, which means that references + * to extern and static variables get relocated automatically. + * On ppc64 we have to relocate the references explicitly with + * RELOC. (Note that strings count as static variables.) + * + * Because OF may have mapped I/O devices into the area starting at + * KERNELBASE, particularly on CHRP machines, we can't safely call + * OF once the kernel has been mapped to KERNELBASE. 
Therefore all + * OF calls must be done within prom_init(). + * + * ADDR is used in calls to call_prom. The 4th and following + * arguments to call_prom should be 32-bit values. + * On ppc64, 64 bit values are truncated to 32 bits (and + * fortunately don't get interpreted as two arguments). + */ +#ifdef CONFIG_PPC64 +#define RELOC(x) (*PTRRELOC(&(x))) +#define ADDR(x) (u32) add_reloc_offset((unsigned long)(x)) +#define OF_WORKAROUNDS 0 +#else +#define RELOC(x) (x) +#define ADDR(x) (u32) (x) +#define OF_WORKAROUNDS of_workarounds +int of_workarounds; +#endif + +#define OF_WA_CLAIM 1 /* do phys/virt claim separately, then map */ +#define OF_WA_LONGTRAIL 2 /* work around longtrail bugs */ + +#define PROM_BUG() do { \ + prom_printf("kernel BUG at %s line 0x%x!\n", \ + RELOC(__FILE__), __LINE__); \ + __asm__ __volatile__(".long " BUG_ILLEGAL_INSTR); \ +} while (0) + +#ifdef DEBUG_PROM +#define prom_debug(x...) prom_printf(x) +#else +#define prom_debug(x...) +#endif + + +typedef u32 prom_arg_t; + +struct prom_args { + u32 service; + u32 nargs; + u32 nret; + prom_arg_t args[10]; +}; + +struct prom_t { + ihandle root; + phandle chosen; + int cpu; + ihandle stdout; + ihandle mmumap; + ihandle memory; +}; + +struct mem_map_entry { + u64 base; + u64 size; +}; + +typedef u32 cell_t; + +extern void __start(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7, unsigned long r8, + unsigned long r9); + +#ifdef CONFIG_PPC64 +extern int enter_prom(struct prom_args *args, unsigned long entry); +#else +static inline int enter_prom(struct prom_args *args, unsigned long entry) +{ + return ((int (*)(struct prom_args *))entry)(args); +} +#endif + +extern void copy_and_flush(unsigned long dest, unsigned long src, + unsigned long size, unsigned long offset); + +/* prom structure */ +static struct prom_t __initdata prom; + +static unsigned long prom_entry __initdata; + +#define PROM_SCRATCH_SIZE 256 + +static char __initdata of_stdout_device[256]; +static char __initdata prom_scratch[PROM_SCRATCH_SIZE]; + +static unsigned long __initdata dt_header_start; +static unsigned long __initdata dt_struct_start, dt_struct_end; +static unsigned long __initdata dt_string_start, dt_string_end; + +static unsigned long __initdata prom_initrd_start, prom_initrd_end; + +#ifdef CONFIG_PPC64 +static int __initdata prom_iommu_force_on; +static int __initdata prom_iommu_off; +static unsigned long __initdata prom_tce_alloc_start; +static unsigned long __initdata prom_tce_alloc_end; +#endif + +/* Platforms codes are now obsolete in the kernel. Now only used within this + * file and ultimately gone too. Feel free to change them if you need, they + * are not shared with anything outside of this file anymore + */ +#define PLATFORM_PSERIES 0x0100 +#define PLATFORM_PSERIES_LPAR 0x0101 +#define PLATFORM_LPAR 0x0001 +#define PLATFORM_POWERMAC 0x0400 +#define PLATFORM_GENERIC 0x0500 +#define PLATFORM_OPAL 0x0600 + +static int __initdata of_platform; + +static char __initdata prom_cmd_line[COMMAND_LINE_SIZE]; + +static unsigned long __initdata prom_memory_limit; + +static unsigned long __initdata alloc_top; +static unsigned long __initdata alloc_top_high; +static unsigned long __initdata alloc_bottom; +static unsigned long __initdata rmo_top; +static unsigned long __initdata ram_top; + +static struct mem_map_entry __initdata mem_reserve_map[MEM_RESERVE_MAP_SIZE]; +static int __initdata mem_reserve_cnt; + +static cell_t __initdata regbuf[1024]; + + +/* + * Error results ... 
some OF calls will return "-1" on error, some + * will return 0, some will return either. To simplify, here are + * macros to use with any ihandle or phandle return value to check if + * it is valid + */ + +#define PROM_ERROR (-1u) +#define PHANDLE_VALID(p) ((p) != 0 && (p) != PROM_ERROR) +#define IHANDLE_VALID(i) ((i) != 0 && (i) != PROM_ERROR) + + +/* This is the one and *ONLY* place where we actually call open + * firmware. + */ + +static int __init call_prom(const char *service, int nargs, int nret, ...) +{ + int i; + struct prom_args args; + va_list list; + + args.service = ADDR(service); + args.nargs = nargs; + args.nret = nret; + + va_start(list, nret); + for (i = 0; i < nargs; i++) + args.args[i] = va_arg(list, prom_arg_t); + va_end(list); + + for (i = 0; i < nret; i++) + args.args[nargs+i] = 0; + + if (enter_prom(&args, RELOC(prom_entry)) < 0) + return PROM_ERROR; + + return (nret > 0) ? args.args[nargs] : 0; +} + +static int __init call_prom_ret(const char *service, int nargs, int nret, + prom_arg_t *rets, ...) +{ + int i; + struct prom_args args; + va_list list; + + args.service = ADDR(service); + args.nargs = nargs; + args.nret = nret; + + va_start(list, rets); + for (i = 0; i < nargs; i++) + args.args[i] = va_arg(list, prom_arg_t); + va_end(list); + + for (i = 0; i < nret; i++) + args.args[nargs+i] = 0; + + if (enter_prom(&args, RELOC(prom_entry)) < 0) + return PROM_ERROR; + + if (rets != NULL) + for (i = 1; i < nret; ++i) + rets[i-1] = args.args[nargs+i]; + + return (nret > 0) ? args.args[nargs] : 0; +} + + +static void __init prom_print(const char *msg) +{ + const char *p, *q; + struct prom_t *_prom = &RELOC(prom); + + if (_prom->stdout == 0) + return; + + for (p = msg; *p != 0; p = q) { + for (q = p; *q != 0 && *q != '\n'; ++q) + ; + if (q > p) + call_prom("write", 3, 1, _prom->stdout, p, q - p); + if (*q == 0) + break; + ++q; + call_prom("write", 3, 1, _prom->stdout, ADDR("\r\n"), 2); + } +} + + +static void __init prom_print_hex(unsigned long val) +{ + int i, nibbles = sizeof(val)*2; + char buf[sizeof(val)*2+1]; + struct prom_t *_prom = &RELOC(prom); + + for (i = nibbles-1; i >= 0; i--) { + buf[i] = (val & 0xf) + '0'; + if (buf[i] > '9') + buf[i] += ('a'-'0'-10); + val >>= 4; + } + buf[nibbles] = '\0'; + call_prom("write", 3, 1, _prom->stdout, buf, nibbles); +} + +/* max number of decimal digits in an unsigned long */ +#define UL_DIGITS 21 +static void __init prom_print_dec(unsigned long val) +{ + int i, size; + char buf[UL_DIGITS+1]; + struct prom_t *_prom = &RELOC(prom); + + for (i = UL_DIGITS-1; i >= 0; i--) { + buf[i] = (val % 10) + '0'; + val = val/10; + if (val == 0) + break; + } + /* shift stuff down */ + size = UL_DIGITS - i; + call_prom("write", 3, 1, _prom->stdout, buf+i, size); +} + +static void __init prom_printf(const char *format, ...) 
+{ + const char *p, *q, *s; + va_list args; + unsigned long v; + long vs; + struct prom_t *_prom = &RELOC(prom); + + va_start(args, format); +#ifdef CONFIG_PPC64 + format = PTRRELOC(format); +#endif + for (p = format; *p != 0; p = q) { + for (q = p; *q != 0 && *q != '\n' && *q != '%'; ++q) + ; + if (q > p) + call_prom("write", 3, 1, _prom->stdout, p, q - p); + if (*q == 0) + break; + if (*q == '\n') { + ++q; + call_prom("write", 3, 1, _prom->stdout, + ADDR("\r\n"), 2); + continue; + } + ++q; + if (*q == 0) + break; + switch (*q) { + case 's': + ++q; + s = va_arg(args, const char *); + prom_print(s); + break; + case 'x': + ++q; + v = va_arg(args, unsigned long); + prom_print_hex(v); + break; + case 'd': + ++q; + vs = va_arg(args, int); + if (vs < 0) { + prom_print(RELOC("-")); + vs = -vs; + } + prom_print_dec(vs); + break; + case 'l': + ++q; + if (*q == 0) + break; + else if (*q == 'x') { + ++q; + v = va_arg(args, unsigned long); + prom_print_hex(v); + } else if (*q == 'u') { /* '%lu' */ + ++q; + v = va_arg(args, unsigned long); + prom_print_dec(v); + } else if (*q == 'd') { /* %ld */ + ++q; + vs = va_arg(args, long); + if (vs < 0) { + prom_print(RELOC("-")); + vs = -vs; + } + prom_print_dec(vs); + } + break; + } + } +} + + +static unsigned int __init prom_claim(unsigned long virt, unsigned long size, + unsigned long align) +{ + struct prom_t *_prom = &RELOC(prom); + + if (align == 0 && (OF_WORKAROUNDS & OF_WA_CLAIM)) { + /* + * Old OF requires we claim physical and virtual separately + * and then map explicitly (assuming virtual mode) + */ + int ret; + prom_arg_t result; + + ret = call_prom_ret("call-method", 5, 2, &result, + ADDR("claim"), _prom->memory, + align, size, virt); + if (ret != 0 || result == -1) + return -1; + ret = call_prom_ret("call-method", 5, 2, &result, + ADDR("claim"), _prom->mmumap, + align, size, virt); + if (ret != 0) { + call_prom("call-method", 4, 1, ADDR("release"), + _prom->memory, size, virt); + return -1; + } + /* the 0x12 is M (coherence) + PP == read/write */ + call_prom("call-method", 6, 1, + ADDR("map"), _prom->mmumap, 0x12, size, virt, virt); + return virt; + } + return call_prom("claim", 3, 1, (prom_arg_t)virt, (prom_arg_t)size, + (prom_arg_t)align); +} + +static void __init __attribute__((noreturn)) prom_panic(const char *reason) +{ +#ifdef CONFIG_PPC64 + reason = PTRRELOC(reason); +#endif + prom_print(reason); + /* Do not call exit because it clears the screen on pmac + * it also causes some sort of double-fault on early pmacs */ + if (RELOC(of_platform) == PLATFORM_POWERMAC) + asm("trap\n"); + + /* ToDo: should put up an SRC here on pSeries */ + call_prom("exit", 0, 0); + + for (;;) /* should never get here */ + ; +} + + +static int __init prom_next_node(phandle *nodep) +{ + phandle node; + + if ((node = *nodep) != 0 + && (*nodep = call_prom("child", 1, 1, node)) != 0) + return 1; + if ((*nodep = call_prom("peer", 1, 1, node)) != 0) + return 1; + for (;;) { + if ((node = call_prom("parent", 1, 1, node)) == 0) + return 0; + if ((*nodep = call_prom("peer", 1, 1, node)) != 0) + return 1; + } +} + +static int inline prom_getprop(phandle node, const char *pname, + void *value, size_t valuelen) +{ + return call_prom("getprop", 4, 1, node, ADDR(pname), + (u32)(unsigned long) value, (u32) valuelen); +} + +static int inline prom_getproplen(phandle node, const char *pname) +{ + return call_prom("getproplen", 2, 1, node, ADDR(pname)); +} + +static void add_string(char **str, const char *q) +{ + char *p = *str; + + while (*q) + *p++ = *q++; + *p++ = ' '; + *str = 
p; +} + +static char *tohex(unsigned int x) +{ + static char digits[] = "0123456789abcdef"; + static char result[9]; + int i; + + result[8] = 0; + i = 8; + do { + --i; + result[i] = digits[x & 0xf]; + x >>= 4; + } while (x != 0 && i > 0); + return &result[i]; +} + +static int __init prom_setprop(phandle node, const char *nodename, + const char *pname, void *value, size_t valuelen) +{ + char cmd[256], *p; + + if (!(OF_WORKAROUNDS & OF_WA_LONGTRAIL)) + return call_prom("setprop", 4, 1, node, ADDR(pname), + (u32)(unsigned long) value, (u32) valuelen); + + /* gah... setprop doesn't work on longtrail, have to use interpret */ + p = cmd; + add_string(&p, "dev"); + add_string(&p, nodename); + add_string(&p, tohex((u32)(unsigned long) value)); + add_string(&p, tohex(valuelen)); + add_string(&p, tohex(ADDR(pname))); + add_string(&p, tohex(strlen(RELOC(pname)))); + add_string(&p, "property"); + *p = 0; + return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd); +} + +/* We can't use the standard versions because of RELOC headaches. */ +#define isxdigit(c) (('0' <= (c) && (c) <= '9') \ + || ('a' <= (c) && (c) <= 'f') \ + || ('A' <= (c) && (c) <= 'F')) + +#define isdigit(c) ('0' <= (c) && (c) <= '9') +#define islower(c) ('a' <= (c) && (c) <= 'z') +#define toupper(c) (islower(c) ? ((c) - 'a' + 'A') : (c)) + +unsigned long prom_strtoul(const char *cp, const char **endp) +{ + unsigned long result = 0, base = 10, value; + + if (*cp == '0') { + base = 8; + cp++; + if (toupper(*cp) == 'X') { + cp++; + base = 16; + } + } + + while (isxdigit(*cp) && + (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < base) { + result = result * base + value; + cp++; + } + + if (endp) + *endp = cp; + + return result; +} + +unsigned long prom_memparse(const char *ptr, const char **retptr) +{ + unsigned long ret = prom_strtoul(ptr, retptr); + int shift = 0; + + /* + * We can't use a switch here because GCC *may* generate a + * jump table which won't work, because we're not running at + * the address we're linked at. 
+ */ + if ('G' == **retptr || 'g' == **retptr) + shift = 30; + + if ('M' == **retptr || 'm' == **retptr) + shift = 20; + + if ('K' == **retptr || 'k' == **retptr) + shift = 10; + + if (shift) { + ret <<= shift; + (*retptr)++; + } + + return ret; +} + +/* + * Early parsing of the command line passed to the kernel, used for + * "mem=x" and the options that affect the iommu + */ +static void __init early_cmdline_parse(void) +{ + struct prom_t *_prom = &RELOC(prom); + const char *opt; + + char *p; + int l = 0; + + RELOC(prom_cmd_line[0]) = 0; + p = RELOC(prom_cmd_line); + if ((long)_prom->chosen > 0) + l = prom_getprop(_prom->chosen, "bootargs", p, COMMAND_LINE_SIZE-1); +#ifdef CONFIG_CMDLINE + if (l <= 0 || p[0] == '\0') /* dbl check */ + strlcpy(RELOC(prom_cmd_line), + RELOC(CONFIG_CMDLINE), sizeof(prom_cmd_line)); +#endif /* CONFIG_CMDLINE */ + prom_printf("command line: %s\n", RELOC(prom_cmd_line)); + +#ifdef CONFIG_PPC64 + opt = strstr(RELOC(prom_cmd_line), RELOC("iommu=")); + if (opt) { + prom_printf("iommu opt is: %s\n", opt); + opt += 6; + while (*opt && *opt == ' ') + opt++; + if (!strncmp(opt, RELOC("off"), 3)) + RELOC(prom_iommu_off) = 1; + else if (!strncmp(opt, RELOC("force"), 5)) + RELOC(prom_iommu_force_on) = 1; + } +#endif + opt = strstr(RELOC(prom_cmd_line), RELOC("mem=")); + if (opt) { + opt += 4; + RELOC(prom_memory_limit) = prom_memparse(opt, (const char **)&opt); +#ifdef CONFIG_PPC64 + /* Align to 16 MB == size of ppc64 large page */ + RELOC(prom_memory_limit) = ALIGN(RELOC(prom_memory_limit), 0x1000000); +#endif + } +} + +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) +/* + * There are two methods for telling firmware what our capabilities are. + * Newer machines have an "ibm,client-architecture-support" method on the + * root node. For older machines, we have to call the "process-elf-header" + * method in the /packages/elf-loader node, passing it a fake 32-bit + * ELF header containing a couple of PT_NOTE sections that contain + * structures that contain various information. + */ + +/* + * New method - extensible architecture description vector. + * + * Because the description vector contains a mix of byte and word + * values, we declare it as an unsigned char array, and use this + * macro to put word values in. 
+ */ +#define W(x) ((x) >> 24) & 0xff, ((x) >> 16) & 0xff, \ + ((x) >> 8) & 0xff, (x) & 0xff + +/* Option vector bits - generic bits in byte 1 */ +#define OV_IGNORE 0x80 /* ignore this vector */ +#define OV_CESSATION_POLICY 0x40 /* halt if unsupported option present*/ + +/* Option vector 1: processor architectures supported */ +#define OV1_PPC_2_00 0x80 /* set if we support PowerPC 2.00 */ +#define OV1_PPC_2_01 0x40 /* set if we support PowerPC 2.01 */ +#define OV1_PPC_2_02 0x20 /* set if we support PowerPC 2.02 */ +#define OV1_PPC_2_03 0x10 /* set if we support PowerPC 2.03 */ +#define OV1_PPC_2_04 0x08 /* set if we support PowerPC 2.04 */ +#define OV1_PPC_2_05 0x04 /* set if we support PowerPC 2.05 */ +#define OV1_PPC_2_06 0x02 /* set if we support PowerPC 2.06 */ + +/* Option vector 2: Open Firmware options supported */ +#define OV2_REAL_MODE 0x20 /* set if we want OF in real mode */ + +/* Option vector 3: processor options supported */ +#define OV3_FP 0x80 /* floating point */ +#define OV3_VMX 0x40 /* VMX/Altivec */ +#define OV3_DFP 0x20 /* decimal FP */ + +/* Option vector 5: PAPR/OF options supported */ +#define OV5_LPAR 0x80 /* logical partitioning supported */ +#define OV5_SPLPAR 0x40 /* shared-processor LPAR supported */ +/* ibm,dynamic-reconfiguration-memory property supported */ +#define OV5_DRCONF_MEMORY 0x20 +#define OV5_LARGE_PAGES 0x10 /* large pages supported */ +#define OV5_DONATE_DEDICATE_CPU 0x02 /* donate dedicated CPU support */ +/* PCIe/MSI support. Without MSI full PCIe is not supported */ +#ifdef CONFIG_PCI_MSI +#define OV5_MSI 0x01 /* PCIe/MSI support */ +#else +#define OV5_MSI 0x00 +#endif /* CONFIG_PCI_MSI */ +#ifdef CONFIG_PPC_SMLPAR +#define OV5_CMO 0x80 /* Cooperative Memory Overcommitment */ +#define OV5_XCMO 0x40 /* Page Coalescing */ +#else +#define OV5_CMO 0x00 +#define OV5_XCMO 0x00 +#endif +#define OV5_TYPE1_AFFINITY 0x80 /* Type 1 NUMA affinity */ + +/* Option Vector 6: IBM PAPR hints */ +#define OV6_LINUX 0x02 /* Linux is our OS */ + +/* + * The architecture vector has an array of PVR mask/value pairs, + * followed by # option vectors - 1, followed by the option vectors. 
+ */ +static unsigned char ibm_architecture_vec[] = { + W(0xfffe0000), W(0x003a0000), /* POWER5/POWER5+ */ + W(0xffff0000), W(0x003e0000), /* POWER6 */ + W(0xffff0000), W(0x003f0000), /* POWER7 */ + W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */ + W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */ + W(0xfffffffe), W(0x0f000001), /* all 2.04-compliant and earlier */ + 6 - 1, /* 6 option vectors */ + + /* option vector 1: processor architectures supported */ + 3 - 2, /* length */ + 0, /* don't ignore, don't halt */ + OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 | + OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06, + + /* option vector 2: Open Firmware options supported */ + 34 - 2, /* length */ + OV2_REAL_MODE, + 0, 0, + W(0xffffffff), /* real_base */ + W(0xffffffff), /* real_size */ + W(0xffffffff), /* virt_base */ + W(0xffffffff), /* virt_size */ + W(0xffffffff), /* load_base */ + W(256), /* 256MB min RMA */ + W(0xffffffff), /* full client load */ + 0, /* min RMA percentage of total RAM */ + 48, /* max log_2(hash table size) */ + + /* option vector 3: processor options supported */ + 3 - 2, /* length */ + 0, /* don't ignore, don't halt */ + OV3_FP | OV3_VMX | OV3_DFP, + + /* option vector 4: IBM PAPR implementation */ + 2 - 2, /* length */ + 0, /* don't halt */ + + /* option vector 5: PAPR/OF options */ + 13 - 2, /* length */ + 0, /* don't ignore, don't halt */ + OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | + OV5_DONATE_DEDICATE_CPU | OV5_MSI, + 0, + OV5_CMO | OV5_XCMO, + OV5_TYPE1_AFFINITY, + 0, + 0, + 0, + /* WARNING: The offset of the "number of cores" field below + * must match by the macro below. Update the definition if + * the structure layout changes. + */ +#define IBM_ARCH_VEC_NRCORES_OFFSET 100 + W(NR_CPUS), /* number of cores supported */ + + /* option vector 6: IBM PAPR hints */ + 4 - 2, /* length */ + 0, + 0, + OV6_LINUX, + +}; + +/* Old method - ELF header with PT_NOTE sections */ +static struct fake_elf { + Elf32_Ehdr elfhdr; + Elf32_Phdr phdr[2]; + struct chrpnote { + u32 namesz; + u32 descsz; + u32 type; + char name[8]; /* "PowerPC" */ + struct chrpdesc { + u32 real_mode; + u32 real_base; + u32 real_size; + u32 virt_base; + u32 virt_size; + u32 load_base; + } chrpdesc; + } chrpnote; + struct rpanote { + u32 namesz; + u32 descsz; + u32 type; + char name[24]; /* "IBM,RPA-Client-Config" */ + struct rpadesc { + u32 lpar_affinity; + u32 min_rmo_size; + u32 min_rmo_percent; + u32 max_pft_size; + u32 splpar; + u32 min_load; + u32 new_mem_def; + u32 ignore_me; + } rpadesc; + } rpanote; +} fake_elf = { + .elfhdr = { + .e_ident = { 0x7f, 'E', 'L', 'F', + ELFCLASS32, ELFDATA2MSB, EV_CURRENT }, + .e_type = ET_EXEC, /* yeah right */ + .e_machine = EM_PPC, + .e_version = EV_CURRENT, + .e_phoff = offsetof(struct fake_elf, phdr), + .e_phentsize = sizeof(Elf32_Phdr), + .e_phnum = 2 + }, + .phdr = { + [0] = { + .p_type = PT_NOTE, + .p_offset = offsetof(struct fake_elf, chrpnote), + .p_filesz = sizeof(struct chrpnote) + }, [1] = { + .p_type = PT_NOTE, + .p_offset = offsetof(struct fake_elf, rpanote), + .p_filesz = sizeof(struct rpanote) + } + }, + .chrpnote = { + .namesz = sizeof("PowerPC"), + .descsz = sizeof(struct chrpdesc), + .type = 0x1275, + .name = "PowerPC", + .chrpdesc = { + .real_mode = ~0U, /* ~0 means "don't care" */ + .real_base = ~0U, + .real_size = ~0U, + .virt_base = ~0U, + .virt_size = ~0U, + .load_base = ~0U + }, + }, + .rpanote = { + .namesz = sizeof("IBM,RPA-Client-Config"), + .descsz = sizeof(struct rpadesc), + .type = 0x12759999, + 
		.name = "IBM,RPA-Client-Config",
+		.rpadesc = {
+			.lpar_affinity = 0,
+			.min_rmo_size = 64,	/* in megabytes */
+			.min_rmo_percent = 0,
+			.max_pft_size = 48,	/* 2^48 bytes max PFT size */
+			.splpar = 1,
+			.min_load = ~0U,
+			.new_mem_def = 0
+		}
+	}
+};
+
+static int __init prom_count_smt_threads(void)
+{
+	phandle node;
+	char type[64];
+	unsigned int plen;
+
+	/* Pick up the first CPU node we can find */
+	for (node = 0; prom_next_node(&node); ) {
+		type[0] = 0;
+		prom_getprop(node, "device_type", type, sizeof(type));
+
+		if (strcmp(type, RELOC("cpu")))
+			continue;
+		/*
+		 * There is an entry for each smt thread, each entry being
+		 * 4 bytes long.  All cpus should have the same number of
+		 * smt threads, so return after finding the first.
+		 */
+		plen = prom_getproplen(node, "ibm,ppc-interrupt-server#s");
+		if (plen == PROM_ERROR)
+			break;
+		plen >>= 2;
+		prom_debug("Found %lu smt threads per core\n", (unsigned long)plen);
+
+		/* Sanity check */
+		if (plen < 1 || plen > 64) {
+			prom_printf("Threads per core %lu out of bounds, assuming 1\n",
+				    (unsigned long)plen);
+			return 1;
+		}
+		return plen;
+	}
+	prom_debug("No threads found, assuming 1 per core\n");
+
+	return 1;
+
+}
+
+
+static void __init prom_send_capabilities(void)
+{
+	ihandle elfloader, root;
+	prom_arg_t ret;
+	u32 *cores;
+
+	root = call_prom("open", 1, 1, ADDR("/"));
+	if (root != 0) {
+		/* We need to tell the FW about the number of cores we support.
+		 *
+		 * To do that, we count the number of threads on the first core
+		 * (we assume this is the same for all cores) and use it to
+		 * divide NR_CPUS.
+		 */
+		cores = (u32 *)PTRRELOC(&ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]);
+		if (*cores != NR_CPUS) {
+			prom_printf("WARNING ! "
+				    "ibm_architecture_vec structure inconsistent: %lu!\n",
+				    *cores);
+		} else {
+			*cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads());
+			prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n",
+				    *cores, NR_CPUS);
+		}
+
+		/* try calling the ibm,client-architecture-support method */
+		prom_printf("Calling ibm,client-architecture-support...");
+		if (call_prom_ret("call-method", 3, 2, &ret,
+				  ADDR("ibm,client-architecture-support"),
+				  root,
+				  ADDR(ibm_architecture_vec)) == 0) {
+			/* the call exists... */
+			if (ret)
+				prom_printf("\nWARNING: ibm,client-architecture"
+					    "-support call FAILED!\n");
+			call_prom("close", 1, 0, root);
+			prom_printf(" done\n");
+			return;
+		}
+		call_prom("close", 1, 0, root);
+		prom_printf(" not implemented\n");
+	}
+
+	/* no ibm,client-architecture-support call, try the old way */
+	elfloader = call_prom("open", 1, 1, ADDR("/packages/elf-loader"));
+	if (elfloader == 0) {
+		prom_printf("couldn't open /packages/elf-loader\n");
+		return;
+	}
+	call_prom("call-method", 3, 1, ADDR("process-elf-header"),
+			elfloader, ADDR(&fake_elf));
+	call_prom("close", 1, 0, elfloader);
+}
+#endif
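The NR_CPUS patching above pokes a word that W() packed into the byte array at build time. A stand-alone illustration of the W() trick (splitting a 32-bit value into big-endian bytes so it can live inside an unsigned char array), with a made-up value:

#include <stdio.h>

#define W(x)	((x) >> 24) & 0xff, ((x) >> 16) & 0xff, \
		((x) >> 8) & 0xff, (x) & 0xff

int main(void)
{
	/* expands to four comma-separated byte initializers */
	unsigned char vec[] = { W(0x003f0000) };

	printf("%02x %02x %02x %02x\n", vec[0], vec[1], vec[2], vec[3]);
	/* prints: 00 3f 00 00 */
	return 0;
}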
+
+/*
+ * Memory allocation strategy... our layout is normally:
+ *
+ *  at 14Mb or more we have vmlinux, then a gap and initrd.  In some
+ *  rare cases, initrd might end up being before the kernel though.
+ *  We assume this won't overwrite the final kernel at 0, we have no
+ *  provision to handle that in this version, but it should hopefully
+ *  never happen.
+ *
+ *  alloc_top is set to the top of RMO, eventually shrink down if the
+ *  TCEs overlap
+ *
+ *  alloc_bottom is set to the top of kernel/initrd
+ *
+ *  from there, allocations are done this way : rtas is allocated
+ *  topmost, and the device-tree is allocated from the bottom.  We try
+ *  to grow the device-tree allocation as we progress.  If we can't,
+ *  then we fail, we don't currently have a facility to restart
+ *  elsewhere, but that shouldn't be necessary.
+ *
+ *  Note that calls to reserve_mem have to be done explicitly, memory
+ *  allocated with either alloc_up or alloc_down isn't automatically
+ *  reserved.
+ */
+
+
+/*
+ * Allocates memory in the RMO upward from the kernel/initrd
+ *
+ * When align is 0, this is a special case, it means to allocate in place
+ * at the current location of alloc_bottom or fail (that is basically
+ * extending the previous allocation). Used for the device-tree flattening
+ */
+static unsigned long __init alloc_up(unsigned long size, unsigned long align)
+{
+	unsigned long base = RELOC(alloc_bottom);
+	unsigned long addr = 0;
+
+	if (align)
+		base = _ALIGN_UP(base, align);
+	prom_debug("alloc_up(%x, %x)\n", size, align);
+	if (RELOC(ram_top) == 0)
+		prom_panic("alloc_up() called with mem not initialized\n");
+
+	for(; (base + size) <= RELOC(alloc_top);
+	      base = _ALIGN_UP(base + 0x100000, align)) {
+		prom_debug("    trying: 0x%x\n\r", base);
+		addr = (unsigned long)prom_claim(base, size, 0);
+		if (addr != PROM_ERROR && addr != 0)
+			break;
+		addr = 0;
+		if (align == 0)
+			break;
+	}
+	if (addr == 0)
+		return 0;
+	RELOC(alloc_bottom) = addr + size;
+
+	prom_debug(" -> %x\n", addr);
+	prom_debug("  alloc_bottom : %x\n", RELOC(alloc_bottom));
+	prom_debug("  alloc_top    : %x\n", RELOC(alloc_top));
+	prom_debug("  alloc_top_hi : %x\n", RELOC(alloc_top_high));
+	prom_debug("  rmo_top      : %x\n", RELOC(rmo_top));
+	prom_debug("  ram_top      : %x\n", RELOC(ram_top));
+
+	return addr;
+}
+
+/*
+ * Allocates memory downward, either from top of RMO, or if highmem
+ * is set, from the top of RAM.  Note that this one doesn't handle
+ * failures.  It does claim memory if highmem is not set.
+ */
+static unsigned long __init alloc_down(unsigned long size, unsigned long align,
+				       int highmem)
+{
+	unsigned long base, addr = 0;
+
+	prom_debug("alloc_down(%x, %x, %s)\n", size, align,
+		   highmem ? RELOC("(high)") : RELOC("(low)"));
+	if (RELOC(ram_top) == 0)
+		prom_panic("alloc_down() called with mem not initialized\n");
+
+	if (highmem) {
+		/* Carve out storage for the TCE table. */
+		addr = _ALIGN_DOWN(RELOC(alloc_top_high) - size, align);
+		if (addr <= RELOC(alloc_bottom))
+			return 0;
+		/* Will we bump into the RMO ? If yes, check that we
+		 * didn't overlap existing allocations there, if we did,
+		 * we are dead, we must be the first in town !
+		 */
+		if (addr < RELOC(rmo_top)) {
+			/* Good, we are first */
+			if (RELOC(alloc_top) == RELOC(rmo_top))
+				RELOC(alloc_top) = RELOC(rmo_top) = addr;
+			else
+				return 0;
+		}
+		RELOC(alloc_top_high) = addr;
+		goto bail;
+	}
+
+	base = _ALIGN_DOWN(RELOC(alloc_top) - size, align);
+	for (; base > RELOC(alloc_bottom);
+	     base = _ALIGN_DOWN(base - 0x100000, align))  {
+		prom_debug("    trying: 0x%x\n\r", base);
+		addr = (unsigned long)prom_claim(base, size, 0);
+		if (addr != PROM_ERROR && addr != 0)
+			break;
+		addr = 0;
+	}
+	if (addr == 0)
+		return 0;
+	RELOC(alloc_top) = addr;
+
+ bail:
+	prom_debug(" -> %x\n", addr);
+	prom_debug("  alloc_bottom : %x\n", RELOC(alloc_bottom));
+	prom_debug("  alloc_top    : %x\n", RELOC(alloc_top));
+	prom_debug("  alloc_top_hi : %x\n", RELOC(alloc_top_high));
+	prom_debug("  rmo_top      : %x\n", RELOC(rmo_top));
+	prom_debug("  ram_top      : %x\n", RELOC(ram_top));
+
+	return addr;
+}
+
+/*
+ * Parse a "reg" cell
+ */
+static unsigned long __init prom_next_cell(int s, cell_t **cellp)
+{
+	cell_t *p = *cellp;
+	unsigned long r = 0;
+
+	/* Ignore more than 2 cells */
+	while (s > sizeof(unsigned long) / 4) {
+		p++;
+		s--;
+	}
+	r = *p++;
+#ifdef CONFIG_PPC64
+	if (s > 1) {
+		r <<= 32;
+		r |= *(p++);
+	}
+#endif
+	*cellp = p;
+	return r;
+}
+
+/*
+ * Very dumb function for adding to the memory reserve list, but
+ * we don't need anything smarter at this point
+ *
+ * XXX Eventually check for collisions.  They should NEVER happen.
+ * If problems seem to show up, it would be a good start to track
+ * them down.
+ */
+static void __init reserve_mem(u64 base, u64 size)
+{
+	u64 top = base + size;
+	unsigned long cnt = RELOC(mem_reserve_cnt);
+
+	if (size == 0)
+		return;
+
+	/* We need to always keep one empty entry so that we
+	 * have our terminator with "size" set to 0 since we are
+	 * dumb and just copy this entire array to the boot params
+	 */
+	base = _ALIGN_DOWN(base, PAGE_SIZE);
+	top = _ALIGN_UP(top, PAGE_SIZE);
+	size = top - base;
+
+	if (cnt >= (MEM_RESERVE_MAP_SIZE - 1))
+		prom_panic("Memory reserve map exhausted !\n");
+	RELOC(mem_reserve_map)[cnt].base = base;
+	RELOC(mem_reserve_map)[cnt].size = size;
+	RELOC(mem_reserve_cnt) = cnt + 1;
+}
+
+/*
+ * Initialize memory allocation mechanism, parse "memory" nodes and
+ * obtain that way the top of memory and RMO to set up our local allocator
+ */
+static void __init prom_init_mem(void)
+{
+	phandle node;
+	char *path, type[64];
+	unsigned int plen;
+	cell_t *p, *endp;
+	struct prom_t *_prom = &RELOC(prom);
+	u32 rac, rsc;
+
+	/*
+	 * We iterate the memory nodes to find
+	 * 1) top of RMO (first node)
+	 * 2) top of memory
+	 */
+	rac = 2;
+	prom_getprop(_prom->root, "#address-cells", &rac, sizeof(rac));
+	rsc = 1;
+	prom_getprop(_prom->root, "#size-cells", &rsc, sizeof(rsc));
+	prom_debug("root_addr_cells: %x\n", (unsigned long) rac);
+	prom_debug("root_size_cells: %x\n", (unsigned long) rsc);
+
+	prom_debug("scanning memory:\n");
+	path = RELOC(prom_scratch);
+
+	for (node = 0; prom_next_node(&node); ) {
+		type[0] = 0;
+		prom_getprop(node, "device_type", type, sizeof(type));
+
+		if (type[0] == 0) {
+			/*
+			 * CHRP Longtrail machines have no device_type
+			 * on the memory node, so check the name instead...
+ */ + prom_getprop(node, "name", type, sizeof(type)); + } + if (strcmp(type, RELOC("memory"))) + continue; + + plen = prom_getprop(node, "reg", RELOC(regbuf), sizeof(regbuf)); + if (plen > sizeof(regbuf)) { + prom_printf("memory node too large for buffer !\n"); + plen = sizeof(regbuf); + } + p = RELOC(regbuf); + endp = p + (plen / sizeof(cell_t)); + +#ifdef DEBUG_PROM + memset(path, 0, PROM_SCRATCH_SIZE); + call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); + prom_debug(" node %s :\n", path); +#endif /* DEBUG_PROM */ + + while ((endp - p) >= (rac + rsc)) { + unsigned long base, size; + + base = prom_next_cell(rac, &p); + size = prom_next_cell(rsc, &p); + + if (size == 0) + continue; + prom_debug(" %x %x\n", base, size); + if (base == 0 && (RELOC(of_platform) & PLATFORM_LPAR)) + RELOC(rmo_top) = size; + if ((base + size) > RELOC(ram_top)) + RELOC(ram_top) = base + size; + } + } + + RELOC(alloc_bottom) = PAGE_ALIGN((unsigned long)&RELOC(_end) + 0x4000); + + /* + * If prom_memory_limit is set we reduce the upper limits *except* for + * alloc_top_high. This must be the real top of RAM so we can put + * TCE's up there. + */ + + RELOC(alloc_top_high) = RELOC(ram_top); + + if (RELOC(prom_memory_limit)) { + if (RELOC(prom_memory_limit) <= RELOC(alloc_bottom)) { + prom_printf("Ignoring mem=%x <= alloc_bottom.\n", + RELOC(prom_memory_limit)); + RELOC(prom_memory_limit) = 0; + } else if (RELOC(prom_memory_limit) >= RELOC(ram_top)) { + prom_printf("Ignoring mem=%x >= ram_top.\n", + RELOC(prom_memory_limit)); + RELOC(prom_memory_limit) = 0; + } else { + RELOC(ram_top) = RELOC(prom_memory_limit); + RELOC(rmo_top) = min(RELOC(rmo_top), RELOC(prom_memory_limit)); + } + } + + /* + * Setup our top alloc point, that is top of RMO or top of + * segment 0 when running non-LPAR. + * Some RS64 machines have buggy firmware where claims up at + * 1GB fail. Cap at 768MB as a workaround. + * Since 768MB is plenty of room, and we need to cap to something + * reasonable on 32-bit, cap at 768MB on all machines. + */ + if (!RELOC(rmo_top)) + RELOC(rmo_top) = RELOC(ram_top); + RELOC(rmo_top) = min(0x30000000ul, RELOC(rmo_top)); + RELOC(alloc_top) = RELOC(rmo_top); + RELOC(alloc_top_high) = RELOC(ram_top); + + /* + * Check if we have an initrd after the kernel but still inside + * the RMO. If we do move our bottom point to after it. 
+ */ + if (RELOC(prom_initrd_start) && + RELOC(prom_initrd_start) < RELOC(rmo_top) && + RELOC(prom_initrd_end) > RELOC(alloc_bottom)) + RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end)); + + prom_printf("memory layout at init:\n"); + prom_printf(" memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit)); + prom_printf(" alloc_bottom : %x\n", RELOC(alloc_bottom)); + prom_printf(" alloc_top : %x\n", RELOC(alloc_top)); + prom_printf(" alloc_top_hi : %x\n", RELOC(alloc_top_high)); + prom_printf(" rmo_top : %x\n", RELOC(rmo_top)); + prom_printf(" ram_top : %x\n", RELOC(ram_top)); +} + +static void __init prom_close_stdin(void) +{ + struct prom_t *_prom = &RELOC(prom); + ihandle val; + + if (prom_getprop(_prom->chosen, "stdin", &val, sizeof(val)) > 0) + call_prom("close", 1, 0, val); +} + +#ifdef CONFIG_PPC_POWERNV + +static u64 __initdata prom_opal_size; +static u64 __initdata prom_opal_align; +static int __initdata prom_rtas_start_cpu; +static u64 __initdata prom_rtas_data; +static u64 __initdata prom_rtas_entry; + +#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL +static u64 __initdata prom_opal_base; +static u64 __initdata prom_opal_entry; +#endif + +/* XXX Don't change this structure without updating opal-takeover.S */ +static struct opal_secondary_data { + s64 ack; /* 0 */ + u64 go; /* 8 */ + struct opal_takeover_args args; /* 16 */ +} opal_secondary_data; + +extern char opal_secondary_entry; + +static void prom_query_opal(void) +{ + long rc; + + /* We must not query for OPAL presence on a machine that + * supports TNK takeover (970 blades), as this uses the same + * h-call with different arguments and will crash + */ + if (PHANDLE_VALID(call_prom("finddevice", 1, 1, + ADDR("/tnk-memory-map")))) { + prom_printf("TNK takeover detected, skipping OPAL check\n"); + return; + } + + prom_printf("Querying for OPAL presence... "); + rc = opal_query_takeover(&RELOC(prom_opal_size), + &RELOC(prom_opal_align)); + prom_debug("(rc = %ld) ", rc); + if (rc != 0) { + prom_printf("not there.\n"); + return; + } + RELOC(of_platform) = PLATFORM_OPAL; + prom_printf(" there !\n"); + prom_debug(" opal_size = 0x%lx\n", RELOC(prom_opal_size)); + prom_debug(" opal_align = 0x%lx\n", RELOC(prom_opal_align)); + if (RELOC(prom_opal_align) < 0x10000) + RELOC(prom_opal_align) = 0x10000; +} + +static int prom_rtas_call(int token, int nargs, int nret, int *outputs, ...) +{ + struct rtas_args rtas_args; + va_list list; + int i; + + rtas_args.token = token; + rtas_args.nargs = nargs; + rtas_args.nret = nret; + rtas_args.rets = (rtas_arg_t *)&(rtas_args.args[nargs]); + va_start(list, outputs); + for (i = 0; i < nargs; ++i) + rtas_args.args[i] = va_arg(list, rtas_arg_t); + va_end(list); + + for (i = 0; i < nret; ++i) + rtas_args.rets[i] = 0; + + opal_enter_rtas(&rtas_args, RELOC(prom_rtas_data), + RELOC(prom_rtas_entry)); + + if (nret > 1 && outputs != NULL) + for (i = 0; i < nret-1; ++i) + outputs[i] = rtas_args.rets[i+1]; + return (nret > 0)? 
rtas_args.rets[0]: 0; +} + +static void __init prom_opal_hold_cpus(void) +{ + int i, cnt, cpu, rc; + long j; + phandle node; + char type[64]; + u32 servers[8]; + struct prom_t *_prom = &RELOC(prom); + void *entry = (unsigned long *)&RELOC(opal_secondary_entry); + struct opal_secondary_data *data = &RELOC(opal_secondary_data); + + prom_debug("prom_opal_hold_cpus: start...\n"); + prom_debug(" - entry = 0x%x\n", entry); + prom_debug(" - data = 0x%x\n", data); + + data->ack = -1; + data->go = 0; + + /* look for cpus */ + for (node = 0; prom_next_node(&node); ) { + type[0] = 0; + prom_getprop(node, "device_type", type, sizeof(type)); + if (strcmp(type, RELOC("cpu")) != 0) + continue; + + /* Skip non-configured cpus. */ + if (prom_getprop(node, "status", type, sizeof(type)) > 0) + if (strcmp(type, RELOC("okay")) != 0) + continue; + + cnt = prom_getprop(node, "ibm,ppc-interrupt-server#s", servers, + sizeof(servers)); + if (cnt == PROM_ERROR) + break; + cnt >>= 2; + for (i = 0; i < cnt; i++) { + cpu = servers[i]; + prom_debug("CPU %d ... ", cpu); + if (cpu == _prom->cpu) { + prom_debug("booted !\n"); + continue; + } + prom_debug("starting ... "); + + /* Init the acknowledge var which will be reset by + * the secondary cpu when it awakens from its OF + * spinloop. + */ + data->ack = -1; + rc = prom_rtas_call(RELOC(prom_rtas_start_cpu), 3, 1, + NULL, cpu, entry, data); + prom_debug("rtas rc=%d ...", rc); + + for (j = 0; j < 100000000 && data->ack == -1; j++) { + HMT_low(); + mb(); + } + HMT_medium(); + if (data->ack != -1) + prom_debug("done, PIR=0x%x\n", data->ack); + else + prom_debug("timeout !\n"); + } + } + prom_debug("prom_opal_hold_cpus: end...\n"); +} + +static void prom_opal_takeover(void) +{ + struct opal_secondary_data *data = &RELOC(opal_secondary_data); + struct opal_takeover_args *args = &data->args; + u64 align = RELOC(prom_opal_align); + u64 top_addr, opal_addr; + + args->k_image = (u64)RELOC(_stext); + args->k_size = _end - _stext; + args->k_entry = 0; + args->k_entry2 = 0x60; + + top_addr = _ALIGN_UP(args->k_size, align); + + if (RELOC(prom_initrd_start) != 0) { + args->rd_image = RELOC(prom_initrd_start); + args->rd_size = RELOC(prom_initrd_end) - args->rd_image; + args->rd_loc = top_addr; + top_addr = _ALIGN_UP(args->rd_loc + args->rd_size, align); + } + + /* Pickup an address for the HAL. We want to go really high + * up to avoid problem with future kexecs. On the other hand + * we don't want to be all over the TCEs on P5IOC2 machines + * which are going to be up there too. 
We assume the machine + * has plenty of memory, and we ask for the HAL for now to + * be just below the 1G point, or above the initrd + */ + opal_addr = _ALIGN_DOWN(0x40000000 - RELOC(prom_opal_size), align); + if (opal_addr < top_addr) + opal_addr = top_addr; + args->hal_addr = opal_addr; + + /* Copy the command line to the kernel image */ + strlcpy(RELOC(boot_command_line), RELOC(prom_cmd_line), + COMMAND_LINE_SIZE); + + prom_debug(" k_image = 0x%lx\n", args->k_image); + prom_debug(" k_size = 0x%lx\n", args->k_size); + prom_debug(" k_entry = 0x%lx\n", args->k_entry); + prom_debug(" k_entry2 = 0x%lx\n", args->k_entry2); + prom_debug(" hal_addr = 0x%lx\n", args->hal_addr); + prom_debug(" rd_image = 0x%lx\n", args->rd_image); + prom_debug(" rd_size = 0x%lx\n", args->rd_size); + prom_debug(" rd_loc = 0x%lx\n", args->rd_loc); + prom_printf("Performing OPAL takeover,this can take a few minutes..\n"); + prom_close_stdin(); + mb(); + data->go = 1; + for (;;) + opal_do_takeover(args); +} + +/* + * Allocate room for and instantiate OPAL + */ +static void __init prom_instantiate_opal(void) +{ + phandle opal_node; + ihandle opal_inst; + u64 base, entry; + u64 size = 0, align = 0x10000; + u32 rets[2]; + + prom_debug("prom_instantiate_opal: start...\n"); + + opal_node = call_prom("finddevice", 1, 1, ADDR("/ibm,opal")); + prom_debug("opal_node: %x\n", opal_node); + if (!PHANDLE_VALID(opal_node)) + return; + + prom_getprop(opal_node, "opal-runtime-size", &size, sizeof(size)); + if (size == 0) + return; + prom_getprop(opal_node, "opal-runtime-alignment", &align, + sizeof(align)); + + base = alloc_down(size, align, 0); + if (base == 0) { + prom_printf("OPAL allocation failed !\n"); + return; + } + + opal_inst = call_prom("open", 1, 1, ADDR("/ibm,opal")); + if (!IHANDLE_VALID(opal_inst)) { + prom_printf("opening opal package failed (%x)\n", opal_inst); + return; + } + + prom_printf("instantiating opal at 0x%x...", base); + + if (call_prom_ret("call-method", 4, 3, rets, + ADDR("load-opal-runtime"), + opal_inst, + base >> 32, base & 0xffffffff) != 0 + || (rets[0] == 0 && rets[1] == 0)) { + prom_printf(" failed\n"); + return; + } + entry = (((u64)rets[0]) << 32) | rets[1]; + + prom_printf(" done\n"); + + reserve_mem(base, size); + + prom_debug("opal base = 0x%x\n", base); + prom_debug("opal align = 0x%x\n", align); + prom_debug("opal entry = 0x%x\n", entry); + prom_debug("opal size = 0x%x\n", (long)size); + + prom_setprop(opal_node, "/ibm,opal", "opal-base-address", + &base, sizeof(base)); + prom_setprop(opal_node, "/ibm,opal", "opal-entry-address", + &entry, sizeof(entry)); + +#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL + RELOC(prom_opal_base) = base; + RELOC(prom_opal_entry) = entry; +#endif + prom_debug("prom_instantiate_opal: end...\n"); +} + +#endif /* CONFIG_PPC_POWERNV */ + +/* + * Allocate room for and instantiate RTAS + */ +static void __init prom_instantiate_rtas(void) +{ + phandle rtas_node; + ihandle rtas_inst; + u32 base, entry = 0; + u32 size = 0; + + prom_debug("prom_instantiate_rtas: start...\n"); + + rtas_node = call_prom("finddevice", 1, 1, ADDR("/rtas")); + prom_debug("rtas_node: %x\n", rtas_node); + if (!PHANDLE_VALID(rtas_node)) + return; + + prom_getprop(rtas_node, "rtas-size", &size, sizeof(size)); + if (size == 0) + return; + + base = alloc_down(size, PAGE_SIZE, 0); + if (base == 0) + prom_panic("Could not allocate memory for RTAS\n"); + + rtas_inst = call_prom("open", 1, 1, ADDR("/rtas")); + if (!IHANDLE_VALID(rtas_inst)) { + prom_printf("opening rtas package failed (%x)\n", rtas_inst); + 
return; + } + + prom_printf("instantiating rtas at 0x%x...", base); + + if (call_prom_ret("call-method", 3, 2, &entry, + ADDR("instantiate-rtas"), + rtas_inst, base) != 0 + || entry == 0) { + prom_printf(" failed\n"); + return; + } + prom_printf(" done\n"); + + reserve_mem(base, size); + + prom_setprop(rtas_node, "/rtas", "linux,rtas-base", + &base, sizeof(base)); + prom_setprop(rtas_node, "/rtas", "linux,rtas-entry", + &entry, sizeof(entry)); + +#ifdef CONFIG_PPC_POWERNV + /* PowerVN takeover hack */ + RELOC(prom_rtas_data) = base; + RELOC(prom_rtas_entry) = entry; + prom_getprop(rtas_node, "start-cpu", &RELOC(prom_rtas_start_cpu), 4); +#endif + prom_debug("rtas base = 0x%x\n", base); + prom_debug("rtas entry = 0x%x\n", entry); + prom_debug("rtas size = 0x%x\n", (long)size); + + prom_debug("prom_instantiate_rtas: end...\n"); +} + +#ifdef CONFIG_PPC64 +/* + * Allocate room for and initialize TCE tables + */ +static void __init prom_initialize_tce_table(void) +{ + phandle node; + ihandle phb_node; + char compatible[64], type[64], model[64]; + char *path = RELOC(prom_scratch); + u64 base, align; + u32 minalign, minsize; + u64 tce_entry, *tce_entryp; + u64 local_alloc_top, local_alloc_bottom; + u64 i; + + if (RELOC(prom_iommu_off)) + return; + + prom_debug("starting prom_initialize_tce_table\n"); + + /* Cache current top of allocs so we reserve a single block */ + local_alloc_top = RELOC(alloc_top_high); + local_alloc_bottom = local_alloc_top; + + /* Search all nodes looking for PHBs. */ + for (node = 0; prom_next_node(&node); ) { + compatible[0] = 0; + type[0] = 0; + model[0] = 0; + prom_getprop(node, "compatible", + compatible, sizeof(compatible)); + prom_getprop(node, "device_type", type, sizeof(type)); + prom_getprop(node, "model", model, sizeof(model)); + + if ((type[0] == 0) || (strstr(type, RELOC("pci")) == NULL)) + continue; + + /* Keep the old logic intact to avoid regression. */ + if (compatible[0] != 0) { + if ((strstr(compatible, RELOC("python")) == NULL) && + (strstr(compatible, RELOC("Speedwagon")) == NULL) && + (strstr(compatible, RELOC("Winnipeg")) == NULL)) + continue; + } else if (model[0] != 0) { + if ((strstr(model, RELOC("ython")) == NULL) && + (strstr(model, RELOC("peedwagon")) == NULL) && + (strstr(model, RELOC("innipeg")) == NULL)) + continue; + } + + if (prom_getprop(node, "tce-table-minalign", &minalign, + sizeof(minalign)) == PROM_ERROR) + minalign = 0; + if (prom_getprop(node, "tce-table-minsize", &minsize, + sizeof(minsize)) == PROM_ERROR) + minsize = 4UL << 20; + + /* + * Even though we read what OF wants, we just set the table + * size to 4 MB. This is enough to map 2GB of PCI DMA space. + * By doing this, we avoid the pitfalls of trying to DMA to + * MMIO space and the DMA alias hole. + * + * On POWER4, firmware sets the TCE region by assuming + * each TCE table is 8MB. Using this memory for anything + * else will impact performance, so we always allocate 8MB. 
+ * Anton + */ + if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p)) + minsize = 8UL << 20; + else + minsize = 4UL << 20; + + /* Align to the greater of the align or size */ + align = max(minalign, minsize); + base = alloc_down(minsize, align, 1); + if (base == 0) + prom_panic("ERROR, cannot find space for TCE table.\n"); + if (base < local_alloc_bottom) + local_alloc_bottom = base; + + /* It seems OF doesn't null-terminate the path :-( */ + memset(path, 0, PROM_SCRATCH_SIZE); + /* Call OF to setup the TCE hardware */ + if (call_prom("package-to-path", 3, 1, node, + path, PROM_SCRATCH_SIZE-1) == PROM_ERROR) { + prom_printf("package-to-path failed\n"); + } + + /* Save away the TCE table attributes for later use. */ + prom_setprop(node, path, "linux,tce-base", &base, sizeof(base)); + prom_setprop(node, path, "linux,tce-size", &minsize, sizeof(minsize)); + + prom_debug("TCE table: %s\n", path); + prom_debug("\tnode = 0x%x\n", node); + prom_debug("\tbase = 0x%x\n", base); + prom_debug("\tsize = 0x%x\n", minsize); + + /* Initialize the table to have a one-to-one mapping + * over the allocated size. + */ + tce_entryp = (u64 *)base; + for (i = 0; i < (minsize >> 3) ;tce_entryp++, i++) { + tce_entry = (i << PAGE_SHIFT); + tce_entry |= 0x3; + *tce_entryp = tce_entry; + } + + prom_printf("opening PHB %s", path); + phb_node = call_prom("open", 1, 1, path); + if (phb_node == 0) + prom_printf("... failed\n"); + else + prom_printf("... done\n"); + + call_prom("call-method", 6, 0, ADDR("set-64-bit-addressing"), + phb_node, -1, minsize, + (u32) base, (u32) (base >> 32)); + call_prom("close", 1, 0, phb_node); + } + + reserve_mem(local_alloc_bottom, local_alloc_top - local_alloc_bottom); + + /* These are only really needed if there is a memory limit in + * effect, but we don't know so export them always. */ + RELOC(prom_tce_alloc_start) = local_alloc_bottom; + RELOC(prom_tce_alloc_end) = local_alloc_top; + + /* Flag the first invalid entry */ + prom_debug("ending prom_initialize_tce_table\n"); +} +#endif + +/* + * With CHRP SMP we need to use the OF to start the other processors. + * We can't wait until smp_boot_cpus (the OF is trashed by then) + * so we have to put the processors into a holding pattern controlled + * by the kernel (not OF) before we destroy the OF. + * + * This uses a chunk of low memory, puts some holding pattern + * code there and sends the other processors off to there until + * smp_boot_cpus tells them to do something. The holding pattern + * checks that address until its cpu # is there, when it is that + * cpu jumps to __secondary_start(). smp_boot_cpus() takes care + * of setting those values. + * + * We also use physical address 0x4 here to tell when a cpu + * is in its holding pattern code. 
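 *
 * A rough C sketch of the secondary side of that handshake (the real
 * code is assembly in the head_* files; my_hw_cpu_id is only a
 * placeholder name here):
 *
 *	__secondary_hold_acknowledge = my_hw_cpu_id;
 *	while (__secondary_hold_spinloop == 0)
 *		;
 *	jump to __secondary_start;
 *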
+ * + * -- Cort + */ +/* + * We want to reference the copy of __secondary_hold_* in the + * 0 - 0x100 address range + */ +#define LOW_ADDR(x) (((unsigned long) &(x)) & 0xff) + +static void __init prom_hold_cpus(void) +{ + unsigned long i; + unsigned int reg; + phandle node; + char type[64]; + struct prom_t *_prom = &RELOC(prom); + unsigned long *spinloop + = (void *) LOW_ADDR(__secondary_hold_spinloop); + unsigned long *acknowledge + = (void *) LOW_ADDR(__secondary_hold_acknowledge); + unsigned long secondary_hold = LOW_ADDR(__secondary_hold); + + prom_debug("prom_hold_cpus: start...\n"); + prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop); + prom_debug(" 1) *spinloop = 0x%x\n", *spinloop); + prom_debug(" 1) acknowledge = 0x%x\n", + (unsigned long)acknowledge); + prom_debug(" 1) *acknowledge = 0x%x\n", *acknowledge); + prom_debug(" 1) secondary_hold = 0x%x\n", secondary_hold); + + /* Set the common spinloop variable, so all of the secondary cpus + * will block when they are awakened from their OF spinloop. + * This must occur for both SMP and non SMP kernels, since OF will + * be trashed when we move the kernel. + */ + *spinloop = 0; + + /* look for cpus */ + for (node = 0; prom_next_node(&node); ) { + type[0] = 0; + prom_getprop(node, "device_type", type, sizeof(type)); + if (strcmp(type, RELOC("cpu")) != 0) + continue; + + /* Skip non-configured cpus. */ + if (prom_getprop(node, "status", type, sizeof(type)) > 0) + if (strcmp(type, RELOC("okay")) != 0) + continue; + + reg = -1; + prom_getprop(node, "reg", ®, sizeof(reg)); + + prom_debug("cpu hw idx = %lu\n", reg); + + /* Init the acknowledge var which will be reset by + * the secondary cpu when it awakens from its OF + * spinloop. + */ + *acknowledge = (unsigned long)-1; + + if (reg != _prom->cpu) { + /* Primary Thread of non-boot cpu or any thread */ + prom_printf("starting cpu hw idx %lu... ", reg); + call_prom("start-cpu", 3, 0, node, + secondary_hold, reg); + + for (i = 0; (i < 100000000) && + (*acknowledge == ((unsigned long)-1)); i++ ) + mb(); + + if (*acknowledge == reg) + prom_printf("done\n"); + else + prom_printf("failed: %x\n", *acknowledge); + } +#ifdef CONFIG_SMP + else + prom_printf("boot cpu hw idx %lu\n", reg); +#endif /* CONFIG_SMP */ + } + + prom_debug("prom_hold_cpus: end...\n"); +} + + +static void __init prom_init_client_services(unsigned long pp) +{ + struct prom_t *_prom = &RELOC(prom); + + /* Get a handle to the prom entry point before anything else */ + RELOC(prom_entry) = pp; + + /* get a handle for the stdout device */ + _prom->chosen = call_prom("finddevice", 1, 1, ADDR("/chosen")); + if (!PHANDLE_VALID(_prom->chosen)) + prom_panic("cannot find chosen"); /* msg won't be printed :( */ + + /* get device tree root */ + _prom->root = call_prom("finddevice", 1, 1, ADDR("/")); + if (!PHANDLE_VALID(_prom->root)) + prom_panic("cannot find device tree root"); /* msg won't be printed :( */ + + _prom->mmumap = 0; +} + +#ifdef CONFIG_PPC32 +/* + * For really old powermacs, we need to map things we claim. + * For that, we need the ihandle of the mmu. + * Also, on the longtrail, we need to work around other bugs. 
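 * (The expectation being that, with OF_WA_CLAIM set, memory claims are
 * made via "call-method"/"claim" on the /memory node and on the MMU
 * package opened below, rather than via the bare "claim" service.)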
+ */ +static void __init prom_find_mmu(void) +{ + struct prom_t *_prom = &RELOC(prom); + phandle oprom; + char version[64]; + + oprom = call_prom("finddevice", 1, 1, ADDR("/openprom")); + if (!PHANDLE_VALID(oprom)) + return; + if (prom_getprop(oprom, "model", version, sizeof(version)) <= 0) + return; + version[sizeof(version) - 1] = 0; + /* XXX might need to add other versions here */ + if (strcmp(version, "Open Firmware, 1.0.5") == 0) + of_workarounds = OF_WA_CLAIM; + else if (strncmp(version, "FirmWorks,3.", 12) == 0) { + of_workarounds = OF_WA_CLAIM | OF_WA_LONGTRAIL; + call_prom("interpret", 1, 1, "dev /memory 0 to allow-reclaim"); + } else + return; + _prom->memory = call_prom("open", 1, 1, ADDR("/memory")); + prom_getprop(_prom->chosen, "mmu", &_prom->mmumap, + sizeof(_prom->mmumap)); + if (!IHANDLE_VALID(_prom->memory) || !IHANDLE_VALID(_prom->mmumap)) + of_workarounds &= ~OF_WA_CLAIM; /* hmmm */ +} +#else +#define prom_find_mmu() +#endif + +static void __init prom_init_stdout(void) +{ + struct prom_t *_prom = &RELOC(prom); + char *path = RELOC(of_stdout_device); + char type[16]; + u32 val; + + if (prom_getprop(_prom->chosen, "stdout", &val, sizeof(val)) <= 0) + prom_panic("cannot find stdout"); + + _prom->stdout = val; + + /* Get the full OF pathname of the stdout device */ + memset(path, 0, 256); + call_prom("instance-to-path", 3, 1, _prom->stdout, path, 255); + val = call_prom("instance-to-package", 1, 1, _prom->stdout); + prom_setprop(_prom->chosen, "/chosen", "linux,stdout-package", + &val, sizeof(val)); + prom_printf("OF stdout device is: %s\n", RELOC(of_stdout_device)); + prom_setprop(_prom->chosen, "/chosen", "linux,stdout-path", + path, strlen(path) + 1); + + /* If it's a display, note it */ + memset(type, 0, sizeof(type)); + prom_getprop(val, "device_type", type, sizeof(type)); + if (strcmp(type, RELOC("display")) == 0) + prom_setprop(val, path, "linux,boot-display", NULL, 0); +} + +static int __init prom_find_machine_type(void) +{ + struct prom_t *_prom = &RELOC(prom); + char compat[256]; + int len, i = 0; +#ifdef CONFIG_PPC64 + phandle rtas; + int x; +#endif + + /* Look for a PowerMac or a Cell */ + len = prom_getprop(_prom->root, "compatible", + compat, sizeof(compat)-1); + if (len > 0) { + compat[len] = 0; + while (i < len) { + char *p = &compat[i]; + int sl = strlen(p); + if (sl == 0) + break; + if (strstr(p, RELOC("Power Macintosh")) || + strstr(p, RELOC("MacRISC"))) + return PLATFORM_POWERMAC; +#ifdef CONFIG_PPC64 + /* We must make sure we don't detect the IBM Cell + * blades as pSeries due to some firmware issues, + * so we do it here. + */ + if (strstr(p, RELOC("IBM,CBEA")) || + strstr(p, RELOC("IBM,CPBW-1.0"))) + return PLATFORM_GENERIC; +#endif /* CONFIG_PPC64 */ + i += sl + 1; + } + } +#ifdef CONFIG_PPC64 + /* Try to detect OPAL */ + if (PHANDLE_VALID(call_prom("finddevice", 1, 1, ADDR("/ibm,opal")))) + return PLATFORM_OPAL; + + /* Try to figure out if it's an IBM pSeries or any other + * PAPR compliant platform. We assume it is if : + * - /device_type is "chrp" (please, do NOT use that for future + * non-IBM designs ! + * - it has /rtas + */ + len = prom_getprop(_prom->root, "device_type", + compat, sizeof(compat)-1); + if (len <= 0) + return PLATFORM_GENERIC; + if (strcmp(compat, RELOC("chrp"))) + return PLATFORM_GENERIC; + + /* Default to pSeries. 
We need to know if we are running LPAR */ + rtas = call_prom("finddevice", 1, 1, ADDR("/rtas")); + if (!PHANDLE_VALID(rtas)) + return PLATFORM_GENERIC; + x = prom_getproplen(rtas, "ibm,hypertas-functions"); + if (x != PROM_ERROR) { + prom_debug("Hypertas detected, assuming LPAR !\n"); + return PLATFORM_PSERIES_LPAR; + } + return PLATFORM_PSERIES; +#else + return PLATFORM_GENERIC; +#endif +} + +static int __init prom_set_color(ihandle ih, int i, int r, int g, int b) +{ + return call_prom("call-method", 6, 1, ADDR("color!"), ih, i, b, g, r); +} + +/* + * If we have a display that we don't know how to drive, + * we will want to try to execute OF's open method for it + * later. However, OF will probably fall over if we do that + * we've taken over the MMU. + * So we check whether we will need to open the display, + * and if so, open it now. + */ +static void __init prom_check_displays(void) +{ + char type[16], *path; + phandle node; + ihandle ih; + int i; + + static unsigned char default_colors[] = { + 0x00, 0x00, 0x00, + 0x00, 0x00, 0xaa, + 0x00, 0xaa, 0x00, + 0x00, 0xaa, 0xaa, + 0xaa, 0x00, 0x00, + 0xaa, 0x00, 0xaa, + 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, + 0x55, 0x55, 0x55, + 0x55, 0x55, 0xff, + 0x55, 0xff, 0x55, + 0x55, 0xff, 0xff, + 0xff, 0x55, 0x55, + 0xff, 0x55, 0xff, + 0xff, 0xff, 0x55, + 0xff, 0xff, 0xff + }; + const unsigned char *clut; + + prom_debug("Looking for displays\n"); + for (node = 0; prom_next_node(&node); ) { + memset(type, 0, sizeof(type)); + prom_getprop(node, "device_type", type, sizeof(type)); + if (strcmp(type, RELOC("display")) != 0) + continue; + + /* It seems OF doesn't null-terminate the path :-( */ + path = RELOC(prom_scratch); + memset(path, 0, PROM_SCRATCH_SIZE); + + /* + * leave some room at the end of the path for appending extra + * arguments + */ + if (call_prom("package-to-path", 3, 1, node, path, + PROM_SCRATCH_SIZE-10) == PROM_ERROR) + continue; + prom_printf("found display : %s, opening... ", path); + + ih = call_prom("open", 1, 1, path); + if (ih == 0) { + prom_printf("failed\n"); + continue; + } + + /* Success */ + prom_printf("done\n"); + prom_setprop(node, path, "linux,opened", NULL, 0); + + /* Setup a usable color table when the appropriate + * method is available. Should update this to set-colors */ + clut = RELOC(default_colors); + for (i = 0; i < 16; i++, clut += 3) + if (prom_set_color(ih, i, clut[0], clut[1], + clut[2]) != 0) + break; + +#ifdef CONFIG_LOGO_LINUX_CLUT224 + clut = PTRRELOC(RELOC(logo_linux_clut224.clut)); + for (i = 0; i < RELOC(logo_linux_clut224.clutsize); i++, clut += 3) + if (prom_set_color(ih, i + 32, clut[0], clut[1], + clut[2]) != 0) + break; +#endif /* CONFIG_LOGO_LINUX_CLUT224 */ + } +} + + +/* Return (relocated) pointer to this much memory: moves initrd if reqd. 
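 *
 * Typical use, as in the flattening code further down:
 *
 *	hdr = make_room(&mem_start, &mem_end,
 *			sizeof(struct boot_param_header), 4);
 *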
*/ +static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end, + unsigned long needed, unsigned long align) +{ + void *ret; + + *mem_start = _ALIGN(*mem_start, align); + while ((*mem_start + needed) > *mem_end) { + unsigned long room, chunk; + + prom_debug("Chunk exhausted, claiming more at %x...\n", + RELOC(alloc_bottom)); + room = RELOC(alloc_top) - RELOC(alloc_bottom); + if (room > DEVTREE_CHUNK_SIZE) + room = DEVTREE_CHUNK_SIZE; + if (room < PAGE_SIZE) + prom_panic("No memory for flatten_device_tree " + "(no room)\n"); + chunk = alloc_up(room, 0); + if (chunk == 0) + prom_panic("No memory for flatten_device_tree " + "(claim failed)\n"); + *mem_end = chunk + room; + } + + ret = (void *)*mem_start; + *mem_start += needed; + + return ret; +} + +#define dt_push_token(token, mem_start, mem_end) \ + do { *((u32 *)make_room(mem_start, mem_end, 4, 4)) = token; } while(0) + +static unsigned long __init dt_find_string(char *str) +{ + char *s, *os; + + s = os = (char *)RELOC(dt_string_start); + s += 4; + while (s < (char *)RELOC(dt_string_end)) { + if (strcmp(s, str) == 0) + return s - os; + s += strlen(s) + 1; + } + return 0; +} + +/* + * The Open Firmware 1275 specification states properties must be 31 bytes or + * less, however not all firmwares obey this. Make it 64 bytes to be safe. + */ +#define MAX_PROPERTY_NAME 64 + +static void __init scan_dt_build_strings(phandle node, + unsigned long *mem_start, + unsigned long *mem_end) +{ + char *prev_name, *namep, *sstart; + unsigned long soff; + phandle child; + + sstart = (char *)RELOC(dt_string_start); + + /* get and store all property names */ + prev_name = RELOC(""); + for (;;) { + /* 64 is max len of name including nul. */ + namep = make_room(mem_start, mem_end, MAX_PROPERTY_NAME, 1); + if (call_prom("nextprop", 3, 1, node, prev_name, namep) != 1) { + /* No more nodes: unwind alloc */ + *mem_start = (unsigned long)namep; + break; + } + + /* skip "name" */ + if (strcmp(namep, RELOC("name")) == 0) { + *mem_start = (unsigned long)namep; + prev_name = RELOC("name"); + continue; + } + /* get/create string entry */ + soff = dt_find_string(namep); + if (soff != 0) { + *mem_start = (unsigned long)namep; + namep = sstart + soff; + } else { + /* Trim off some if we can */ + *mem_start = (unsigned long)namep + strlen(namep) + 1; + RELOC(dt_string_end) = *mem_start; + } + prev_name = namep; + } + + /* do all our children */ + child = call_prom("child", 1, 1, node); + while (child != 0) { + scan_dt_build_strings(child, mem_start, mem_end); + child = call_prom("peer", 1, 1, child); + } +} + +static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, + unsigned long *mem_end) +{ + phandle child; + char *namep, *prev_name, *sstart, *p, *ep, *lp, *path; + unsigned long soff; + unsigned char *valp; + static char pname[MAX_PROPERTY_NAME]; + int l, room, has_phandle = 0; + + dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end); + + /* get the node's full name */ + namep = (char *)*mem_start; + room = *mem_end - *mem_start; + if (room > 255) + room = 255; + l = call_prom("package-to-path", 3, 1, node, namep, room); + if (l >= 0) { + /* Didn't fit? Get more room. */ + if (l >= room) { + if (l >= *mem_end - *mem_start) + namep = make_room(mem_start, mem_end, l+1, 1); + call_prom("package-to-path", 3, 1, node, namep, l); + } + namep[l] = '\0'; + + /* Fixup an Apple bug where they have bogus \0 chars in the + * middle of the path in some properties, and extract + * the unit name (everything after the last '/'). 
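 * For example, a reported "/u3@0,f8000000/i2c@f8001000" comes out
 * as "i2c@f8001000", with any embedded NULs dropped along the way.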
+ */ + for (lp = p = namep, ep = namep + l; p < ep; p++) { + if (*p == '/') + lp = namep; + else if (*p != 0) + *lp++ = *p; + } + *lp = 0; + *mem_start = _ALIGN((unsigned long)lp + 1, 4); + } + + /* get it again for debugging */ + path = RELOC(prom_scratch); + memset(path, 0, PROM_SCRATCH_SIZE); + call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); + + /* get and store all properties */ + prev_name = RELOC(""); + sstart = (char *)RELOC(dt_string_start); + for (;;) { + if (call_prom("nextprop", 3, 1, node, prev_name, + RELOC(pname)) != 1) + break; + + /* skip "name" */ + if (strcmp(RELOC(pname), RELOC("name")) == 0) { + prev_name = RELOC("name"); + continue; + } + + /* find string offset */ + soff = dt_find_string(RELOC(pname)); + if (soff == 0) { + prom_printf("WARNING: Can't find string index for" + " <%s>, node %s\n", RELOC(pname), path); + break; + } + prev_name = sstart + soff; + + /* get length */ + l = call_prom("getproplen", 2, 1, node, RELOC(pname)); + + /* sanity checks */ + if (l == PROM_ERROR) + continue; + + /* push property head */ + dt_push_token(OF_DT_PROP, mem_start, mem_end); + dt_push_token(l, mem_start, mem_end); + dt_push_token(soff, mem_start, mem_end); + + /* push property content */ + valp = make_room(mem_start, mem_end, l, 4); + call_prom("getprop", 4, 1, node, RELOC(pname), valp, l); + *mem_start = _ALIGN(*mem_start, 4); + + if (!strcmp(RELOC(pname), RELOC("phandle"))) + has_phandle = 1; + } + + /* Add a "linux,phandle" property if no "phandle" property already + * existed (can happen with OPAL) + */ + if (!has_phandle) { + soff = dt_find_string(RELOC("linux,phandle")); + if (soff == 0) + prom_printf("WARNING: Can't find string index for" + " <linux-phandle> node %s\n", path); + else { + dt_push_token(OF_DT_PROP, mem_start, mem_end); + dt_push_token(4, mem_start, mem_end); + dt_push_token(soff, mem_start, mem_end); + valp = make_room(mem_start, mem_end, 4, 4); + *(u32 *)valp = node; + } + } + + /* do all our children */ + child = call_prom("child", 1, 1, node); + while (child != 0) { + scan_dt_build_struct(child, mem_start, mem_end); + child = call_prom("peer", 1, 1, child); + } + + dt_push_token(OF_DT_END_NODE, mem_start, mem_end); +} + +static void __init flatten_device_tree(void) +{ + phandle root; + unsigned long mem_start, mem_end, room; + struct boot_param_header *hdr; + struct prom_t *_prom = &RELOC(prom); + char *namep; + u64 *rsvmap; + + /* + * Check how much room we have between alloc top & bottom (+/- a + * few pages), crop to 1MB, as this is our "chunk" size + */ + room = RELOC(alloc_top) - RELOC(alloc_bottom) - 0x4000; + if (room > DEVTREE_CHUNK_SIZE) + room = DEVTREE_CHUNK_SIZE; + prom_debug("starting device tree allocs at %x\n", RELOC(alloc_bottom)); + + /* Now try to claim that */ + mem_start = (unsigned long)alloc_up(room, PAGE_SIZE); + if (mem_start == 0) + prom_panic("Can't allocate initial device-tree chunk\n"); + mem_end = mem_start + room; + + /* Get root of tree */ + root = call_prom("peer", 1, 1, (phandle)0); + if (root == (phandle)0) + prom_panic ("couldn't get device tree root\n"); + + /* Build header and make room for mem rsv map */ + mem_start = _ALIGN(mem_start, 4); + hdr = make_room(&mem_start, &mem_end, + sizeof(struct boot_param_header), 4); + RELOC(dt_header_start) = (unsigned long)hdr; + rsvmap = make_room(&mem_start, &mem_end, sizeof(mem_reserve_map), 8); + + /* Start of strings */ + mem_start = PAGE_ALIGN(mem_start); + RELOC(dt_string_start) = mem_start; + mem_start += 4; /* hole */ + + /* Add "linux,phandle" in 
there, we'll need it */ + namep = make_room(&mem_start, &mem_end, 16, 1); + strcpy(namep, RELOC("linux,phandle")); + mem_start = (unsigned long)namep + strlen(namep) + 1; + + /* Build string array */ + prom_printf("Building dt strings...\n"); + scan_dt_build_strings(root, &mem_start, &mem_end); + RELOC(dt_string_end) = mem_start; + + /* Build structure */ + mem_start = PAGE_ALIGN(mem_start); + RELOC(dt_struct_start) = mem_start; + prom_printf("Building dt structure...\n"); + scan_dt_build_struct(root, &mem_start, &mem_end); + dt_push_token(OF_DT_END, &mem_start, &mem_end); + RELOC(dt_struct_end) = PAGE_ALIGN(mem_start); + + /* Finish header */ + hdr->boot_cpuid_phys = _prom->cpu; + hdr->magic = OF_DT_HEADER; + hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start); + hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start); + hdr->off_dt_strings = RELOC(dt_string_start) - RELOC(dt_header_start); + hdr->dt_strings_size = RELOC(dt_string_end) - RELOC(dt_string_start); + hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - RELOC(dt_header_start); + hdr->version = OF_DT_VERSION; + /* Version 16 is not backward compatible */ + hdr->last_comp_version = 0x10; + + /* Copy the reserve map in */ + memcpy(rsvmap, RELOC(mem_reserve_map), sizeof(mem_reserve_map)); + +#ifdef DEBUG_PROM + { + int i; + prom_printf("reserved memory map:\n"); + for (i = 0; i < RELOC(mem_reserve_cnt); i++) + prom_printf(" %x - %x\n", + RELOC(mem_reserve_map)[i].base, + RELOC(mem_reserve_map)[i].size); + } +#endif + /* Bump mem_reserve_cnt to cause further reservations to fail + * since it's too late. + */ + RELOC(mem_reserve_cnt) = MEM_RESERVE_MAP_SIZE; + + prom_printf("Device tree strings 0x%x -> 0x%x\n", + RELOC(dt_string_start), RELOC(dt_string_end)); + prom_printf("Device tree struct 0x%x -> 0x%x\n", + RELOC(dt_struct_start), RELOC(dt_struct_end)); + +} + +#ifdef CONFIG_PPC_MAPLE +/* PIBS Version 1.05.0000 04/26/2005 has an incorrect /ht/isa/ranges property. + * The values are bad, and it doesn't even have the right number of cells. 
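 *
 * Concretely, the bogus 3-cell value <1 f4000000 10000> is rewritten
 * below as a proper 6-cell entry <1 0 rloc 0 0 10000>, where rloc
 * encodes the PCI address of the ISA bridge.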
*/ +static void __init fixup_device_tree_maple(void) +{ + phandle isa; + u32 rloc = 0x01002000; /* IO space; PCI device = 4 */ + u32 isa_ranges[6]; + char *name; + + name = "/ht@0/isa@4"; + isa = call_prom("finddevice", 1, 1, ADDR(name)); + if (!PHANDLE_VALID(isa)) { + name = "/ht@0/isa@6"; + isa = call_prom("finddevice", 1, 1, ADDR(name)); + rloc = 0x01003000; /* IO space; PCI device = 6 */ + } + if (!PHANDLE_VALID(isa)) + return; + + if (prom_getproplen(isa, "ranges") != 12) + return; + if (prom_getprop(isa, "ranges", isa_ranges, sizeof(isa_ranges)) + == PROM_ERROR) + return; + + if (isa_ranges[0] != 0x1 || + isa_ranges[1] != 0xf4000000 || + isa_ranges[2] != 0x00010000) + return; + + prom_printf("Fixing up bogus ISA range on Maple/Apache...\n"); + + isa_ranges[0] = 0x1; + isa_ranges[1] = 0x0; + isa_ranges[2] = rloc; + isa_ranges[3] = 0x0; + isa_ranges[4] = 0x0; + isa_ranges[5] = 0x00010000; + prom_setprop(isa, name, "ranges", + isa_ranges, sizeof(isa_ranges)); +} + +#define CPC925_MC_START 0xf8000000 +#define CPC925_MC_LENGTH 0x1000000 +/* The values for memory-controller don't have right number of cells */ +static void __init fixup_device_tree_maple_memory_controller(void) +{ + phandle mc; + u32 mc_reg[4]; + char *name = "/hostbridge@f8000000"; + struct prom_t *_prom = &RELOC(prom); + u32 ac, sc; + + mc = call_prom("finddevice", 1, 1, ADDR(name)); + if (!PHANDLE_VALID(mc)) + return; + + if (prom_getproplen(mc, "reg") != 8) + return; + + prom_getprop(_prom->root, "#address-cells", &ac, sizeof(ac)); + prom_getprop(_prom->root, "#size-cells", &sc, sizeof(sc)); + if ((ac != 2) || (sc != 2)) + return; + + if (prom_getprop(mc, "reg", mc_reg, sizeof(mc_reg)) == PROM_ERROR) + return; + + if (mc_reg[0] != CPC925_MC_START || mc_reg[1] != CPC925_MC_LENGTH) + return; + + prom_printf("Fixing up bogus hostbridge on Maple...\n"); + + mc_reg[0] = 0x0; + mc_reg[1] = CPC925_MC_START; + mc_reg[2] = 0x0; + mc_reg[3] = CPC925_MC_LENGTH; + prom_setprop(mc, name, "reg", mc_reg, sizeof(mc_reg)); +} +#else +#define fixup_device_tree_maple() +#define fixup_device_tree_maple_memory_controller() +#endif + +#ifdef CONFIG_PPC_CHRP +/* + * Pegasos and BriQ lacks the "ranges" property in the isa node + * Pegasos needs decimal IRQ 14/15, not hexadecimal + * Pegasos has the IDE configured in legacy mode, but advertised as native + */ +static void __init fixup_device_tree_chrp(void) +{ + phandle ph; + u32 prop[6]; + u32 rloc = 0x01006000; /* IO space; PCI device = 12 */ + char *name; + int rc; + + name = "/pci@80000000/isa@c"; + ph = call_prom("finddevice", 1, 1, ADDR(name)); + if (!PHANDLE_VALID(ph)) { + name = "/pci@ff500000/isa@6"; + ph = call_prom("finddevice", 1, 1, ADDR(name)); + rloc = 0x01003000; /* IO space; PCI device = 6 */ + } + if (PHANDLE_VALID(ph)) { + rc = prom_getproplen(ph, "ranges"); + if (rc == 0 || rc == PROM_ERROR) { + prom_printf("Fixing up missing ISA range on Pegasos...\n"); + + prop[0] = 0x1; + prop[1] = 0x0; + prop[2] = rloc; + prop[3] = 0x0; + prop[4] = 0x0; + prop[5] = 0x00010000; + prom_setprop(ph, name, "ranges", prop, sizeof(prop)); + } + } + + name = "/pci@80000000/ide@C,1"; + ph = call_prom("finddevice", 1, 1, ADDR(name)); + if (PHANDLE_VALID(ph)) { + prom_printf("Fixing up IDE interrupt on Pegasos...\n"); + prop[0] = 14; + prop[1] = 0x0; + prom_setprop(ph, name, "interrupts", prop, 2*sizeof(u32)); + prom_printf("Fixing up IDE class-code on Pegasos...\n"); + rc = prom_getprop(ph, "class-code", prop, sizeof(u32)); + if (rc == sizeof(u32)) { + prop[0] &= ~0x5; + prom_setprop(ph, name, 
"class-code", prop, sizeof(u32)); + } + } +} +#else +#define fixup_device_tree_chrp() +#endif + +#if defined(CONFIG_PPC64) && defined(CONFIG_PPC_PMAC) +static void __init fixup_device_tree_pmac(void) +{ + phandle u3, i2c, mpic; + u32 u3_rev; + u32 interrupts[2]; + u32 parent; + + /* Some G5s have a missing interrupt definition, fix it up here */ + u3 = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000")); + if (!PHANDLE_VALID(u3)) + return; + i2c = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000/i2c@f8001000")); + if (!PHANDLE_VALID(i2c)) + return; + mpic = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000/mpic@f8040000")); + if (!PHANDLE_VALID(mpic)) + return; + + /* check if proper rev of u3 */ + if (prom_getprop(u3, "device-rev", &u3_rev, sizeof(u3_rev)) + == PROM_ERROR) + return; + if (u3_rev < 0x35 || u3_rev > 0x39) + return; + /* does it need fixup ? */ + if (prom_getproplen(i2c, "interrupts") > 0) + return; + + prom_printf("fixing up bogus interrupts for u3 i2c...\n"); + + /* interrupt on this revision of u3 is number 0 and level */ + interrupts[0] = 0; + interrupts[1] = 1; + prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupts", + &interrupts, sizeof(interrupts)); + parent = (u32)mpic; + prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupt-parent", + &parent, sizeof(parent)); +} +#else +#define fixup_device_tree_pmac() +#endif + +#ifdef CONFIG_PPC_EFIKA +/* + * The MPC5200 FEC driver requires an phy-handle property to tell it how + * to talk to the phy. If the phy-handle property is missing, then this + * function is called to add the appropriate nodes and link it to the + * ethernet node. + */ +static void __init fixup_device_tree_efika_add_phy(void) +{ + u32 node; + char prop[64]; + int rv; + + /* Check if /builtin/ethernet exists - bail if it doesn't */ + node = call_prom("finddevice", 1, 1, ADDR("/builtin/ethernet")); + if (!PHANDLE_VALID(node)) + return; + + /* Check if the phy-handle property exists - bail if it does */ + rv = prom_getprop(node, "phy-handle", prop, sizeof(prop)); + if (!rv) + return; + + /* + * At this point the ethernet device doesn't have a phy described. 
+ * Now we need to add the missing phy node and linkage + */ + + /* Check for an MDIO bus node - if missing then create one */ + node = call_prom("finddevice", 1, 1, ADDR("/builtin/mdio")); + if (!PHANDLE_VALID(node)) { + prom_printf("Adding Ethernet MDIO node\n"); + call_prom("interpret", 1, 1, + " s\" /builtin\" find-device" + " new-device" + " 1 encode-int s\" #address-cells\" property" + " 0 encode-int s\" #size-cells\" property" + " s\" mdio\" device-name" + " s\" fsl,mpc5200b-mdio\" encode-string" + " s\" compatible\" property" + " 0xf0003000 0x400 reg" + " 0x2 encode-int" + " 0x5 encode-int encode+" + " 0x3 encode-int encode+" + " s\" interrupts\" property" + " finish-device"); + }; + + /* Check for a PHY device node - if missing then create one and + * give it's phandle to the ethernet node */ + node = call_prom("finddevice", 1, 1, + ADDR("/builtin/mdio/ethernet-phy")); + if (!PHANDLE_VALID(node)) { + prom_printf("Adding Ethernet PHY node\n"); + call_prom("interpret", 1, 1, + " s\" /builtin/mdio\" find-device" + " new-device" + " s\" ethernet-phy\" device-name" + " 0x10 encode-int s\" reg\" property" + " my-self" + " ihandle>phandle" + " finish-device" + " s\" /builtin/ethernet\" find-device" + " encode-int" + " s\" phy-handle\" property" + " device-end"); + } +} + +static void __init fixup_device_tree_efika(void) +{ + int sound_irq[3] = { 2, 2, 0 }; + int bcomm_irq[3*16] = { 3,0,0, 3,1,0, 3,2,0, 3,3,0, + 3,4,0, 3,5,0, 3,6,0, 3,7,0, + 3,8,0, 3,9,0, 3,10,0, 3,11,0, + 3,12,0, 3,13,0, 3,14,0, 3,15,0 }; + u32 node; + char prop[64]; + int rv, len; + + /* Check if we're really running on a EFIKA */ + node = call_prom("finddevice", 1, 1, ADDR("/")); + if (!PHANDLE_VALID(node)) + return; + + rv = prom_getprop(node, "model", prop, sizeof(prop)); + if (rv == PROM_ERROR) + return; + if (strcmp(prop, "EFIKA5K2")) + return; + + prom_printf("Applying EFIKA device tree fixups\n"); + + /* Claiming to be 'chrp' is death */ + node = call_prom("finddevice", 1, 1, ADDR("/")); + rv = prom_getprop(node, "device_type", prop, sizeof(prop)); + if (rv != PROM_ERROR && (strcmp(prop, "chrp") == 0)) + prom_setprop(node, "/", "device_type", "efika", sizeof("efika")); + + /* CODEGEN,description is exposed in /proc/cpuinfo so + fix that too */ + rv = prom_getprop(node, "CODEGEN,description", prop, sizeof(prop)); + if (rv != PROM_ERROR && (strstr(prop, "CHRP"))) + prom_setprop(node, "/", "CODEGEN,description", + "Efika 5200B PowerPC System", + sizeof("Efika 5200B PowerPC System")); + + /* Fixup bestcomm interrupts property */ + node = call_prom("finddevice", 1, 1, ADDR("/builtin/bestcomm")); + if (PHANDLE_VALID(node)) { + len = prom_getproplen(node, "interrupts"); + if (len == 12) { + prom_printf("Fixing bestcomm interrupts property\n"); + prom_setprop(node, "/builtin/bestcom", "interrupts", + bcomm_irq, sizeof(bcomm_irq)); + } + } + + /* Fixup sound interrupts property */ + node = call_prom("finddevice", 1, 1, ADDR("/builtin/sound")); + if (PHANDLE_VALID(node)) { + rv = prom_getprop(node, "interrupts", prop, sizeof(prop)); + if (rv == PROM_ERROR) { + prom_printf("Adding sound interrupts property\n"); + prom_setprop(node, "/builtin/sound", "interrupts", + sound_irq, sizeof(sound_irq)); + } + } + + /* Make sure ethernet phy-handle property exists */ + fixup_device_tree_efika_add_phy(); +} +#else +#define fixup_device_tree_efika() +#endif + +static void __init fixup_device_tree(void) +{ + fixup_device_tree_maple(); + fixup_device_tree_maple_memory_controller(); + fixup_device_tree_chrp(); + fixup_device_tree_pmac(); 
+ fixup_device_tree_efika(); +} + +static void __init prom_find_boot_cpu(void) +{ + struct prom_t *_prom = &RELOC(prom); + u32 getprop_rval; + ihandle prom_cpu; + phandle cpu_pkg; + + _prom->cpu = 0; + if (prom_getprop(_prom->chosen, "cpu", &prom_cpu, sizeof(prom_cpu)) <= 0) + return; + + cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu); + + prom_getprop(cpu_pkg, "reg", &getprop_rval, sizeof(getprop_rval)); + _prom->cpu = getprop_rval; + + prom_debug("Booting CPU hw index = %lu\n", _prom->cpu); +} + +static void __init prom_check_initrd(unsigned long r3, unsigned long r4) +{ +#ifdef CONFIG_BLK_DEV_INITRD + struct prom_t *_prom = &RELOC(prom); + + if (r3 && r4 && r4 != 0xdeadbeef) { + unsigned long val; + + RELOC(prom_initrd_start) = is_kernel_addr(r3) ? __pa(r3) : r3; + RELOC(prom_initrd_end) = RELOC(prom_initrd_start) + r4; + + val = RELOC(prom_initrd_start); + prom_setprop(_prom->chosen, "/chosen", "linux,initrd-start", + &val, sizeof(val)); + val = RELOC(prom_initrd_end); + prom_setprop(_prom->chosen, "/chosen", "linux,initrd-end", + &val, sizeof(val)); + + reserve_mem(RELOC(prom_initrd_start), + RELOC(prom_initrd_end) - RELOC(prom_initrd_start)); + + prom_debug("initrd_start=0x%x\n", RELOC(prom_initrd_start)); + prom_debug("initrd_end=0x%x\n", RELOC(prom_initrd_end)); + } +#endif /* CONFIG_BLK_DEV_INITRD */ +} + + +/* + * We enter here early on, when the Open Firmware prom is still + * handling exceptions and the MMU hash table for us. + */ + +unsigned long __init prom_init(unsigned long r3, unsigned long r4, + unsigned long pp, + unsigned long r6, unsigned long r7, + unsigned long kbase) +{ + struct prom_t *_prom; + unsigned long hdr; + +#ifdef CONFIG_PPC32 + unsigned long offset = reloc_offset(); + reloc_got2(offset); +#endif + + _prom = &RELOC(prom); + + /* + * First zero the BSS + */ + memset(&RELOC(__bss_start), 0, __bss_stop - __bss_start); + + /* + * Init interface to Open Firmware, get some node references, + * like /chosen + */ + prom_init_client_services(pp); + + /* + * See if this OF is old enough that we need to do explicit maps + * and other workarounds + */ + prom_find_mmu(); + + /* + * Init prom stdout device + */ + prom_init_stdout(); + + prom_printf("Preparing to boot %s", RELOC(linux_banner)); + + /* + * Get default machine type. At this point, we do not differentiate + * between pSeries SMP and pSeries LPAR + */ + RELOC(of_platform) = prom_find_machine_type(); + prom_printf("Detected machine type: %x\n", RELOC(of_platform)); + +#ifndef CONFIG_NONSTATIC_KERNEL + /* Bail if this is a kdump kernel. */ + if (PHYSICAL_START > 0) + prom_panic("Error: You can't boot a kdump kernel from OF!\n"); +#endif + + /* + * Check for an initrd + */ + prom_check_initrd(r3, r4); + +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) + /* + * On pSeries, inform the firmware about our capabilities + */ + if (RELOC(of_platform) == PLATFORM_PSERIES || + RELOC(of_platform) == PLATFORM_PSERIES_LPAR) + prom_send_capabilities(); +#endif + + /* + * Copy the CPU hold code + */ + if (RELOC(of_platform) != PLATFORM_POWERMAC) + copy_and_flush(0, kbase, 0x100, 0); + + /* + * Do early parsing of command line + */ + early_cmdline_parse(); + + /* + * Initialize memory management within prom_init + */ + prom_init_mem(); + + /* + * Determine which cpu is actually running right _now_ + */ + prom_find_boot_cpu(); + + /* + * Initialize display devices + */ + prom_check_displays(); + +#ifdef CONFIG_PPC64 + /* + * Initialize IOMMU (TCE tables) on pSeries. 
Do that before anything else + * that uses the allocator, we need to make sure we get the top of memory + * available for us here... + */ + if (RELOC(of_platform) == PLATFORM_PSERIES) + prom_initialize_tce_table(); +#endif + + /* + * On non-powermacs, try to instantiate RTAS. PowerMacs don't + * have a usable RTAS implementation. + */ + if (RELOC(of_platform) != PLATFORM_POWERMAC && + RELOC(of_platform) != PLATFORM_OPAL) + prom_instantiate_rtas(); + +#ifdef CONFIG_PPC_POWERNV + /* Detect HAL and try instanciating it & doing takeover */ + if (RELOC(of_platform) == PLATFORM_PSERIES_LPAR) { + prom_query_opal(); + if (RELOC(of_platform) == PLATFORM_OPAL) { + prom_opal_hold_cpus(); + prom_opal_takeover(); + } + } else if (RELOC(of_platform) == PLATFORM_OPAL) + prom_instantiate_opal(); +#endif + + /* + * On non-powermacs, put all CPUs in spin-loops. + * + * PowerMacs use a different mechanism to spin CPUs + */ + if (RELOC(of_platform) != PLATFORM_POWERMAC && + RELOC(of_platform) != PLATFORM_OPAL) + prom_hold_cpus(); + + /* + * Fill in some infos for use by the kernel later on + */ + if (RELOC(prom_memory_limit)) + prom_setprop(_prom->chosen, "/chosen", "linux,memory-limit", + &RELOC(prom_memory_limit), + sizeof(prom_memory_limit)); +#ifdef CONFIG_PPC64 + if (RELOC(prom_iommu_off)) + prom_setprop(_prom->chosen, "/chosen", "linux,iommu-off", + NULL, 0); + + if (RELOC(prom_iommu_force_on)) + prom_setprop(_prom->chosen, "/chosen", "linux,iommu-force-on", + NULL, 0); + + if (RELOC(prom_tce_alloc_start)) { + prom_setprop(_prom->chosen, "/chosen", "linux,tce-alloc-start", + &RELOC(prom_tce_alloc_start), + sizeof(prom_tce_alloc_start)); + prom_setprop(_prom->chosen, "/chosen", "linux,tce-alloc-end", + &RELOC(prom_tce_alloc_end), + sizeof(prom_tce_alloc_end)); + } +#endif + + /* + * Fixup any known bugs in the device-tree + */ + fixup_device_tree(); + + /* + * Now finally create the flattened device-tree + */ + prom_printf("copying OF device tree...\n"); + flatten_device_tree(); + + /* + * in case stdin is USB and still active on IBM machines... + * Unfortunately quiesce crashes on some powermacs if we have + * closed stdin already (in particular the powerbook 101). It + * appears that the OPAL version of OFW doesn't like it either. + */ + if (RELOC(of_platform) != PLATFORM_POWERMAC && + RELOC(of_platform) != PLATFORM_OPAL) + prom_close_stdin(); + + /* + * Call OF "quiesce" method to shut down pending DMA's from + * devices etc... 
+ */ + prom_printf("Calling quiesce...\n"); + call_prom("quiesce", 0, 0); + + /* + * And finally, call the kernel passing it the flattened device + * tree and NULL as r5, thus triggering the new entry point which + * is common to us and kexec + */ + hdr = RELOC(dt_header_start); + + /* Don't print anything after quiesce under OPAL, it crashes OFW */ + if (RELOC(of_platform) != PLATFORM_OPAL) { + prom_printf("returning from prom_init\n"); + prom_debug("->dt_header_start=0x%x\n", hdr); + } + +#ifdef CONFIG_PPC32 + reloc_got2(-offset); +#endif + +#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL + /* OPAL early debug gets the OPAL base & entry in r8 and r9 */ + __start(hdr, kbase, 0, 0, 0, + RELOC(prom_opal_base), RELOC(prom_opal_entry)); +#else + __start(hdr, kbase, 0, 0, 0, 0, 0); +#endif + + return 0; +} diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh new file mode 100644 index 00000000..70f4286e --- /dev/null +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -0,0 +1,80 @@ +#!/bin/sh +# +# Copyright © 2008 IBM Corporation +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version +# 2 of the License, or (at your option) any later version. + +# This script checks prom_init.o to see what external symbols it +# is using, if it finds symbols not in the whitelist it returns +# an error. The point of this is to discourage people from +# intentionally or accidentally adding new code to prom_init.c +# which has side effects on other parts of the kernel. + +# If you really need to reference something from prom_init.o add +# it to the list below: + +WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush +_end enter_prom memcpy memset reloc_offset __secondary_hold +__secondary_hold_acknowledge __secondary_hold_spinloop __start +strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224 +reloc_got2 kernstart_addr memstart_addr linux_banner _stext +opal_query_takeover opal_do_takeover opal_enter_rtas opal_secondary_entry +boot_command_line" + +NM="$1" +OBJ="$2" + +ERROR=0 + +for UNDEF in $($NM -u $OBJ | awk '{print $2}') +do + # On 64-bit nm gives us the function descriptors, which have + # a leading . on the name, so strip it off here. 
+ UNDEF="${UNDEF#.}" + + if [ $KBUILD_VERBOSE ]; then + if [ $KBUILD_VERBOSE -ne 0 ]; then + echo "Checking prom_init.o symbol '$UNDEF'" + fi + fi + + OK=0 + for WHITE in $WHITELIST + do + if [ "$UNDEF" = "$WHITE" ]; then + OK=1 + break + fi + done + + # ignore register save/restore funcitons + if [ "${UNDEF:0:9}" = "_restgpr_" ]; then + OK=1 + fi + if [ "${UNDEF:0:10}" = "_restgpr0_" ]; then + OK=1 + fi + if [ "${UNDEF:0:11}" = "_rest32gpr_" ]; then + OK=1 + fi + if [ "${UNDEF:0:9}" = "_savegpr_" ]; then + OK=1 + fi + if [ "${UNDEF:0:10}" = "_savegpr0_" ]; then + OK=1 + fi + if [ "${UNDEF:0:11}" = "_save32gpr_" ]; then + OK=1 + fi + + if [ $OK -eq 0 ]; then + ERROR=1 + echo "Error: External symbol '$UNDEF' referenced" \ + "from prom_init.c" >&2 + fi +done + +exit $ERROR diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c new file mode 100644 index 00000000..4e1331b8 --- /dev/null +++ b/arch/powerpc/kernel/prom_parse.c @@ -0,0 +1,34 @@ +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/ioport.h> +#include <linux/etherdevice.h> +#include <linux/of_address.h> +#include <asm/prom.h> + +void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, + unsigned long *busno, unsigned long *phys, unsigned long *size) +{ + const u32 *dma_window; + u32 cells; + const unsigned char *prop; + + dma_window = dma_window_prop; + + /* busno is always one cell */ + *busno = *(dma_window++); + + prop = of_get_property(dn, "ibm,#dma-address-cells", NULL); + if (!prop) + prop = of_get_property(dn, "#address-cells", NULL); + + cells = prop ? *(u32 *)prop : of_n_addr_cells(dn); + *phys = of_read_number(dma_window, cells); + + dma_window += cells; + + prop = of_get_property(dn, "ibm,#dma-size-cells", NULL); + cells = prop ? *(u32 *)prop : of_n_size_cells(dn); + *size = of_read_number(dma_window, cells); +} diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c new file mode 100644 index 00000000..8d8e0288 --- /dev/null +++ b/arch/powerpc/kernel/ptrace.c @@ -0,0 +1,1757 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/m68k/kernel/ptrace.c" + * Copyright (C) 1994 by Hamish Macdonald + * Taken from linux/kernel/ptrace.c and modified for M680x0. + * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds + * + * Modified by Cort Dougan (cort@hq.fsmlabs.com) + * and Paul Mackerras (paulus@samba.org). + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file README.legal in the main directory of + * this archive for more details. + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/regset.h> +#include <linux/tracehook.h> +#include <linux/elf.h> +#include <linux/user.h> +#include <linux/security.h> +#include <linux/signal.h> +#include <linux/seccomp.h> +#include <linux/audit.h> +#include <trace/syscall.h> +#include <linux/hw_breakpoint.h> +#include <linux/perf_event.h> + +#include <asm/uaccess.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/switch_to.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + +/* + * The parameter save area on the stack is used to store arguments being passed + * to callee function and is located at fixed offset from stack pointer. 
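 *
 * (On 64-bit the 48 bytes are the ELF ABI stack-frame header: back
 * chain, CR save, LR save, two reserved doublewords and the TOC save
 * slot, 8 bytes each, after which the argument words start.)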
+ */ +#ifdef CONFIG_PPC32 +#define PARAMETER_SAVE_AREA_OFFSET 24 /* bytes */ +#else /* CONFIG_PPC32 */ +#define PARAMETER_SAVE_AREA_OFFSET 48 /* bytes */ +#endif + +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define STR(s) #s /* convert to string */ +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define GPR_OFFSET_NAME(num) \ + {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +static const struct pt_regs_offset regoffset_table[] = { + GPR_OFFSET_NAME(0), + GPR_OFFSET_NAME(1), + GPR_OFFSET_NAME(2), + GPR_OFFSET_NAME(3), + GPR_OFFSET_NAME(4), + GPR_OFFSET_NAME(5), + GPR_OFFSET_NAME(6), + GPR_OFFSET_NAME(7), + GPR_OFFSET_NAME(8), + GPR_OFFSET_NAME(9), + GPR_OFFSET_NAME(10), + GPR_OFFSET_NAME(11), + GPR_OFFSET_NAME(12), + GPR_OFFSET_NAME(13), + GPR_OFFSET_NAME(14), + GPR_OFFSET_NAME(15), + GPR_OFFSET_NAME(16), + GPR_OFFSET_NAME(17), + GPR_OFFSET_NAME(18), + GPR_OFFSET_NAME(19), + GPR_OFFSET_NAME(20), + GPR_OFFSET_NAME(21), + GPR_OFFSET_NAME(22), + GPR_OFFSET_NAME(23), + GPR_OFFSET_NAME(24), + GPR_OFFSET_NAME(25), + GPR_OFFSET_NAME(26), + GPR_OFFSET_NAME(27), + GPR_OFFSET_NAME(28), + GPR_OFFSET_NAME(29), + GPR_OFFSET_NAME(30), + GPR_OFFSET_NAME(31), + REG_OFFSET_NAME(nip), + REG_OFFSET_NAME(msr), + REG_OFFSET_NAME(ctr), + REG_OFFSET_NAME(link), + REG_OFFSET_NAME(xer), + REG_OFFSET_NAME(ccr), +#ifdef CONFIG_PPC64 + REG_OFFSET_NAME(softe), +#else + REG_OFFSET_NAME(mq), +#endif + REG_OFFSET_NAME(trap), + REG_OFFSET_NAME(dar), + REG_OFFSET_NAME(dsisr), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_query_register_name() - query register name from its offset + * @offset: the offset of a register in struct pt_regs. + * + * regs_query_register_name() returns the name of a register from its + * offset in struct pt_regs. If the @offset is invalid, this returns NULL; + */ +const char *regs_query_register_name(unsigned int offset) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (roff->offset == offset) + return roff->name; + return NULL; +} + +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +/* + * Set of msr bits that gdb can change on behalf of a process. + */ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +#define MSR_DEBUGCHANGE 0 +#else +#define MSR_DEBUGCHANGE (MSR_SE | MSR_BE) +#endif + +/* + * Max register writeable via put_reg + */ +#ifdef CONFIG_PPC32 +#define PT_MAX_PUT_REG PT_MQ +#else +#define PT_MAX_PUT_REG PT_CCR +#endif + +static unsigned long get_user_msr(struct task_struct *task) +{ + return task->thread.regs->msr | task->thread.fpexc_mode; +} + +static int set_user_msr(struct task_struct *task, unsigned long msr) +{ + task->thread.regs->msr &= ~MSR_DEBUGCHANGE; + task->thread.regs->msr |= msr & MSR_DEBUGCHANGE; + return 0; +} + +/* + * We prevent mucking around with the reserved area of trap + * which are used internally by the kernel. 
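 * Hence the store below masks off the low four bits, so only the
 * exception-vector part can be written from user space:
 *
 *	task->thread.regs->trap = trap & 0xfff0;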
+ */ +static int set_user_trap(struct task_struct *task, unsigned long trap) +{ + task->thread.regs->trap = trap & 0xfff0; + return 0; +} + +/* + * Get contents of register REGNO in task TASK. + */ +unsigned long ptrace_get_reg(struct task_struct *task, int regno) +{ + if (task->thread.regs == NULL) + return -EIO; + + if (regno == PT_MSR) + return get_user_msr(task); + + if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) + return ((unsigned long *)task->thread.regs)[regno]; + + return -EIO; +} + +/* + * Write contents of register REGNO in task TASK. + */ +int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data) +{ + if (task->thread.regs == NULL) + return -EIO; + + if (regno == PT_MSR) + return set_user_msr(task, data); + if (regno == PT_TRAP) + return set_user_trap(task, data); + + if (regno <= PT_MAX_PUT_REG) { + ((unsigned long *)task->thread.regs)[regno] = data; + return 0; + } + return -EIO; +} + +static int gpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int i, ret; + + if (target->thread.regs == NULL) + return -EIO; + + if (!FULL_REGS(target->thread.regs)) { + /* We have a partial register set. Fill 14-31 with bogus values */ + for (i = 14; i < 32; i++) + target->thread.regs->gpr[i] = NV_REG_POISON; + } + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + target->thread.regs, + 0, offsetof(struct pt_regs, msr)); + if (!ret) { + unsigned long msr = get_user_msr(target); + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, + offsetof(struct pt_regs, msr), + offsetof(struct pt_regs, msr) + + sizeof(msr)); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct pt_regs, msr) + sizeof(long)); + + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.regs->orig_gpr3, + offsetof(struct pt_regs, orig_gpr3), + sizeof(struct pt_regs)); + if (!ret) + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + sizeof(struct pt_regs), -1); + + return ret; +} + +static int gpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned long reg; + int ret; + + if (target->thread.regs == NULL) + return -EIO; + + CHECK_FULL_REGS(target->thread.regs); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + target->thread.regs, + 0, PT_MSR * sizeof(reg)); + + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, + PT_MSR * sizeof(reg), + (PT_MSR + 1) * sizeof(reg)); + if (!ret) + ret = set_user_msr(target, reg); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct pt_regs, msr) + sizeof(long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.regs->orig_gpr3, + PT_ORIG_R3 * sizeof(reg), + (PT_MAX_PUT_REG + 1) * sizeof(reg)); + + if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret) + ret = user_regset_copyin_ignore( + &pos, &count, &kbuf, &ubuf, + (PT_MAX_PUT_REG + 1) * sizeof(reg), + PT_TRAP * sizeof(reg)); + + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, + PT_TRAP * sizeof(reg), + (PT_TRAP + 1) * sizeof(reg)); + if (!ret) + ret = set_user_trap(target, reg); + } + + if (!ret) + ret = user_regset_copyin_ignore( + &pos, &count, &kbuf, &ubuf, + (PT_TRAP + 1) * sizeof(reg), -1); + + return ret; +} + +static int fpr_get(struct task_struct *target, const struct user_regset *regset, + 
unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ +#ifdef CONFIG_VSX + double buf[33]; + int i; +#endif + flush_fp_to_thread(target); + +#ifdef CONFIG_VSX + /* copy to local buffer then write that out */ + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_FPR(i); + memcpy(&buf[32], &target->thread.fpscr, sizeof(double)); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); + +#else + BUILD_BUG_ON(offsetof(struct thread_struct, fpscr) != + offsetof(struct thread_struct, TS_FPR(32))); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.fpr, 0, -1); +#endif +} + +static int fpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ +#ifdef CONFIG_VSX + double buf[33]; + int i; +#endif + flush_fp_to_thread(target); + +#ifdef CONFIG_VSX + /* copy to local buffer then write that out */ + i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); + if (i) + return i; + for (i = 0; i < 32 ; i++) + target->thread.TS_FPR(i) = buf[i]; + memcpy(&target->thread.fpscr, &buf[32], sizeof(double)); + return 0; +#else + BUILD_BUG_ON(offsetof(struct thread_struct, fpscr) != + offsetof(struct thread_struct, TS_FPR(32))); + + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpr, 0, -1); +#endif +} + +#ifdef CONFIG_ALTIVEC +/* + * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go. + * The transfer totals 34 quadword. Quadwords 0-31 contain the + * corresponding vector registers. Quadword 32 contains the vscr as the + * last word (offset 12) within that quadword. Quadword 33 contains the + * vrsave as the first word (offset 0) within the quadword. + * + * This definition of the VMX state is compatible with the current PPC32 + * ptrace interface. This allows signal handling and ptrace to use the + * same structures. This also simplifies the implementation of a bi-arch + * (combined (32- and 64-bit) gdb. + */ + +static int vr_active(struct task_struct *target, + const struct user_regset *regset) +{ + flush_altivec_to_thread(target); + return target->thread.used_vr ? regset->n : 0; +} + +static int vr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + flush_altivec_to_thread(target); + + BUILD_BUG_ON(offsetof(struct thread_struct, vscr) != + offsetof(struct thread_struct, vr[32])); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.vr, 0, + 33 * sizeof(vector128)); + if (!ret) { + /* + * Copy out only the low-order word of vrsave. + */ + union { + elf_vrreg_t reg; + u32 word; + } vrsave; + memset(&vrsave, 0, sizeof(vrsave)); + vrsave.word = target->thread.vrsave; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, + 33 * sizeof(vector128), -1); + } + + return ret; +} + +static int vr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + flush_altivec_to_thread(target); + + BUILD_BUG_ON(offsetof(struct thread_struct, vscr) != + offsetof(struct thread_struct, vr[32])); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.vr, 0, 33 * sizeof(vector128)); + if (!ret && count > 0) { + /* + * We use only the first word of vrsave. 
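 * The union below hands user_regset a full 16-byte quadword while
 * only its leading 32-bit word is backed by thread.vrsave; seeding
 * it with the current value keeps vrsave unchanged whenever the
 * user's write stops short of quadword 33.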
+ */ + union { + elf_vrreg_t reg; + u32 word; + } vrsave; + memset(&vrsave, 0, sizeof(vrsave)); + vrsave.word = target->thread.vrsave; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, + 33 * sizeof(vector128), -1); + if (!ret) + target->thread.vrsave = vrsave.word; + } + + return ret; +} +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_VSX +/* + * Currently to set and and get all the vsx state, you need to call + * the fp and VMX calls as well. This only get/sets the lower 32 + * 128bit VSX registers. + */ + +static int vsr_active(struct task_struct *target, + const struct user_regset *regset) +{ + flush_vsx_to_thread(target); + return target->thread.used_vsr ? regset->n : 0; +} + +static int vsr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + double buf[32]; + int ret, i; + + flush_vsx_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.fpr[i][TS_VSRLOWOFFSET]; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + + return ret; +} + +static int vsr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + double buf[32]; + int ret,i; + + flush_vsx_to_thread(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + for (i = 0; i < 32 ; i++) + target->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + + + return ret; +} +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_SPE + +/* + * For get_evrregs/set_evrregs functions 'data' has the following layout: + * + * struct { + * u32 evr[32]; + * u64 acc; + * u32 spefscr; + * } + */ + +static int evr_active(struct task_struct *target, + const struct user_regset *regset) +{ + flush_spe_to_thread(target); + return target->thread.used_spe ? regset->n : 0; +} + +static int evr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + flush_spe_to_thread(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.evr, + 0, sizeof(target->thread.evr)); + + BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) != + offsetof(struct thread_struct, spefscr)); + + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.acc, + sizeof(target->thread.evr), -1); + + return ret; +} + +static int evr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + flush_spe_to_thread(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.evr, + 0, sizeof(target->thread.evr)); + + BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) != + offsetof(struct thread_struct, spefscr)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.acc, + sizeof(target->thread.evr), -1); + + return ret; +} +#endif /* CONFIG_SPE */ + + +/* + * These are our native regset flavors. 
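 * Each flavour pairs an ELF core-note type with get/set handlers,
 * so a PTRACE_GETREGSET request for NT_PRSTATUS, for instance, is
 * routed to gpr_get() above; core-dump notes come from the same
 * table.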
+ */ +enum powerpc_regset { + REGSET_GPR, + REGSET_FPR, +#ifdef CONFIG_ALTIVEC + REGSET_VMX, +#endif +#ifdef CONFIG_VSX + REGSET_VSX, +#endif +#ifdef CONFIG_SPE + REGSET_SPE, +#endif +}; + +static const struct user_regset native_regsets[] = { + [REGSET_GPR] = { + .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, + .size = sizeof(long), .align = sizeof(long), + .get = gpr_get, .set = gpr_set + }, + [REGSET_FPR] = { + .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, + .size = sizeof(double), .align = sizeof(double), + .get = fpr_get, .set = fpr_set + }, +#ifdef CONFIG_ALTIVEC + [REGSET_VMX] = { + .core_note_type = NT_PPC_VMX, .n = 34, + .size = sizeof(vector128), .align = sizeof(vector128), + .active = vr_active, .get = vr_get, .set = vr_set + }, +#endif +#ifdef CONFIG_VSX + [REGSET_VSX] = { + .core_note_type = NT_PPC_VSX, .n = 32, + .size = sizeof(double), .align = sizeof(double), + .active = vsr_active, .get = vsr_get, .set = vsr_set + }, +#endif +#ifdef CONFIG_SPE + [REGSET_SPE] = { + .n = 35, + .size = sizeof(u32), .align = sizeof(u32), + .active = evr_active, .get = evr_get, .set = evr_set + }, +#endif +}; + +static const struct user_regset_view user_ppc_native_view = { + .name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI, + .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets) +}; + +#ifdef CONFIG_PPC64 +#include <linux/compat.h> + +static int gpr32_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const unsigned long *regs = &target->thread.regs->gpr[0]; + compat_ulong_t *k = kbuf; + compat_ulong_t __user *u = ubuf; + compat_ulong_t reg; + int i; + + if (target->thread.regs == NULL) + return -EIO; + + if (!FULL_REGS(target->thread.regs)) { + /* We have a partial register set. 
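
gpr32_get and gpr32_set here give a 32-bit debugger a compat_ulong_t view of the 64-bit child. For comparison, a tracer can read the same truncated view one word at a time through the compat PTRACE_PEEKUSER path; a hypothetical userspace sketch (PT_REGS_COUNT comes from asm/ptrace.h):

    #include <errno.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <asm/ptrace.h>         /* PT_REGS_COUNT */

    /* Read the child's pt_regs area as 32-bit words; the kernel
     * truncates each 64-bit GPR, just as gpr32_get does for the
     * bulk PTRACE_GETREGS transfer.
     */
    static int read_compat_regs(pid_t pid, unsigned int regs[PT_REGS_COUNT])
    {
            unsigned int i;

            for (i = 0; i < PT_REGS_COUNT; i++) {
                    errno = 0;
                    regs[i] = ptrace(PTRACE_PEEKUSER, pid,
                                     (void *)(long)(i * 4), NULL);
                    if (errno)
                            return -1;
            }
            return 0;
    }
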
Fill 14-31 with bogus values */ + for (i = 14; i < 32; i++) + target->thread.regs->gpr[i] = NV_REG_POISON; + } + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < PT_MSR; --count) + *k++ = regs[pos++]; + else + for (; count > 0 && pos < PT_MSR; --count) + if (__put_user((compat_ulong_t) regs[pos++], u++)) + return -EFAULT; + + if (count > 0 && pos == PT_MSR) { + reg = get_user_msr(target); + if (kbuf) + *k++ = reg; + else if (__put_user(reg, u++)) + return -EFAULT; + ++pos; + --count; + } + + if (kbuf) + for (; count > 0 && pos < PT_REGS_COUNT; --count) + *k++ = regs[pos++]; + else + for (; count > 0 && pos < PT_REGS_COUNT; --count) + if (__put_user((compat_ulong_t) regs[pos++], u++)) + return -EFAULT; + + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + PT_REGS_COUNT * sizeof(reg), -1); +} + +static int gpr32_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned long *regs = &target->thread.regs->gpr[0]; + const compat_ulong_t *k = kbuf; + const compat_ulong_t __user *u = ubuf; + compat_ulong_t reg; + + if (target->thread.regs == NULL) + return -EIO; + + CHECK_FULL_REGS(target->thread.regs); + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < PT_MSR; --count) + regs[pos++] = *k++; + else + for (; count > 0 && pos < PT_MSR; --count) { + if (__get_user(reg, u++)) + return -EFAULT; + regs[pos++] = reg; + } + + + if (count > 0 && pos == PT_MSR) { + if (kbuf) + reg = *k++; + else if (__get_user(reg, u++)) + return -EFAULT; + set_user_msr(target, reg); + ++pos; + --count; + } + + if (kbuf) { + for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) + regs[pos++] = *k++; + for (; count > 0 && pos < PT_TRAP; --count, ++pos) + ++k; + } else { + for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) { + if (__get_user(reg, u++)) + return -EFAULT; + regs[pos++] = reg; + } + for (; count > 0 && pos < PT_TRAP; --count, ++pos) + if (__get_user(reg, u++)) + return -EFAULT; + } + + if (count > 0 && pos == PT_TRAP) { + if (kbuf) + reg = *k++; + else if (__get_user(reg, u++)) + return -EFAULT; + set_user_trap(target, reg); + ++pos; + --count; + } + + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + (PT_TRAP + 1) * sizeof(reg), -1); +} + +/* + * These are the regset flavors matching the CONFIG_PPC32 native set. 
+ */ +static const struct user_regset compat_regsets[] = { + [REGSET_GPR] = { + .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, + .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), + .get = gpr32_get, .set = gpr32_set + }, + [REGSET_FPR] = { + .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, + .size = sizeof(double), .align = sizeof(double), + .get = fpr_get, .set = fpr_set + }, +#ifdef CONFIG_ALTIVEC + [REGSET_VMX] = { + .core_note_type = NT_PPC_VMX, .n = 34, + .size = sizeof(vector128), .align = sizeof(vector128), + .active = vr_active, .get = vr_get, .set = vr_set + }, +#endif +#ifdef CONFIG_SPE + [REGSET_SPE] = { + .core_note_type = NT_PPC_SPE, .n = 35, + .size = sizeof(u32), .align = sizeof(u32), + .active = evr_active, .get = evr_get, .set = evr_set + }, +#endif +}; + +static const struct user_regset_view user_ppc_compat_view = { + .name = "ppc", .e_machine = EM_PPC, .ei_osabi = ELF_OSABI, + .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets) +}; +#endif /* CONFIG_PPC64 */ + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ +#ifdef CONFIG_PPC64 + if (test_tsk_thread_flag(task, TIF_32BIT)) + return &user_ppc_compat_view; +#endif + return &user_ppc_native_view; +} + + +void user_enable_single_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + + if (regs != NULL) { +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + task->thread.dbcr0 &= ~DBCR0_BT; + task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC; + regs->msr |= MSR_DE; +#else + regs->msr &= ~MSR_BE; + regs->msr |= MSR_SE; +#endif + } + set_tsk_thread_flag(task, TIF_SINGLESTEP); +} + +void user_enable_block_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + + if (regs != NULL) { +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + task->thread.dbcr0 &= ~DBCR0_IC; + task->thread.dbcr0 = DBCR0_IDM | DBCR0_BT; + regs->msr |= MSR_DE; +#else + regs->msr &= ~MSR_SE; + regs->msr |= MSR_BE; +#endif + } + set_tsk_thread_flag(task, TIF_SINGLESTEP); +} + +void user_disable_single_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + + if (regs != NULL) { +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + /* + * The logic to disable single stepping should be as + * simple as turning off the Instruction Complete flag. + * And, after doing so, if all debug flags are off, turn + * off DBCR0(IDM) and MSR(DE) .... Torez + */ + task->thread.dbcr0 &= ~DBCR0_IC; + /* + * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set. + */ + if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0, + task->thread.dbcr1)) { + /* + * All debug events were off..... + */ + task->thread.dbcr0 &= ~DBCR0_IDM; + regs->msr &= ~MSR_DE; + } +#else + regs->msr &= ~(MSR_SE | MSR_BE); +#endif + } + clear_tsk_thread_flag(task, TIF_SINGLESTEP); +} + +#ifdef CONFIG_HAVE_HW_BREAKPOINT +void ptrace_triggered(struct perf_event *bp, + struct perf_sample_data *data, struct pt_regs *regs) +{ + struct perf_event_attr attr; + + /* + * Disable the breakpoint request here since ptrace has defined a + * one-shot behaviour for breakpoint exceptions in PPC64. + * The SIGTRAP signal is generated automatically for us in do_dabr(). 
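
user_enable_single_step above is the arch hook behind PTRACE_SINGLESTEP. A minimal tracer loop exercising it might look like this (a userspace sketch, not part of the patch):

    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/wait.h>

    /* Step the stopped child one instruction at a time until it
     * exits; each step ends in a SIGTRAP stop once MSR_SE (or
     * DBCR0_IC on Book-E parts) has been set by the hook above.
     */
    static void step_until_exit(pid_t pid)
    {
            int status;

            for (;;) {
                    if (ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL) == -1)
                            break;
                    if (waitpid(pid, &status, 0) == -1 || WIFEXITED(status))
                            break;
            }
    }
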
+ * We don't have to do anything about that here + */ + attr = bp->attr; + attr.disabled = true; + modify_user_hw_breakpoint(bp, &attr); +} +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + +int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, + unsigned long data) +{ +#ifdef CONFIG_HAVE_HW_BREAKPOINT + int ret; + struct thread_struct *thread = &(task->thread); + struct perf_event *bp; + struct perf_event_attr attr; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + + /* For ppc64 we support one DABR and no IABR's at the moment (ppc64). + * For embedded processors we support one DAC and no IAC's at the + * moment. + */ + if (addr > 0) + return -EINVAL; + + /* The bottom 3 bits in dabr are flags */ + if ((data & ~0x7UL) >= TASK_SIZE) + return -EIO; + +#ifndef CONFIG_PPC_ADV_DEBUG_REGS + /* For processors using DABR (i.e. 970), the bottom 3 bits are flags. + * It was assumed, on previous implementations, that 3 bits were + * passed together with the data address, fitting the design of the + * DABR register, as follows: + * + * bit 0: Read flag + * bit 1: Write flag + * bit 2: Breakpoint translation + * + * Thus, we use them here as so. + */ + + /* Ensure breakpoint translation bit is set */ + if (data && !(data & DABR_TRANSLATION)) + return -EIO; +#ifdef CONFIG_HAVE_HW_BREAKPOINT + if (ptrace_get_breakpoints(task) < 0) + return -ESRCH; + + bp = thread->ptrace_bps[0]; + if ((!data) || !(data & (DABR_DATA_WRITE | DABR_DATA_READ))) { + if (bp) { + unregister_hw_breakpoint(bp); + thread->ptrace_bps[0] = NULL; + } + ptrace_put_breakpoints(task); + return 0; + } + if (bp) { + attr = bp->attr; + attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN; + arch_bp_generic_fields(data & + (DABR_DATA_WRITE | DABR_DATA_READ), + &attr.bp_type); + ret = modify_user_hw_breakpoint(bp, &attr); + if (ret) { + ptrace_put_breakpoints(task); + return ret; + } + thread->ptrace_bps[0] = bp; + ptrace_put_breakpoints(task); + thread->dabr = data; + return 0; + } + + /* Create a new breakpoint request if one doesn't exist already */ + hw_breakpoint_init(&attr); + attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN; + arch_bp_generic_fields(data & (DABR_DATA_WRITE | DABR_DATA_READ), + &attr.bp_type); + + thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr, + ptrace_triggered, NULL, task); + if (IS_ERR(bp)) { + thread->ptrace_bps[0] = NULL; + ptrace_put_breakpoints(task); + return PTR_ERR(bp); + } + + ptrace_put_breakpoints(task); + +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + + /* Move contents to the DABR register */ + task->thread.dabr = data; +#else /* CONFIG_PPC_ADV_DEBUG_REGS */ + /* As described above, it was assumed 3 bits were passed with the data + * address, but we will assume only the mode bits will be passed + * as to not cause alignment restrictions for DAC-based processors. 
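
Given the bit layout just described, a tracer arms the legacy interface by passing the watch address with the low flag bits set. A hypothetical userspace sketch; the DABR_* values below mirror the kernel's 0x1/0x2/0x4 flag bits, and PTRACE_SET_DEBUGREG is the powerpc request from asm/ptrace.h:

    #include <sys/ptrace.h>
    #include <sys/types.h>

    #define DABR_DATA_READ    0x1UL  /* bit 0: trap on loads        */
    #define DABR_DATA_WRITE   0x2UL  /* bit 1: trap on stores       */
    #define DABR_TRANSLATION  0x4UL  /* bit 2: match translated EAs */

    /* Watch stores to an 8-byte-aligned address in the child; the
     * kernel rejects requests without DABR_TRANSLATION set.
     */
    static long watch_writes(pid_t pid, unsigned long addr)
    {
            unsigned long dabr;

            dabr = (addr & ~7UL) | DABR_TRANSLATION | DABR_DATA_WRITE;
            return ptrace(PTRACE_SET_DEBUGREG, pid, 0, dabr);
    }
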
+	 */
+
+	/* DAC's hold the whole address without any mode flags */
+	task->thread.dac1 = data & ~0x3UL;
+
+	if (task->thread.dac1 == 0) {
+		dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+		if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0,
+					task->thread.dbcr1)) {
+			task->thread.regs->msr &= ~MSR_DE;
+			task->thread.dbcr0 &= ~DBCR0_IDM;
+		}
+		return 0;
+	}
+
+	/* Read or Write bits must be set */
+
+	if (!(data & 0x3UL))
+		return -EINVAL;
+
+	/* Set the Internal Debugging flag (IDM bit 1) for the DBCR0
+	   register */
+	task->thread.dbcr0 |= DBCR0_IDM;
+
+	/* Check for write and read flags and set DBCR0
+	   accordingly */
+	dbcr_dac(task) &= ~(DBCR_DAC1R|DBCR_DAC1W);
+	if (data & 0x1UL)
+		dbcr_dac(task) |= DBCR_DAC1R;
+	if (data & 0x2UL)
+		dbcr_dac(task) |= DBCR_DAC1W;
+	task->thread.regs->msr |= MSR_DE;
+#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
+	return 0;
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching.
+ *
+ * Make sure single step bits etc are not set.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+	/* make sure the single step bit is not set. */
+	user_disable_single_step(child);
+}
+
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+static long set_instruction_bp(struct task_struct *child,
+			       struct ppc_hw_breakpoint *bp_info)
+{
+	int slot;
+	int slot1_in_use = ((child->thread.dbcr0 & DBCR0_IAC1) != 0);
+	int slot2_in_use = ((child->thread.dbcr0 & DBCR0_IAC2) != 0);
+	int slot3_in_use = ((child->thread.dbcr0 & DBCR0_IAC3) != 0);
+	int slot4_in_use = ((child->thread.dbcr0 & DBCR0_IAC4) != 0);
+
+	if (dbcr_iac_range(child) & DBCR_IAC12MODE)
+		slot2_in_use = 1;
+	if (dbcr_iac_range(child) & DBCR_IAC34MODE)
+		slot4_in_use = 1;
+
+	if (bp_info->addr >= TASK_SIZE)
+		return -EIO;
+
+	if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) {
+
+		/* Make sure range is valid. */
+		if (bp_info->addr2 >= TASK_SIZE)
+			return -EIO;
+
+		/* We need a pair of IAC registers */
+		if ((!slot1_in_use) && (!slot2_in_use)) {
+			slot = 1;
+			child->thread.iac1 = bp_info->addr;
+			child->thread.iac2 = bp_info->addr2;
+			child->thread.dbcr0 |= DBCR0_IAC1;
+			if (bp_info->addr_mode ==
+					PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+				dbcr_iac_range(child) |= DBCR_IAC12X;
+			else
+				dbcr_iac_range(child) |= DBCR_IAC12I;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+		} else if ((!slot3_in_use) && (!slot4_in_use)) {
+			slot = 3;
+			child->thread.iac3 = bp_info->addr;
+			child->thread.iac4 = bp_info->addr2;
+			child->thread.dbcr0 |= DBCR0_IAC3;
+			if (bp_info->addr_mode ==
+					PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+				dbcr_iac_range(child) |= DBCR_IAC34X;
+			else
+				dbcr_iac_range(child) |= DBCR_IAC34I;
+#endif
+		} else
+			return -ENOSPC;
+	} else {
+		/* We only need one.
If possible leave a pair free in + * case a range is needed later + */ + if (!slot1_in_use) { + /* + * Don't use iac1 if iac1-iac2 are free and either + * iac3 or iac4 (but not both) are free + */ + if (slot2_in_use || (slot3_in_use == slot4_in_use)) { + slot = 1; + child->thread.iac1 = bp_info->addr; + child->thread.dbcr0 |= DBCR0_IAC1; + goto out; + } + } + if (!slot2_in_use) { + slot = 2; + child->thread.iac2 = bp_info->addr; + child->thread.dbcr0 |= DBCR0_IAC2; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + } else if (!slot3_in_use) { + slot = 3; + child->thread.iac3 = bp_info->addr; + child->thread.dbcr0 |= DBCR0_IAC3; + } else if (!slot4_in_use) { + slot = 4; + child->thread.iac4 = bp_info->addr; + child->thread.dbcr0 |= DBCR0_IAC4; +#endif + } else + return -ENOSPC; + } +out: + child->thread.dbcr0 |= DBCR0_IDM; + child->thread.regs->msr |= MSR_DE; + + return slot; +} + +static int del_instruction_bp(struct task_struct *child, int slot) +{ + switch (slot) { + case 1: + if ((child->thread.dbcr0 & DBCR0_IAC1) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC12MODE) { + /* address range - clear slots 1 & 2 */ + child->thread.iac2 = 0; + dbcr_iac_range(child) &= ~DBCR_IAC12MODE; + } + child->thread.iac1 = 0; + child->thread.dbcr0 &= ~DBCR0_IAC1; + break; + case 2: + if ((child->thread.dbcr0 & DBCR0_IAC2) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC12MODE) + /* used in a range */ + return -EINVAL; + child->thread.iac2 = 0; + child->thread.dbcr0 &= ~DBCR0_IAC2; + break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + case 3: + if ((child->thread.dbcr0 & DBCR0_IAC3) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC34MODE) { + /* address range - clear slots 3 & 4 */ + child->thread.iac4 = 0; + dbcr_iac_range(child) &= ~DBCR_IAC34MODE; + } + child->thread.iac3 = 0; + child->thread.dbcr0 &= ~DBCR0_IAC3; + break; + case 4: + if ((child->thread.dbcr0 & DBCR0_IAC4) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC34MODE) + /* Used in a range */ + return -EINVAL; + child->thread.iac4 = 0; + child->thread.dbcr0 &= ~DBCR0_IAC4; + break; +#endif + default: + return -EINVAL; + } + return 0; +} + +static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) +{ + int byte_enable = + (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT) + & 0xf; + int condition_mode = + bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE; + int slot; + + if (byte_enable && (condition_mode == 0)) + return -EINVAL; + + if (bp_info->addr >= TASK_SIZE) + return -EIO; + + if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) { + slot = 1; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + dbcr_dac(child) |= DBCR_DAC1R; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + dbcr_dac(child) |= DBCR_DAC1W; + child->thread.dac1 = (unsigned long)bp_info->addr; +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + if (byte_enable) { + child->thread.dvc1 = + (unsigned long)bp_info->condition_value; + child->thread.dbcr2 |= + ((byte_enable << DBCR2_DVC1BE_SHIFT) | + (condition_mode << DBCR2_DVC1M_SHIFT)); + } +#endif +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + } else if (child->thread.dbcr2 & DBCR2_DAC12MODE) { + /* Both dac1 and dac2 are part of a range */ + return -ENOSPC; +#endif + } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) { + slot = 2; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + dbcr_dac(child) |= DBCR_DAC2R; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + dbcr_dac(child) |= DBCR_DAC2W; + 
child->thread.dac2 = (unsigned long)bp_info->addr; +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + if (byte_enable) { + child->thread.dvc2 = + (unsigned long)bp_info->condition_value; + child->thread.dbcr2 |= + ((byte_enable << DBCR2_DVC2BE_SHIFT) | + (condition_mode << DBCR2_DVC2M_SHIFT)); + } +#endif + } else + return -ENOSPC; + child->thread.dbcr0 |= DBCR0_IDM; + child->thread.regs->msr |= MSR_DE; + + return slot + 4; +} + +static int del_dac(struct task_struct *child, int slot) +{ + if (slot == 1) { + if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) + return -ENOENT; + + child->thread.dac1 = 0; + dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W); +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + if (child->thread.dbcr2 & DBCR2_DAC12MODE) { + child->thread.dac2 = 0; + child->thread.dbcr2 &= ~DBCR2_DAC12MODE; + } + child->thread.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE); +#endif +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + child->thread.dvc1 = 0; +#endif + } else if (slot == 2) { + if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) + return -ENOENT; + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + if (child->thread.dbcr2 & DBCR2_DAC12MODE) + /* Part of a range */ + return -EINVAL; + child->thread.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE); +#endif +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + child->thread.dvc2 = 0; +#endif + child->thread.dac2 = 0; + dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W); + } else + return -EINVAL; + + return 0; +} +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE +static int set_dac_range(struct task_struct *child, + struct ppc_hw_breakpoint *bp_info) +{ + int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK; + + /* We don't allow range watchpoints to be used with DVC */ + if (bp_info->condition_mode) + return -EINVAL; + + /* + * Best effort to verify the address range. The user/supervisor bits + * prevent trapping in kernel space, but let's fail on an obvious bad + * range. The simple test on the mask is not fool-proof, and any + * exclusive range will spill over into kernel space. + */ + if (bp_info->addr >= TASK_SIZE) + return -EIO; + if (mode == PPC_BREAKPOINT_MODE_MASK) { + /* + * dac2 is a bitmask. 
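
The set_dac_range logic here is reached through the PPC_PTRACE_SETHWDEBUG request. A tracer asking for an inclusive range watchpoint would fill the ABI structure roughly as follows (a hypothetical userspace sketch; struct ppc_hw_breakpoint and the PPC_* constants come from asm/ptrace.h, and the positive return value is the slot handle later passed to PPC_PTRACE_DELHWDEBUG, 5 for a DAC range here):

    #include <string.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <asm/ptrace.h>

    /* Request a write watchpoint covering the range from start to
     * end (the exact endpoint semantics are hardware-defined).
     */
    static long watch_range(pid_t pid, unsigned long start, unsigned long end)
    {
            struct ppc_hw_breakpoint bp;

            memset(&bp, 0, sizeof(bp));
            bp.version        = 1;
            bp.trigger_type   = PPC_BREAKPOINT_TRIGGER_WRITE;
            bp.addr_mode      = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
            bp.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
            bp.addr           = start;
            bp.addr2          = end;

            return ptrace(PPC_PTRACE_SETHWDEBUG, pid, 0,
                          (unsigned long)&bp);
    }
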
Don't allow a mask that makes a
+		 * kernel space address from a valid dac1 value
+		 */
+		if (~((unsigned long)bp_info->addr2) >= TASK_SIZE)
+			return -EIO;
+	} else {
+		/*
+		 * For range breakpoints, addr2 must also be a valid address
+		 */
+		if (bp_info->addr2 >= TASK_SIZE)
+			return -EIO;
+	}
+
+	if (child->thread.dbcr0 &
+	    (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W))
+		return -ENOSPC;
+
+	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+		child->thread.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
+	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+		child->thread.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
+	child->thread.dac1 = bp_info->addr;
+	child->thread.dac2 = bp_info->addr2;
+	if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
+		child->thread.dbcr2 |= DBCR2_DAC12M;
+	else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+		child->thread.dbcr2 |= DBCR2_DAC12MX;
+	else	/* PPC_BREAKPOINT_MODE_MASK */
+		child->thread.dbcr2 |= DBCR2_DAC12MM;
+	child->thread.regs->msr |= MSR_DE;
+
+	return 5;
+}
+#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */
+
+static long ppc_set_hwdebug(struct task_struct *child,
+			    struct ppc_hw_breakpoint *bp_info)
+{
+#ifndef CONFIG_PPC_ADV_DEBUG_REGS
+	unsigned long dabr;
+#endif
+
+	if (bp_info->version != 1)
+		return -ENOTSUPP;
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+	/*
+	 * Check for invalid flags and combinations
+	 */
+	if ((bp_info->trigger_type == 0) ||
+	    (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE |
+				       PPC_BREAKPOINT_TRIGGER_RW)) ||
+	    (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) ||
+	    (bp_info->condition_mode &
+	     ~(PPC_BREAKPOINT_CONDITION_MODE |
+	       PPC_BREAKPOINT_CONDITION_BE_ALL)))
+		return -EINVAL;
+#if CONFIG_PPC_ADV_DEBUG_DVCS == 0
+	if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+		return -EINVAL;
+#endif
+
+	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) {
+		if ((bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE) ||
+		    (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE))
+			return -EINVAL;
+		return set_instruction_bp(child, bp_info);
+	}
+	if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
+		return set_dac(child, bp_info);
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+	return set_dac_range(child, bp_info);
+#else
+	return -EINVAL;
+#endif
+#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
+	/*
+	 * We only support one data breakpoint
+	 */
+	if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 ||
+	    (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 ||
+	    bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT ||
+	    bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+		return -EINVAL;
+
+	if (child->thread.dabr)
+		return -ENOSPC;
+
+	if ((unsigned long)bp_info->addr >= TASK_SIZE)
+		return -EIO;
+
+	dabr = (unsigned long)bp_info->addr & ~7UL;
+	dabr |= DABR_TRANSLATION;
+	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+		dabr |= DABR_DATA_READ;
+	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+		dabr |= DABR_DATA_WRITE;
+
+	child->thread.dabr = dabr;
+
+	return 1;
+#endif /* !CONFIG_PPC_ADV_DEBUG_REGS */
+}
+
+static long ppc_del_hwdebug(struct task_struct *child, long addr, long data)
+{
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+	int rc;
+
+	if (data <= 4)
+		rc = del_instruction_bp(child, (int)data);
+	else
+		rc = del_dac(child, (int)data - 4);
+
+	if (!rc) {
+		if (!DBCR_ACTIVE_EVENTS(child->thread.dbcr0,
+					child->thread.dbcr1)) {
+			child->thread.dbcr0 &= ~DBCR0_IDM;
+			child->thread.regs->msr &= ~MSR_DE;
+		}
+	}
+	return rc;
+#else
+	if (data != 1)
+		return -EINVAL;
+	if (child->thread.dabr == 0)
+ return -ENOENT; + + child->thread.dabr = 0; + + return 0; +#endif +} + +/* + * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls, + * we mark them as obsolete now, they will be removed in a future version + */ +static long arch_ptrace_old(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + void __user *datavp = (void __user *) data; + + switch (request) { + case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */ + return copy_regset_to_user(child, &user_ppc_native_view, + REGSET_GPR, 0, 32 * sizeof(long), + datavp); + + case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */ + return copy_regset_from_user(child, &user_ppc_native_view, + REGSET_GPR, 0, 32 * sizeof(long), + datavp); + + case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. */ + return copy_regset_to_user(child, &user_ppc_native_view, + REGSET_FPR, 0, 32 * sizeof(double), + datavp); + + case PPC_PTRACE_SETFPREGS: /* Set FPRs 0 - 31. */ + return copy_regset_from_user(child, &user_ppc_native_view, + REGSET_FPR, 0, 32 * sizeof(double), + datavp); + } + + return -EPERM; +} + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + int ret = -EPERM; + void __user *datavp = (void __user *) data; + unsigned long __user *datalp = datavp; + + switch (request) { + /* read the word at location addr in the USER area. */ + case PTRACE_PEEKUSR: { + unsigned long index, tmp; + + ret = -EIO; + /* convert to index and check */ +#ifdef CONFIG_PPC32 + index = addr >> 2; + if ((addr & 3) || (index > PT_FPSCR) + || (child->thread.regs == NULL)) +#else + index = addr >> 3; + if ((addr & 7) || (index > PT_FPSCR)) +#endif + break; + + CHECK_FULL_REGS(child->thread.regs); + if (index < PT_FPR0) { + tmp = ptrace_get_reg(child, (int) index); + } else { + unsigned int fpidx = index - PT_FPR0; + + flush_fp_to_thread(child); + if (fpidx < (PT_FPSCR - PT_FPR0)) + tmp = ((unsigned long *)child->thread.fpr) + [fpidx * TS_FPRWIDTH]; + else + tmp = child->thread.fpscr.val; + } + ret = put_user(tmp, datalp); + break; + } + + /* write the word at location addr in the USER area */ + case PTRACE_POKEUSR: { + unsigned long index; + + ret = -EIO; + /* convert to index and check */ +#ifdef CONFIG_PPC32 + index = addr >> 2; + if ((addr & 3) || (index > PT_FPSCR) + || (child->thread.regs == NULL)) +#else + index = addr >> 3; + if ((addr & 7) || (index > PT_FPSCR)) +#endif + break; + + CHECK_FULL_REGS(child->thread.regs); + if (index < PT_FPR0) { + ret = ptrace_put_reg(child, index, data); + } else { + unsigned int fpidx = index - PT_FPR0; + + flush_fp_to_thread(child); + if (fpidx < (PT_FPSCR - PT_FPR0)) + ((unsigned long *)child->thread.fpr) + [fpidx * TS_FPRWIDTH] = data; + else + child->thread.fpscr.val = data; + ret = 0; + } + break; + } + + case PPC_PTRACE_GETHWDBGINFO: { + struct ppc_debug_info dbginfo; + + dbginfo.version = 1; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + dbginfo.num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS; + dbginfo.num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS; + dbginfo.num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS; + dbginfo.data_bp_alignment = 4; + dbginfo.sizeof_condition = 4; + dbginfo.features = PPC_DEBUG_FEATURE_INSN_BP_RANGE | + PPC_DEBUG_FEATURE_INSN_BP_MASK; +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + dbginfo.features |= + PPC_DEBUG_FEATURE_DATA_BP_RANGE | + PPC_DEBUG_FEATURE_DATA_BP_MASK; +#endif +#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ + dbginfo.num_instruction_bps = 0; + dbginfo.num_data_bps = 1; + dbginfo.num_condition_regs = 0; +#ifdef CONFIG_PPC64 + 
dbginfo.data_bp_alignment = 8;
+#else
+		dbginfo.data_bp_alignment = 4;
+#endif
+		dbginfo.sizeof_condition = 0;
+		dbginfo.features = 0;
+#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
+
+		if (!access_ok(VERIFY_WRITE, datavp,
+			       sizeof(struct ppc_debug_info)))
+			return -EFAULT;
+		ret = __copy_to_user(datavp, &dbginfo,
+				     sizeof(struct ppc_debug_info)) ?
+		      -EFAULT : 0;
+		break;
+	}
+
+	case PPC_PTRACE_SETHWDEBUG: {
+		struct ppc_hw_breakpoint bp_info;
+
+		if (!access_ok(VERIFY_READ, datavp,
+			       sizeof(struct ppc_hw_breakpoint)))
+			return -EFAULT;
+		ret = __copy_from_user(&bp_info, datavp,
+				       sizeof(struct ppc_hw_breakpoint)) ?
+		      -EFAULT : 0;
+		if (!ret)
+			ret = ppc_set_hwdebug(child, &bp_info);
+		break;
+	}
+
+	case PPC_PTRACE_DELHWDEBUG: {
+		ret = ppc_del_hwdebug(child, addr, data);
+		break;
+	}
+
+	case PTRACE_GET_DEBUGREG: {
+		ret = -EINVAL;
+		/* We only support one DABR and no IABRS at the moment */
+		if (addr > 0)
+			break;
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+		ret = put_user(child->thread.dac1, datalp);
+#else
+		ret = put_user(child->thread.dabr, datalp);
+#endif
+		break;
+	}
+
+	case PTRACE_SET_DEBUGREG:
+		ret = ptrace_set_debugreg(child, addr, data);
+		break;
+
+#ifdef CONFIG_PPC64
+	case PTRACE_GETREGS64:
+#endif
+	case PTRACE_GETREGS:	/* Get all pt_regs from the child. */
+		return copy_regset_to_user(child, &user_ppc_native_view,
+					   REGSET_GPR,
+					   0, sizeof(struct pt_regs),
+					   datavp);
+
+#ifdef CONFIG_PPC64
+	case PTRACE_SETREGS64:
+#endif
+	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
+		return copy_regset_from_user(child, &user_ppc_native_view,
+					     REGSET_GPR,
+					     0, sizeof(struct pt_regs),
+					     datavp);
+
+	case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */
+		return copy_regset_to_user(child, &user_ppc_native_view,
+					   REGSET_FPR,
+					   0, sizeof(elf_fpregset_t),
+					   datavp);
+
+	case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */
+		return copy_regset_from_user(child, &user_ppc_native_view,
+					     REGSET_FPR,
+					     0, sizeof(elf_fpregset_t),
+					     datavp);
+
+#ifdef CONFIG_ALTIVEC
+	case PTRACE_GETVRREGS:
+		return copy_regset_to_user(child, &user_ppc_native_view,
+					   REGSET_VMX,
+					   0, (33 * sizeof(vector128) +
+					       sizeof(u32)),
+					   datavp);
+
+	case PTRACE_SETVRREGS:
+		return copy_regset_from_user(child, &user_ppc_native_view,
+					     REGSET_VMX,
+					     0, (33 * sizeof(vector128) +
+						 sizeof(u32)),
+					     datavp);
+#endif
+#ifdef CONFIG_VSX
+	case PTRACE_GETVSRREGS:
+		return copy_regset_to_user(child, &user_ppc_native_view,
+					   REGSET_VSX,
+					   0, 32 * sizeof(double),
+					   datavp);
+
+	case PTRACE_SETVSRREGS:
+		return copy_regset_from_user(child, &user_ppc_native_view,
+					     REGSET_VSX,
+					     0, 32 * sizeof(double),
+					     datavp);
+#endif
+#ifdef CONFIG_SPE
+	case PTRACE_GETEVRREGS:
+		/* Get the child spe register state. */
+		return copy_regset_to_user(child, &user_ppc_native_view,
+					   REGSET_SPE, 0, 35 * sizeof(u32),
+					   datavp);
+
+	case PTRACE_SETEVRREGS:
+		/* Set the child spe register state. */
+		return copy_regset_from_user(child, &user_ppc_native_view,
+					     REGSET_SPE, 0, 35 * sizeof(u32),
+					     datavp);
+#endif
+
+	/* Old reverse args ptrace calls */
+	case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */
+	case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */
+	case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. */
+	case PPC_PTRACE_SETFPREGS: /* Set FPRs 0 - 31. */
+		ret = arch_ptrace_old(child, request, addr, data);
+		break;
+
+	default:
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+	return ret;
+}
+
+/*
+ * We must return the syscall number to actually look up in the table.
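
The PPC_PTRACE_GETHWDBGINFO case above is how a debugger discovers which of these facilities exist before choosing between PTRACE_SET_DEBUGREG and PPC_PTRACE_SETHWDEBUG. A hypothetical userspace probe (struct ppc_debug_info comes from asm/ptrace.h):

    #include <stdio.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <asm/ptrace.h>

    /* Print the debug resources the kernel advertises for this target. */
    static void show_debug_info(pid_t pid)
    {
            struct ppc_debug_info info;

            if (ptrace(PPC_PTRACE_GETHWDBGINFO, pid, 0,
                       (unsigned long)&info) == 0)
                    printf("%u insn bps, %u data bps, features 0x%llx\n",
                           info.num_instruction_bps, info.num_data_bps,
                           (unsigned long long)info.features);
    }
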
+ * This can be -1L to skip running any syscall at all. + */ +long do_syscall_trace_enter(struct pt_regs *regs) +{ + long ret = 0; + + secure_computing(regs->gpr[0]); + + if (test_thread_flag(TIF_SYSCALL_TRACE) && + tracehook_report_syscall_entry(regs)) + /* + * Tracing decided this syscall should not happen. + * We'll return a bogus call number to get an ENOSYS + * error, but leave the original number in regs->gpr[0]. + */ + ret = -1L; + + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->gpr[0]); + +#ifdef CONFIG_PPC64 + if (!is_32bit_task()) + audit_syscall_entry(AUDIT_ARCH_PPC64, + regs->gpr[0], + regs->gpr[3], regs->gpr[4], + regs->gpr[5], regs->gpr[6]); + else +#endif + audit_syscall_entry(AUDIT_ARCH_PPC, + regs->gpr[0], + regs->gpr[3] & 0xffffffff, + regs->gpr[4] & 0xffffffff, + regs->gpr[5] & 0xffffffff, + regs->gpr[6] & 0xffffffff); + + return ret ?: regs->gpr[0]; +} + +void do_syscall_trace_leave(struct pt_regs *regs) +{ + int step; + + audit_syscall_exit(regs); + + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_exit(regs, regs->result); + + step = test_thread_flag(TIF_SINGLESTEP); + if (step || test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, step); +} diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c new file mode 100644 index 00000000..469349d1 --- /dev/null +++ b/arch/powerpc/kernel/ptrace32.c @@ -0,0 +1,337 @@ +/* + * ptrace for 32-bit processes running on a 64-bit kernel. + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/m68k/kernel/ptrace.c" + * Copyright (C) 1994 by Hamish Macdonald + * Taken from linux/kernel/ptrace.c and modified for M680x0. + * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds + * + * Modified by Cort Dougan (cort@hq.fsmlabs.com) + * and Paul Mackerras (paulus@samba.org). + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file COPYING in the main directory of + * this archive for more details. + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/regset.h> +#include <linux/user.h> +#include <linux/security.h> +#include <linux/signal.h> +#include <linux/compat.h> + +#include <asm/uaccess.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/switch_to.h> + +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +/* + * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls, + * we mark them as obsolete now, they will be removed in a future version + */ +static long compat_ptrace_old(struct task_struct *child, long request, + long addr, long data) +{ + switch (request) { + case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */ + return copy_regset_to_user(child, + task_user_regset_view(current), 0, + 0, 32 * sizeof(compat_long_t), + compat_ptr(data)); + + case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. 
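
The do_syscall_trace_enter/do_syscall_trace_leave pair above is driven from userspace with PTRACE_SYSCALL; a minimal strace-style loop (a hypothetical sketch, not part of the patch) looks like:

    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/wait.h>

    /* Each PTRACE_SYSCALL stop corresponds to one syscall entry or
     * exit report from the hooks above; gpr[0] holds the syscall
     * number at the entry stop.
     */
    static int trace_syscalls(pid_t pid)
    {
            int status;

            for (;;) {
                    if (ptrace(PTRACE_SYSCALL, pid, NULL, NULL) == -1)
                            return -1;
                    if (waitpid(pid, &status, 0) == -1 || WIFEXITED(status))
                            return 0;
            }
    }
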
*/ + return copy_regset_from_user(child, + task_user_regset_view(current), 0, + 0, 32 * sizeof(compat_long_t), + compat_ptr(data)); + } + + return -EPERM; +} + +/* Macros to workout the correct index for the FPR in the thread struct */ +#define FPRNUMBER(i) (((i) - PT_FPR0) >> 1) +#define FPRHALF(i) (((i) - PT_FPR0) & 1) +#define FPRINDEX(i) TS_FPRWIDTH * FPRNUMBER(i) * 2 + FPRHALF(i) +#define FPRINDEX_3264(i) (TS_FPRWIDTH * ((i) - PT_FPR0)) + +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) +{ + unsigned long addr = caddr; + unsigned long data = cdata; + int ret; + + switch (request) { + /* + * Read 4 bytes of the other process' storage + * data is a pointer specifying where the user wants the + * 4 bytes copied into + * addr is a pointer in the user's storage that contains an 8 byte + * address in the other process of the 4 bytes that is to be read + * (this is run in a 32-bit process looking at a 64-bit process) + * when I and D space are separate, these will need to be fixed. + */ + case PPC_PTRACE_PEEKTEXT_3264: + case PPC_PTRACE_PEEKDATA_3264: { + u32 tmp; + int copied; + u32 __user * addrOthers; + + ret = -EIO; + + /* Get the addr in the other process that we want to read */ + if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) + break; + + copied = access_process_vm(child, (u64)addrOthers, &tmp, + sizeof(tmp), 0); + if (copied != sizeof(tmp)) + break; + ret = put_user(tmp, (u32 __user *)data); + break; + } + + /* Read a register (specified by ADDR) out of the "user area" */ + case PTRACE_PEEKUSR: { + int index; + unsigned long tmp; + + ret = -EIO; + /* convert to index and check */ + index = (unsigned long) addr >> 2; + if ((addr & 3) || (index > PT_FPSCR32)) + break; + + CHECK_FULL_REGS(child->thread.regs); + if (index < PT_FPR0) { + tmp = ptrace_get_reg(child, index); + } else { + flush_fp_to_thread(child); + /* + * the user space code considers the floating point + * to be an array of unsigned int (32 bits) - the + * index passed in is based on this assumption. + */ + tmp = ((unsigned int *)child->thread.fpr) + [FPRINDEX(index)]; + } + ret = put_user((unsigned int)tmp, (u32 __user *)data); + break; + } + + /* + * Read 4 bytes out of the other process' pt_regs area + * data is a pointer specifying where the user wants the + * 4 bytes copied into + * addr is the offset into the other process' pt_regs structure + * that is to be read + * (this is run in a 32-bit process looking at a 64-bit process) + */ + case PPC_PTRACE_PEEKUSR_3264: { + u32 index; + u32 reg32bits; + u64 tmp; + u32 numReg; + u32 part; + + ret = -EIO; + /* Determine which register the user wants */ + index = (u64)addr >> 2; + numReg = index / 2; + /* Determine which part of the register the user wants */ + if (index % 2) + part = 1; /* want the 2nd half of the register (right-most). */ + else + part = 0; /* want the 1st half of the register (left-most). 
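
To make the index macros above concrete, here is the arithmetic they perform for one example index (pure arithmetic from the definitions; TS_FPRWIDTH is 2 on VSX-enabled builds, 1 otherwise):

    /* For a user word index i = PT_FPR0 + 5:
     *
     *   FPRNUMBER(i) = 5 >> 1 = 2          -> FPR2
     *   FPRHALF(i)   = 5 & 1  = 1          -> second (low) 32-bit half
     *
     * TS_FPRWIDTH == 2:  FPRINDEX(i) = 2 * 2 * 2 + 1 = 9
     * TS_FPRWIDTH == 1:  FPRINDEX(i) = 1 * 2 * 2 + 1 = 5
     *
     * i.e. with VSX each FPR occupies the first doubleword of a
     * 16-byte slot, so the u32 offsets stride by 4 per register.
     */
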
*/ + + /* Validate the input - check to see if address is on the wrong boundary + * or beyond the end of the user area + */ + if ((addr & 3) || numReg > PT_FPSCR) + break; + + CHECK_FULL_REGS(child->thread.regs); + if (numReg >= PT_FPR0) { + flush_fp_to_thread(child); + /* get 64 bit FPR */ + tmp = ((u64 *)child->thread.fpr) + [FPRINDEX_3264(numReg)]; + } else { /* register within PT_REGS struct */ + tmp = ptrace_get_reg(child, numReg); + } + reg32bits = ((u32*)&tmp)[part]; + ret = put_user(reg32bits, (u32 __user *)data); + break; + } + + /* + * Write 4 bytes into the other process' storage + * data is the 4 bytes that the user wants written + * addr is a pointer in the user's storage that contains an + * 8 byte address in the other process where the 4 bytes + * that is to be written + * (this is run in a 32-bit process looking at a 64-bit process) + * when I and D space are separate, these will need to be fixed. + */ + case PPC_PTRACE_POKETEXT_3264: + case PPC_PTRACE_POKEDATA_3264: { + u32 tmp = data; + u32 __user * addrOthers; + + /* Get the addr in the other process that we want to write into */ + ret = -EIO; + if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) + break; + ret = 0; + if (access_process_vm(child, (u64)addrOthers, &tmp, + sizeof(tmp), 1) == sizeof(tmp)) + break; + ret = -EIO; + break; + } + + /* write the word at location addr in the USER area */ + case PTRACE_POKEUSR: { + unsigned long index; + + ret = -EIO; + /* convert to index and check */ + index = (unsigned long) addr >> 2; + if ((addr & 3) || (index > PT_FPSCR32)) + break; + + CHECK_FULL_REGS(child->thread.regs); + if (index < PT_FPR0) { + ret = ptrace_put_reg(child, index, data); + } else { + flush_fp_to_thread(child); + /* + * the user space code considers the floating point + * to be an array of unsigned int (32 bits) - the + * index passed in is based on this assumption. + */ + ((unsigned int *)child->thread.fpr) + [FPRINDEX(index)] = data; + ret = 0; + } + break; + } + + /* + * Write 4 bytes into the other process' pt_regs area + * data is the 4 bytes that the user wants written + * addr is the offset into the other process' pt_regs structure + * that is to be written into + * (this is run in a 32-bit process looking at a 64-bit process) + */ + case PPC_PTRACE_POKEUSR_3264: { + u32 index; + u32 numReg; + + ret = -EIO; + /* Determine which register the user wants */ + index = (u64)addr >> 2; + numReg = index / 2; + + /* + * Validate the input - check to see if address is on the + * wrong boundary or beyond the end of the user area + */ + if ((addr & 3) || (numReg > PT_FPSCR)) + break; + CHECK_FULL_REGS(child->thread.regs); + if (numReg < PT_FPR0) { + unsigned long freg = ptrace_get_reg(child, numReg); + if (index % 2) + freg = (freg & ~0xfffffffful) | (data & 0xfffffffful); + else + freg = (freg & 0xfffffffful) | (data << 32); + ret = ptrace_put_reg(child, numReg, freg); + } else { + u64 *tmp; + flush_fp_to_thread(child); + /* get 64 bit FPR ... */ + tmp = &(((u64 *)child->thread.fpr) + [FPRINDEX_3264(numReg)]); + /* ... write the 32 bit part we want */ + ((u32 *)tmp)[index % 2] = data; + ret = 0; + } + break; + } + + case PTRACE_GET_DEBUGREG: { + ret = -EINVAL; + /* We only support one DABR and no IABRS at the moment */ + if (addr > 0) + break; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + ret = put_user(child->thread.dac1, (u32 __user *)data); +#else + ret = put_user(child->thread.dabr, (u32 __user *)data); +#endif + break; + } + + case PTRACE_GETREGS: /* Get all pt_regs from the child. 
*/
+		return copy_regset_to_user(
+			child, task_user_regset_view(current), 0,
+			0, PT_REGS_COUNT * sizeof(compat_long_t),
+			compat_ptr(data));
+
+	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
+		return copy_regset_from_user(
+			child, task_user_regset_view(current), 0,
+			0, PT_REGS_COUNT * sizeof(compat_long_t),
+			compat_ptr(data));
+
+	case PTRACE_GETFPREGS:
+	case PTRACE_SETFPREGS:
+	case PTRACE_GETVRREGS:
+	case PTRACE_SETVRREGS:
+	case PTRACE_GETVSRREGS:
+	case PTRACE_SETVSRREGS:
+	case PTRACE_GETREGS64:
+	case PTRACE_SETREGS64:
+	case PPC_PTRACE_GETFPREGS:
+	case PPC_PTRACE_SETFPREGS:
+	case PTRACE_KILL:
+	case PTRACE_SINGLESTEP:
+	case PTRACE_DETACH:
+	case PTRACE_SET_DEBUGREG:
+	case PTRACE_SYSCALL:
+	case PTRACE_CONT:
+	case PPC_PTRACE_GETHWDBGINFO:
+	case PPC_PTRACE_SETHWDEBUG:
+	case PPC_PTRACE_DELHWDEBUG:
+		ret = arch_ptrace(child, request, addr, data);
+		break;
+
+	/* Old reverse args ptrace calls */
+	case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */
+	case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */
+		ret = compat_ptrace_old(child, request, addr, data);
+		break;
+
+	default:
+		ret = compat_ptrace_request(child, request, addr, data);
+		break;
+	}
+
+	return ret;
+}
diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S
new file mode 100644
index 00000000..ef46ba6e
--- /dev/null
+++ b/arch/powerpc/kernel/reloc_32.S
@@ -0,0 +1,208 @@
+/*
+ * Code to process dynamic relocations for PPC32.
+ *
+ * Copyright (C) IBM Corporation, 2011.
+ * Author: Suzuki Poulose <suzuki@in.ibm.com>
+ *
+ * - Based on ppc64 code - reloc_64.S
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
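
Before reading the assembly, it helps to have the record format and fixup formulas in front of you. The following is a reference sketch based on the ELF and PPC ABI definitions, not text from the patch:

    #include <stdint.h>

    /* One .rela.dyn entry as defined by ELF32. */
    typedef struct {
            uint32_t r_offset;   /* link-time address to patch           */
            uint32_t r_info;     /* symbol index << 8 | relocation type  */
            int32_t  r_addend;   /* constant A added to the symbol value */
    } Elf32_Rela;

    /* With x = S + A + load offset, the code below stores:
     *   R_PPC_ADDR16_LO:  x & 0xffff
     *   R_PPC_ADDR16_HI:  (x >> 16) & 0xffff
     *   R_PPC_ADDR16_HA:  ((x >> 16) + ((x & 0x8000) ? 1 : 0)) & 0xffff
     *   R_PPC_RELATIVE:   the full 32-bit word  load offset + A
     */
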
+ */ + +#include <asm/ppc_asm.h> + +/* Dynamic section table entry tags */ +DT_RELA = 7 /* Tag for Elf32_Rela section */ +DT_RELASZ = 8 /* Size of the Rela relocs */ +DT_RELAENT = 9 /* Size of one Rela reloc entry */ + +STN_UNDEF = 0 /* Undefined symbol index */ +STB_LOCAL = 0 /* Local binding for the symbol */ + +R_PPC_ADDR16_LO = 4 /* Lower half of (S+A) */ +R_PPC_ADDR16_HI = 5 /* Upper half of (S+A) */ +R_PPC_ADDR16_HA = 6 /* High Adjusted (S+A) */ +R_PPC_RELATIVE = 22 + +/* + * r3 = desired final address + */ + +_GLOBAL(relocate) + + mflr r0 /* Save our LR */ + bl 0f /* Find our current runtime address */ +0: mflr r12 /* Make it accessible */ + mtlr r0 + + lwz r11, (p_dyn - 0b)(r12) + add r11, r11, r12 /* runtime address of .dynamic section */ + lwz r9, (p_rela - 0b)(r12) + add r9, r9, r12 /* runtime address of .rela.dyn section */ + lwz r10, (p_st - 0b)(r12) + add r10, r10, r12 /* runtime address of _stext section */ + lwz r13, (p_sym - 0b)(r12) + add r13, r13, r12 /* runtime address of .dynsym section */ + + /* + * Scan the dynamic section for RELA, RELASZ entries + */ + li r6, 0 + li r7, 0 + li r8, 0 +1: lwz r5, 0(r11) /* ELF_Dyn.d_tag */ + cmpwi r5, 0 /* End of ELF_Dyn[] */ + beq eodyn + cmpwi r5, DT_RELA + bne relasz + lwz r7, 4(r11) /* r7 = rela.link */ + b skip +relasz: + cmpwi r5, DT_RELASZ + bne relaent + lwz r8, 4(r11) /* r8 = Total Rela relocs size */ + b skip +relaent: + cmpwi r5, DT_RELAENT + bne skip + lwz r6, 4(r11) /* r6 = Size of one Rela reloc */ +skip: + addi r11, r11, 8 + b 1b +eodyn: /* End of Dyn Table scan */ + + /* Check if we have found all the entries */ + cmpwi r7, 0 + beq done + cmpwi r8, 0 + beq done + cmpwi r6, 0 + beq done + + + /* + * Work out the current offset from the link time address of .rela + * section. + * cur_offset[r7] = rela.run[r9] - rela.link [r7] + * _stext.link[r12] = _stext.run[r10] - cur_offset[r7] + * final_offset[r3] = _stext.final[r3] - _stext.link[r12] + */ + subf r7, r7, r9 /* cur_offset */ + subf r12, r7, r10 + subf r3, r12, r3 /* final_offset */ + + subf r8, r6, r8 /* relaz -= relaent */ + /* + * Scan through the .rela table and process each entry + * r9 - points to the current .rela table entry + * r13 - points to the symbol table + */ + + /* + * Check if we have a relocation based on symbol + * r5 will hold the value of the symbol. + */ +applyrela: + lwz r4, 4(r9) /* r4 = rela.r_info */ + srwi r5, r4, 8 /* ELF32_R_SYM(r_info) */ + cmpwi r5, STN_UNDEF /* sym == STN_UNDEF ? */ + beq get_type /* value = 0 */ + /* Find the value of the symbol at index(r5) */ + slwi r5, r5, 4 /* r5 = r5 * sizeof(Elf32_Sym) */ + add r12, r13, r5 /* r12 = &__dyn_sym[Index] */ + + /* + * GNU ld has a bug, where dynamic relocs based on + * STB_LOCAL symbols, the value should be assumed + * to be zero. - Alan Modra + */ + /* XXX: Do we need to check if we are using GNU ld ? 
*/ + lbz r5, 12(r12) /* r5 = dyn_sym[Index].st_info */ + extrwi r5, r5, 4, 24 /* r5 = ELF32_ST_BIND(r5) */ + cmpwi r5, STB_LOCAL /* st_value = 0, ld bug */ + beq get_type /* We have r5 = 0 */ + lwz r5, 4(r12) /* r5 = __dyn_sym[Index].st_value */ + +get_type: + /* Load the relocation type to r4 */ + extrwi r4, r4, 8, 24 /* r4 = ELF32_R_TYPE(r_info) = ((char*)r4)[3] */ + + /* R_PPC_RELATIVE */ + cmpwi r4, R_PPC_RELATIVE + bne hi16 + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 /* final addend */ + stwx r0, r4, r7 /* memory[r4+r7]) = (u32)r0 */ + b nxtrela /* continue */ + + /* R_PPC_ADDR16_HI */ +hi16: + cmpwi r4, R_PPC_ADDR16_HI + bne ha16 + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 + add r0, r0, r5 /* r0 = (S+A+Offset) */ + extrwi r0, r0, 16, 0 /* r0 = (r0 >> 16) */ + b store_half + + /* R_PPC_ADDR16_HA */ +ha16: + cmpwi r4, R_PPC_ADDR16_HA + bne lo16 + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 + add r0, r0, r5 /* r0 = (S+A+Offset) */ + extrwi r5, r0, 1, 16 /* Extract bit 16 */ + extrwi r0, r0, 16, 0 /* r0 = (r0 >> 16) */ + add r0, r0, r5 /* Add it to r0 */ + b store_half + + /* R_PPC_ADDR16_LO */ +lo16: + cmpwi r4, R_PPC_ADDR16_LO + bne nxtrela + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 + add r0, r0, r5 /* r0 = (S+A+Offset) */ + extrwi r0, r0, 16, 16 /* r0 &= 0xffff */ + /* Fall through to */ + + /* Store half word */ +store_half: + sthx r0, r4, r7 /* memory[r4+r7] = (u16)r0 */ + +nxtrela: + /* + * We have to flush the modified instructions to the + * main storage from the d-cache. And also, invalidate the + * cached instructions in i-cache which has been modified. + * + * We delay the sync / isync operation till the end, since + * we won't be executing the modified instructions until + * we return from here. + */ + dcbst r4,r7 + sync /* Ensure the data is flushed before icbi */ + icbi r4,r7 + cmpwi r8, 0 /* relasz = 0 ? */ + ble done + add r9, r9, r6 /* move to next entry in the .rela table */ + subf r8, r6, r8 /* relasz -= relaent */ + b applyrela + +done: + sync /* Wait for the flush to finish */ + isync /* Discard prefetched instructions */ + blr + +p_dyn: .long __dynamic_start - 0b +p_rela: .long __rela_dyn_start - 0b +p_sym: .long __dynamic_symtab - 0b +p_st: .long _stext - 0b diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S new file mode 100644 index 00000000..b47a0e1a --- /dev/null +++ b/arch/powerpc/kernel/reloc_64.S @@ -0,0 +1,87 @@ +/* + * Code to process dynamic relocations in the kernel. + * + * Copyright 2008 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/ppc_asm.h> + +RELA = 7 +RELACOUNT = 0x6ffffff9 +R_PPC64_RELATIVE = 22 + +/* + * r3 = desired final address of kernel + */ +_GLOBAL(relocate) + mflr r0 + bcl 20,31,$+4 +0: mflr r12 /* r12 has runtime addr of label 0 */ + mtlr r0 + ld r11,(p_dyn - 0b)(r12) + add r11,r11,r12 /* r11 has runtime addr of .dynamic section */ + ld r9,(p_rela - 0b)(r12) + add r9,r9,r12 /* r9 has runtime addr of .rela.dyn section */ + ld r10,(p_st - 0b)(r12) + add r10,r10,r12 /* r10 has runtime addr of _stext */ + + /* + * Scan the dynamic section for the RELA and RELACOUNT entries. 
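
In C terms, the remainder of this routine amounts to the following loop (an illustrative model of the assembly under the standard Elf64_Rela layout; it is not code from the patch):

    #include <stdint.h>

    #define R_PPC64_RELATIVE 22

    typedef struct {
            uint64_t r_offset;   /* bytes 0-7:   where to patch       */
            uint64_t r_info;     /* bytes 8-15:  type in the low word */
            int64_t  r_addend;   /* bytes 16-23: addend               */
    } Elf64_Rela;

    /* reloc_delta = runtime minus link-time address (where to store);
     * value_delta = desired final base minus link-time base (what to
     * add to each addend). These correspond to r7 and r3 below.
     */
    static void apply_relative(Elf64_Rela *r, uint64_t count,
                               uint64_t reloc_delta, uint64_t value_delta)
    {
            uint64_t i;

            for (i = 0; i < count; i++, r++) {
                    if ((uint32_t)r->r_info != R_PPC64_RELATIVE)
                            break;  /* the asm also stops at a mismatch */
                    *(uint64_t *)(r->r_offset + reloc_delta) =
                            r->r_addend + value_delta;
            }
    }
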
+ */ + li r7,0 + li r8,0 +1: ld r6,0(r11) /* get tag */ + cmpdi r6,0 + beq 4f /* end of list */ + cmpdi r6,RELA + bne 2f + ld r7,8(r11) /* get RELA pointer in r7 */ + b 3f +2: addis r6,r6,(-RELACOUNT)@ha + cmpdi r6,RELACOUNT@l + bne 3f + ld r8,8(r11) /* get RELACOUNT value in r8 */ +3: addi r11,r11,16 + b 1b +4: cmpdi r7,0 /* check we have both RELA and RELACOUNT */ + cmpdi cr1,r8,0 + beq 6f + beq cr1,6f + + /* + * Work out linktime address of _stext and hence the + * relocation offset to be applied. + * cur_offset [r7] = rela.run [r9] - rela.link [r7] + * _stext.link [r10] = _stext.run [r10] - cur_offset [r7] + * final_offset [r3] = _stext.final [r3] - _stext.link [r10] + */ + subf r7,r7,r9 /* cur_offset */ + subf r10,r7,r10 + subf r3,r10,r3 /* final_offset */ + + /* + * Run through the list of relocations and process the + * R_PPC64_RELATIVE ones. + */ + mtctr r8 +5: lwz r0,12(9) /* ELF64_R_TYPE(reloc->r_info) */ + cmpwi r0,R_PPC64_RELATIVE + bne 6f + ld r6,0(r9) /* reloc->r_offset */ + ld r0,16(r9) /* reloc->r_addend */ + add r0,r0,r3 + stdx r0,r7,r6 + addi r9,r9,24 + bdnz 5b + +6: blr + +p_dyn: .llong __dynamic_start - 0b +p_rela: .llong __rela_dyn_start - 0b +p_st: .llong _stext - 0b + diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c new file mode 100644 index 00000000..8777fb02 --- /dev/null +++ b/arch/powerpc/kernel/rtas-proc.c @@ -0,0 +1,790 @@ +/* + * Copyright (C) 2000 Tilmann Bitterberg + * (tilmann@bitterberg.de) + * + * RTAS (Runtime Abstraction Services) stuff + * Intention is to provide a clean user interface + * to use the RTAS. + * + * TODO: + * Split off a header file and maybe move it to a different + * location. Write Documentation on what the /proc/rtas/ entries + * actually do. + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/ctype.h> +#include <linux/time.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/seq_file.h> +#include <linux/bitops.h> +#include <linux/rtc.h> + +#include <asm/uaccess.h> +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/machdep.h> /* for ppc_md */ +#include <asm/time.h> + +/* Token for Sensors */ +#define KEY_SWITCH 0x0001 +#define ENCLOSURE_SWITCH 0x0002 +#define THERMAL_SENSOR 0x0003 +#define LID_STATUS 0x0004 +#define POWER_SOURCE 0x0005 +#define BATTERY_VOLTAGE 0x0006 +#define BATTERY_REMAINING 0x0007 +#define BATTERY_PERCENTAGE 0x0008 +#define EPOW_SENSOR 0x0009 +#define BATTERY_CYCLESTATE 0x000a +#define BATTERY_CHARGING 0x000b + +/* IBM specific sensors */ +#define IBM_SURVEILLANCE 0x2328 /* 9000 */ +#define IBM_FANRPM 0x2329 /* 9001 */ +#define IBM_VOLTAGE 0x232a /* 9002 */ +#define IBM_DRCONNECTOR 0x232b /* 9003 */ +#define IBM_POWERSUPPLY 0x232c /* 9004 */ + +/* Status return values */ +#define SENSOR_CRITICAL_HIGH 13 +#define SENSOR_WARNING_HIGH 12 +#define SENSOR_NORMAL 11 +#define SENSOR_WARNING_LOW 10 +#define SENSOR_CRITICAL_LOW 9 +#define SENSOR_SUCCESS 0 +#define SENSOR_HW_ERROR -1 +#define SENSOR_BUSY -2 +#define SENSOR_NOT_EXIST -3 +#define SENSOR_DR_ENTITY -9000 + +/* Location Codes */ +#define LOC_SCSI_DEV_ADDR 'A' +#define LOC_SCSI_DEV_LOC 'B' +#define LOC_CPU 'C' +#define LOC_DISKETTE 'D' +#define LOC_ETHERNET 'E' +#define LOC_FAN 'F' +#define LOC_GRAPHICS 'G' +/* reserved / not used 'H' */ +#define LOC_IO_ADAPTER 'I' +/* reserved / not used 'J' */ +#define LOC_KEYBOARD 'K' +#define LOC_LCD 'L' +#define LOC_MEMORY 'M' +#define 
LOC_NV_MEMORY 'N' +#define LOC_MOUSE 'O' +#define LOC_PLANAR 'P' +#define LOC_OTHER_IO 'Q' +#define LOC_PARALLEL 'R' +#define LOC_SERIAL 'S' +#define LOC_DEAD_RING 'T' +#define LOC_RACKMOUNTED 'U' /* for _u_nit is rack mounted */ +#define LOC_VOLTAGE 'V' +#define LOC_SWITCH_ADAPTER 'W' +#define LOC_OTHER 'X' +#define LOC_FIRMWARE 'Y' +#define LOC_SCSI 'Z' + +/* Tokens for indicators */ +#define TONE_FREQUENCY 0x0001 /* 0 - 1000 (HZ)*/ +#define TONE_VOLUME 0x0002 /* 0 - 100 (%) */ +#define SYSTEM_POWER_STATE 0x0003 +#define WARNING_LIGHT 0x0004 +#define DISK_ACTIVITY_LIGHT 0x0005 +#define HEX_DISPLAY_UNIT 0x0006 +#define BATTERY_WARNING_TIME 0x0007 +#define CONDITION_CYCLE_REQUEST 0x0008 +#define SURVEILLANCE_INDICATOR 0x2328 /* 9000 */ +#define DR_ACTION 0x2329 /* 9001 */ +#define DR_INDICATOR 0x232a /* 9002 */ +/* 9003 - 9004: Vendor specific */ +/* 9006 - 9999: Vendor specific */ + +/* other */ +#define MAX_SENSORS 17 /* I only know of 17 sensors */ +#define MAX_LINELENGTH 256 +#define SENSOR_PREFIX "ibm,sensor-" +#define cel_to_fahr(x) ((x*9/5)+32) + + +/* Globals */ +static struct rtas_sensors sensors; +static struct device_node *rtas_node = NULL; +static unsigned long power_on_time = 0; /* Save the time the user set */ +static char progress_led[MAX_LINELENGTH]; + +static unsigned long rtas_tone_frequency = 1000; +static unsigned long rtas_tone_volume = 0; + +/* ****************STRUCTS******************************************* */ +struct individual_sensor { + unsigned int token; + unsigned int quant; +}; + +struct rtas_sensors { + struct individual_sensor sensor[MAX_SENSORS]; + unsigned int quant; +}; + +/* ****************************************************************** */ +/* Declarations */ +static int ppc_rtas_sensors_show(struct seq_file *m, void *v); +static int ppc_rtas_clock_show(struct seq_file *m, void *v); +static ssize_t ppc_rtas_clock_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos); +static int ppc_rtas_progress_show(struct seq_file *m, void *v); +static ssize_t ppc_rtas_progress_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos); +static int ppc_rtas_poweron_show(struct seq_file *m, void *v); +static ssize_t ppc_rtas_poweron_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos); + +static ssize_t ppc_rtas_tone_freq_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos); +static int ppc_rtas_tone_freq_show(struct seq_file *m, void *v); +static ssize_t ppc_rtas_tone_volume_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos); +static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v); +static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v); + +static int sensors_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_sensors_show, NULL); +} + +static const struct file_operations ppc_rtas_sensors_operations = { + .open = sensors_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int poweron_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_poweron_show, NULL); +} + +static const struct file_operations ppc_rtas_poweron_operations = { + .open = poweron_open, + .read = seq_read, + .llseek = seq_lseek, + .write = ppc_rtas_poweron_write, + .release = single_release, +}; + +static int progress_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_progress_show, NULL); +} + +static const struct 
file_operations ppc_rtas_progress_operations = { + .open = progress_open, + .read = seq_read, + .llseek = seq_lseek, + .write = ppc_rtas_progress_write, + .release = single_release, +}; + +static int clock_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_clock_show, NULL); +} + +static const struct file_operations ppc_rtas_clock_operations = { + .open = clock_open, + .read = seq_read, + .llseek = seq_lseek, + .write = ppc_rtas_clock_write, + .release = single_release, +}; + +static int tone_freq_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_tone_freq_show, NULL); +} + +static const struct file_operations ppc_rtas_tone_freq_operations = { + .open = tone_freq_open, + .read = seq_read, + .llseek = seq_lseek, + .write = ppc_rtas_tone_freq_write, + .release = single_release, +}; + +static int tone_volume_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_tone_volume_show, NULL); +} + +static const struct file_operations ppc_rtas_tone_volume_operations = { + .open = tone_volume_open, + .read = seq_read, + .llseek = seq_lseek, + .write = ppc_rtas_tone_volume_write, + .release = single_release, +}; + +static int rmo_buf_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_rmo_buf_show, NULL); +} + +static const struct file_operations ppc_rtas_rmo_buf_ops = { + .open = rmo_buf_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ppc_rtas_find_all_sensors(void); +static void ppc_rtas_process_sensor(struct seq_file *m, + struct individual_sensor *s, int state, int error, const char *loc); +static char *ppc_rtas_process_error(int error); +static void get_location_code(struct seq_file *m, + struct individual_sensor *s, const char *loc); +static void check_location_string(struct seq_file *m, const char *c); +static void check_location(struct seq_file *m, const char *c); + +static int __init proc_rtas_init(void) +{ + if (!machine_is(pseries)) + return -ENODEV; + + rtas_node = of_find_node_by_name(NULL, "rtas"); + if (rtas_node == NULL) + return -ENODEV; + + proc_create("powerpc/rtas/progress", S_IRUGO|S_IWUSR, NULL, + &ppc_rtas_progress_operations); + proc_create("powerpc/rtas/clock", S_IRUGO|S_IWUSR, NULL, + &ppc_rtas_clock_operations); + proc_create("powerpc/rtas/poweron", S_IWUSR|S_IRUGO, NULL, + &ppc_rtas_poweron_operations); + proc_create("powerpc/rtas/sensors", S_IRUGO, NULL, + &ppc_rtas_sensors_operations); + proc_create("powerpc/rtas/frequency", S_IWUSR|S_IRUGO, NULL, + &ppc_rtas_tone_freq_operations); + proc_create("powerpc/rtas/volume", S_IWUSR|S_IRUGO, NULL, + &ppc_rtas_tone_volume_operations); + proc_create("powerpc/rtas/rmo_buffer", S_IRUSR, NULL, + &ppc_rtas_rmo_buf_ops); + return 0; +} + +__initcall(proc_rtas_init); + +static int parse_number(const char __user *p, size_t count, unsigned long *val) +{ + char buf[40]; + char *end; + + if (count > 39) + return -EINVAL; + + if (copy_from_user(buf, p, count)) + return -EFAULT; + + buf[count] = 0; + + *val = simple_strtoul(buf, &end, 10); + if (*end && *end != '\n') + return -EINVAL; + + return 0; +} + +/* ****************************************************************** */ +/* POWER-ON-TIME */ +/* ****************************************************************** */ +static ssize_t ppc_rtas_poweron_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + struct rtc_time tm; + unsigned long nowtime; + int error = parse_number(buf, count, 
&nowtime); + if (error) + return error; + + power_on_time = nowtime; /* save the time */ + + to_tm(nowtime, &tm); + + error = rtas_call(rtas_token("set-time-for-power-on"), 7, 1, NULL, + tm.tm_year, tm.tm_mon, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */); + if (error) + printk(KERN_WARNING "error: setting poweron time returned: %s\n", + ppc_rtas_process_error(error)); + return count; +} +/* ****************************************************************** */ +static int ppc_rtas_poweron_show(struct seq_file *m, void *v) +{ + if (power_on_time == 0) + seq_printf(m, "Power on time not set\n"); + else + seq_printf(m, "%lu\n",power_on_time); + return 0; +} + +/* ****************************************************************** */ +/* PROGRESS */ +/* ****************************************************************** */ +static ssize_t ppc_rtas_progress_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + unsigned long hex; + + if (count >= MAX_LINELENGTH) + count = MAX_LINELENGTH -1; + if (copy_from_user(progress_led, buf, count)) { /* save the string */ + return -EFAULT; + } + progress_led[count] = 0; + + /* Lets see if the user passed hexdigits */ + hex = simple_strtoul(progress_led, NULL, 10); + + rtas_progress ((char *)progress_led, hex); + return count; + + /* clear the line */ + /* rtas_progress(" ", 0xffff);*/ +} +/* ****************************************************************** */ +static int ppc_rtas_progress_show(struct seq_file *m, void *v) +{ + if (progress_led[0]) + seq_printf(m, "%s\n", progress_led); + return 0; +} + +/* ****************************************************************** */ +/* CLOCK */ +/* ****************************************************************** */ +static ssize_t ppc_rtas_clock_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + struct rtc_time tm; + unsigned long nowtime; + int error = parse_number(buf, count, &nowtime); + if (error) + return error; + + to_tm(nowtime, &tm); + error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, + tm.tm_year, tm.tm_mon, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, 0); + if (error) + printk(KERN_WARNING "error: setting the clock returned: %s\n", + ppc_rtas_process_error(error)); + return count; +} +/* ****************************************************************** */ +static int ppc_rtas_clock_show(struct seq_file *m, void *v) +{ + int ret[8]; + int error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + + if (error) { + printk(KERN_WARNING "error: reading the clock returned: %s\n", + ppc_rtas_process_error(error)); + seq_printf(m, "0"); + } else { + unsigned int year, mon, day, hour, min, sec; + year = ret[0]; mon = ret[1]; day = ret[2]; + hour = ret[3]; min = ret[4]; sec = ret[5]; + seq_printf(m, "%lu\n", + mktime(year, mon, day, hour, min, sec)); + } + return 0; +} + +/* ****************************************************************** */ +/* SENSOR STUFF */ +/* ****************************************************************** */ +static int ppc_rtas_sensors_show(struct seq_file *m, void *v) +{ + int i,j; + int state, error; + int get_sensor_state = rtas_token("get-sensor-state"); + + seq_printf(m, "RTAS (RunTime Abstraction Services) Sensor Information\n"); + seq_printf(m, "Sensor\t\tValue\t\tCondition\tLocation\n"); + seq_printf(m, "********************************************************\n"); + + if (ppc_rtas_find_all_sensors() != 0) { + seq_printf(m, "\nNo sensors are available\n"); + return 0; + } + + for 
(i=0; i<sensors.quant; i++) {
+		struct individual_sensor *p = &sensors.sensor[i];
+		char rstr[64];
+		const char *loc;
+		int llen, offs;
+
+		sprintf (rstr, SENSOR_PREFIX"%04d", p->token);
+		loc = of_get_property(rtas_node, rstr, &llen);
+
+		/* A sensor may have multiple instances */
+		for (j = 0, offs = 0; j <= p->quant; j++) {
+			error = rtas_call(get_sensor_state, 2, 2, &state,
+					p->token, j);
+
+			ppc_rtas_process_sensor(m, p, state, error, loc);
+			seq_putc(m, '\n');
+			if (loc) {
+				offs += strlen(loc) + 1;
+				loc += strlen(loc) + 1;
+				if (offs >= llen)
+					loc = NULL;
+			}
+		}
+	}
+	return 0;
+}
+
+/* ****************************************************************** */
+
+static int ppc_rtas_find_all_sensors(void)
+{
+	const unsigned int *utmp;
+	int len, i;
+
+	utmp = of_get_property(rtas_node, "rtas-sensors", &len);
+	if (utmp == NULL) {
+		printk (KERN_ERR "error: could not get rtas-sensors\n");
+		return 1;
+	}
+
+	sensors.quant = len / 8;	/* int + int */
+
+	for (i=0; i<sensors.quant; i++) {
+		sensors.sensor[i].token = *utmp++;
+		sensors.sensor[i].quant = *utmp++;
+	}
+	return 0;
+}
+
+/* ****************************************************************** */
+/*
+ * Builds a string of what rtas returned
+ */
+static char *ppc_rtas_process_error(int error)
+{
+	switch (error) {
+	case SENSOR_CRITICAL_HIGH:
+		return "(critical high)";
+	case SENSOR_WARNING_HIGH:
+		return "(warning high)";
+	case SENSOR_NORMAL:
+		return "(normal)";
+	case SENSOR_WARNING_LOW:
+		return "(warning low)";
+	case SENSOR_CRITICAL_LOW:
+		return "(critical low)";
+	case SENSOR_SUCCESS:
+		return "(read ok)";
+	case SENSOR_HW_ERROR:
+		return "(hardware error)";
+	case SENSOR_BUSY:
+		return "(busy)";
+	case SENSOR_NOT_EXIST:
+		return "(non existent)";
+	case SENSOR_DR_ENTITY:
+		return "(dr entity removed)";
+	default:
+		return "(UNKNOWN)";
+	}
+}
+
+/* ****************************************************************** */
+/*
+ * Builds a string out of what the sensor said
+ */
+
+static void ppc_rtas_process_sensor(struct seq_file *m,
+	struct individual_sensor *s, int state, int error, const char *loc)
+{
+	/* Defined return values */
+	const char * key_switch[] = { "Off\t", "Normal\t", "Secure\t",
+					"Maintenance" };
+	const char * enclosure_switch[] = { "Closed", "Open" };
+	const char * lid_status[] = { " ", "Open", "Closed" };
+	const char * power_source[] = { "AC\t", "Battery",
+					"AC & Battery" };
+	const char * battery_remaining[] = { "Very Low", "Low", "Mid", "High" };
+	const char * epow_sensor[] = {
+		"EPOW Reset", "Cooling warning", "Power warning",
+		"System shutdown", "System halt", "EPOW main enclosure",
+		"EPOW power off" };
+	const char * battery_cyclestate[] = { "None", "In progress",
+						"Requested" };
+	const char * battery_charging[] = { "Charging", "Discharging",
+						"No current flow" };
+	const char * ibm_drconnector[] = { "Empty", "Present", "Unusable",
+						"Exchange" };
+
+	int have_strings = 0;
+	int num_states = 0;
+	int temperature = 0;
+	int unknown = 0;
+
+	/* What kind of sensor do we have here? 
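+	 *
+	 * Each token below picks a printable label, and where the reported
+	 * state indexes one of the string tables above it is decoded too.
+	 * Every table-backed case uses the same bounds-checked pattern,
+	 * roughly (the table name here is illustrative):
+	 *
+	 *	num_states = sizeof(table) / sizeof(char *);
+	 *	if (state < num_states) {
+	 *		seq_printf(m, "%s\t", table[state]);
+	 *		have_strings = 1;
+	 *	}
+	 *
+	 * A state outside the table falls through to the raw numeric
+	 * print at the bottom of this function.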
*/ + + switch (s->token) { + case KEY_SWITCH: + seq_printf(m, "Key switch:\t"); + num_states = sizeof(key_switch) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", key_switch[state]); + have_strings = 1; + } + break; + case ENCLOSURE_SWITCH: + seq_printf(m, "Enclosure switch:\t"); + num_states = sizeof(enclosure_switch) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", + enclosure_switch[state]); + have_strings = 1; + } + break; + case THERMAL_SENSOR: + seq_printf(m, "Temp. (C/F):\t"); + temperature = 1; + break; + case LID_STATUS: + seq_printf(m, "Lid status:\t"); + num_states = sizeof(lid_status) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", lid_status[state]); + have_strings = 1; + } + break; + case POWER_SOURCE: + seq_printf(m, "Power source:\t"); + num_states = sizeof(power_source) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", + power_source[state]); + have_strings = 1; + } + break; + case BATTERY_VOLTAGE: + seq_printf(m, "Battery voltage:\t"); + break; + case BATTERY_REMAINING: + seq_printf(m, "Battery remaining:\t"); + num_states = sizeof(battery_remaining) / sizeof(char *); + if (state < num_states) + { + seq_printf(m, "%s\t", + battery_remaining[state]); + have_strings = 1; + } + break; + case BATTERY_PERCENTAGE: + seq_printf(m, "Battery percentage:\t"); + break; + case EPOW_SENSOR: + seq_printf(m, "EPOW Sensor:\t"); + num_states = sizeof(epow_sensor) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", epow_sensor[state]); + have_strings = 1; + } + break; + case BATTERY_CYCLESTATE: + seq_printf(m, "Battery cyclestate:\t"); + num_states = sizeof(battery_cyclestate) / + sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", + battery_cyclestate[state]); + have_strings = 1; + } + break; + case BATTERY_CHARGING: + seq_printf(m, "Battery Charging:\t"); + num_states = sizeof(battery_charging) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", + battery_charging[state]); + have_strings = 1; + } + break; + case IBM_SURVEILLANCE: + seq_printf(m, "Surveillance:\t"); + break; + case IBM_FANRPM: + seq_printf(m, "Fan (rpm):\t"); + break; + case IBM_VOLTAGE: + seq_printf(m, "Voltage (mv):\t"); + break; + case IBM_DRCONNECTOR: + seq_printf(m, "DR connector:\t"); + num_states = sizeof(ibm_drconnector) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", + ibm_drconnector[state]); + have_strings = 1; + } + break; + case IBM_POWERSUPPLY: + seq_printf(m, "Powersupply:\t"); + break; + default: + seq_printf(m, "Unknown sensor (type %d), ignoring it\n", + s->token); + unknown = 1; + have_strings = 1; + break; + } + if (have_strings == 0) { + if (temperature) { + seq_printf(m, "%4d /%4d\t", state, cel_to_fahr(state)); + } else + seq_printf(m, "%10d\t", state); + } + if (unknown == 0) { + seq_printf(m, "%s\t", ppc_rtas_process_error(error)); + get_location_code(m, s, loc); + } +} + +/* ****************************************************************** */ + +static void check_location(struct seq_file *m, const char *c) +{ + switch (c[0]) { + case LOC_PLANAR: + seq_printf(m, "Planar #%c", c[1]); + break; + case LOC_CPU: + seq_printf(m, "CPU #%c", c[1]); + break; + case LOC_FAN: + seq_printf(m, "Fan #%c", c[1]); + break; + case LOC_RACKMOUNTED: + seq_printf(m, "Rack #%c", c[1]); + break; + case LOC_VOLTAGE: + seq_printf(m, "Voltage #%c", c[1]); + break; + case LOC_LCD: + seq_printf(m, "LCD #%c", c[1]); + break; + case '.': + seq_printf(m, "- %c", c[1]); + 
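+		/* '.' abbreviates a component; see the format note
+		 * above check_location_string() below. */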
break; + default: + seq_printf(m, "Unknown location"); + break; + } +} + + +/* ****************************************************************** */ +/* + * Format: + * ${LETTER}${NUMBER}[[-/]${LETTER}${NUMBER} [ ... ] ] + * the '.' may be an abbrevation + */ +static void check_location_string(struct seq_file *m, const char *c) +{ + while (*c) { + if (isalpha(*c) || *c == '.') + check_location(m, c); + else if (*c == '/' || *c == '-') + seq_printf(m, " at "); + c++; + } +} + + +/* ****************************************************************** */ + +static void get_location_code(struct seq_file *m, struct individual_sensor *s, + const char *loc) +{ + if (!loc || !*loc) { + seq_printf(m, "---");/* does not have a location */ + } else { + check_location_string(m, loc); + } + seq_putc(m, ' '); +} +/* ****************************************************************** */ +/* INDICATORS - Tone Frequency */ +/* ****************************************************************** */ +static ssize_t ppc_rtas_tone_freq_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + unsigned long freq; + int error = parse_number(buf, count, &freq); + if (error) + return error; + + rtas_tone_frequency = freq; /* save it for later */ + error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL, + TONE_FREQUENCY, 0, freq); + if (error) + printk(KERN_WARNING "error: setting tone frequency returned: %s\n", + ppc_rtas_process_error(error)); + return count; +} +/* ****************************************************************** */ +static int ppc_rtas_tone_freq_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%lu\n", rtas_tone_frequency); + return 0; +} +/* ****************************************************************** */ +/* INDICATORS - Tone Volume */ +/* ****************************************************************** */ +static ssize_t ppc_rtas_tone_volume_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + unsigned long volume; + int error = parse_number(buf, count, &volume); + if (error) + return error; + + if (volume > 100) + volume = 100; + + rtas_tone_volume = volume; /* save it for later */ + error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL, + TONE_VOLUME, 0, volume); + if (error) + printk(KERN_WARNING "error: setting tone volume returned: %s\n", + ppc_rtas_process_error(error)); + return count; +} +/* ****************************************************************** */ +static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%lu\n", rtas_tone_volume); + return 0; +} + +#define RMO_READ_BUF_MAX 30 + +/* RTAS Userspace access */ +static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_RMOBUF_MAX); + return 0; +} diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c new file mode 100644 index 00000000..c57c1935 --- /dev/null +++ b/arch/powerpc/kernel/rtas-rtc.c @@ -0,0 +1,112 @@ +#include <linux/kernel.h> +#include <linux/time.h> +#include <linux/timer.h> +#include <linux/init.h> +#include <linux/rtc.h> +#include <linux/delay.h> +#include <linux/ratelimit.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/time.h> + + +#define MAX_RTC_WAIT 5000 /* 5 sec */ +#define RTAS_CLOCK_BUSY (-2) +unsigned long __init rtas_get_boot_time(void) +{ + int ret[8]; + int error; + unsigned int wait_time; + u64 max_wait_tb; + + max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; + do { + error = 
rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + + wait_time = rtas_busy_delay_time(error); + if (wait_time) { + /* This is boot time so we spin. */ + udelay(wait_time*1000); + } + } while (wait_time && (get_tb() < max_wait_tb)); + + if (error != 0) { + printk_ratelimited(KERN_WARNING + "error: reading the clock failed (%d)\n", + error); + return 0; + } + + return mktime(ret[0], ret[1], ret[2], ret[3], ret[4], ret[5]); +} + +/* NOTE: get_rtc_time will get an error if executed in interrupt context + * and if a delay is needed to read the clock. In this case we just + * silently return without updating rtc_tm. + */ +void rtas_get_rtc_time(struct rtc_time *rtc_tm) +{ + int ret[8]; + int error; + unsigned int wait_time; + u64 max_wait_tb; + + max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; + do { + error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + + wait_time = rtas_busy_delay_time(error); + if (wait_time) { + if (in_interrupt()) { + memset(rtc_tm, 0, sizeof(struct rtc_time)); + printk_ratelimited(KERN_WARNING + "error: reading clock " + "would delay interrupt\n"); + return; /* delay not allowed */ + } + msleep(wait_time); + } + } while (wait_time && (get_tb() < max_wait_tb)); + + if (error != 0) { + printk_ratelimited(KERN_WARNING + "error: reading the clock failed (%d)\n", + error); + return; + } + + rtc_tm->tm_sec = ret[5]; + rtc_tm->tm_min = ret[4]; + rtc_tm->tm_hour = ret[3]; + rtc_tm->tm_mday = ret[2]; + rtc_tm->tm_mon = ret[1] - 1; + rtc_tm->tm_year = ret[0] - 1900; +} + +int rtas_set_rtc_time(struct rtc_time *tm) +{ + int error, wait_time; + u64 max_wait_tb; + + max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; + do { + error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, + tm->tm_year + 1900, tm->tm_mon + 1, + tm->tm_mday, tm->tm_hour, tm->tm_min, + tm->tm_sec, 0); + + wait_time = rtas_busy_delay_time(error); + if (wait_time) { + if (in_interrupt()) + return 1; /* probably decrementer */ + msleep(wait_time); + } + } while (wait_time && (get_tb() < max_wait_tb)); + + if (error != 0) + printk_ratelimited(KERN_WARNING + "error: setting the clock failed (%d)\n", + error); + + return 0; +} diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c new file mode 100644 index 00000000..fcec3824 --- /dev/null +++ b/arch/powerpc/kernel/rtas.c @@ -0,0 +1,1088 @@ +/* + * + * Procedures for interfacing to the RTAS on CHRP machines. + * + * Peter Bergner, IBM March 2001. + * Copyright (C) 2001 IBM. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <stdarg.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/capability.h> +#include <linux/delay.h> +#include <linux/smp.h> +#include <linux/completion.h> +#include <linux/cpumask.h> +#include <linux/memblock.h> +#include <linux/slab.h> +#include <linux/reboot.h> + +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/hvcall.h> +#include <asm/machdep.h> +#include <asm/firmware.h> +#include <asm/page.h> +#include <asm/param.h> +#include <asm/delay.h> +#include <asm/uaccess.h> +#include <asm/udbg.h> +#include <asm/syscalls.h> +#include <asm/smp.h> +#include <linux/atomic.h> +#include <asm/time.h> +#include <asm/mmu.h> +#include <asm/topology.h> +#include <asm/pSeries_reconfig.h> + +struct rtas_t rtas = { + .lock = __ARCH_SPIN_LOCK_UNLOCKED +}; +EXPORT_SYMBOL(rtas); + +DEFINE_SPINLOCK(rtas_data_buf_lock); +EXPORT_SYMBOL(rtas_data_buf_lock); + +char rtas_data_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned; +EXPORT_SYMBOL(rtas_data_buf); + +unsigned long rtas_rmo_buf; + +/* + * If non-NULL, this gets called when the kernel terminates. + * This is done like this so rtas_flash can be a module. + */ +void (*rtas_flash_term_hook)(int); +EXPORT_SYMBOL(rtas_flash_term_hook); + +/* RTAS use home made raw locking instead of spin_lock_irqsave + * because those can be called from within really nasty contexts + * such as having the timebase stopped which would lockup with + * normal locks and spinlock debugging enabled + */ +static unsigned long lock_rtas(void) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + arch_spin_lock_flags(&rtas.lock, flags); + return flags; +} + +static void unlock_rtas(unsigned long flags) +{ + arch_spin_unlock(&rtas.lock); + local_irq_restore(flags); + preempt_enable(); +} + +/* + * call_rtas_display_status and call_rtas_display_status_delay + * are designed only for very early low-level debugging, which + * is why the token is hard-coded to 10. + */ +static void call_rtas_display_status(char c) +{ + struct rtas_args *args = &rtas.args; + unsigned long s; + + if (!rtas.base) + return; + s = lock_rtas(); + + args->token = 10; + args->nargs = 1; + args->nret = 1; + args->rets = (rtas_arg_t *)&(args->args[1]); + args->args[0] = (unsigned char)c; + + enter_rtas(__pa(args)); + + unlock_rtas(s); +} + +static void call_rtas_display_status_delay(char c) +{ + static int pending_newline = 0; /* did last write end with unprinted newline? */ + static int width = 16; + + if (c == '\n') { + while (width-- > 0) + call_rtas_display_status(' '); + width = 16; + mdelay(500); + pending_newline = 1; + } else { + if (pending_newline) { + call_rtas_display_status('\r'); + call_rtas_display_status('\n'); + } + pending_newline = 0; + if (width--) { + call_rtas_display_status(c); + udelay(10000); + } + } +} + +void __init udbg_init_rtas_panel(void) +{ + udbg_putc = call_rtas_display_status_delay; +} + +#ifdef CONFIG_UDBG_RTAS_CONSOLE + +/* If you think you're dying before early_init_dt_scan_rtas() does its + * work, you can hard code the token values for your firmware here and + * hardcode rtas.base/entry etc. 
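+ *
+ * A sketch of what that could look like (the token numbers are purely
+ * illustrative; the real values come from the "put-term-char" and
+ * "get-term-char" properties of the /rtas node):
+ *
+ *	rtas_putchar_token = 42;
+ *	rtas_getchar_token = 43;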
+ */ +static unsigned int rtas_putchar_token = RTAS_UNKNOWN_SERVICE; +static unsigned int rtas_getchar_token = RTAS_UNKNOWN_SERVICE; + +static void udbg_rtascon_putc(char c) +{ + int tries; + + if (!rtas.base) + return; + + /* Add CRs before LFs */ + if (c == '\n') + udbg_rtascon_putc('\r'); + + /* if there is more than one character to be displayed, wait a bit */ + for (tries = 0; tries < 16; tries++) { + if (rtas_call(rtas_putchar_token, 1, 1, NULL, c) == 0) + break; + udelay(1000); + } +} + +static int udbg_rtascon_getc_poll(void) +{ + int c; + + if (!rtas.base) + return -1; + + if (rtas_call(rtas_getchar_token, 0, 2, &c)) + return -1; + + return c; +} + +static int udbg_rtascon_getc(void) +{ + int c; + + while ((c = udbg_rtascon_getc_poll()) == -1) + ; + + return c; +} + + +void __init udbg_init_rtas_console(void) +{ + udbg_putc = udbg_rtascon_putc; + udbg_getc = udbg_rtascon_getc; + udbg_getc_poll = udbg_rtascon_getc_poll; +} +#endif /* CONFIG_UDBG_RTAS_CONSOLE */ + +void rtas_progress(char *s, unsigned short hex) +{ + struct device_node *root; + int width; + const int *p; + char *os; + static int display_character, set_indicator; + static int display_width, display_lines, form_feed; + static const int *row_width; + static DEFINE_SPINLOCK(progress_lock); + static int current_line; + static int pending_newline = 0; /* did last write end with unprinted newline? */ + + if (!rtas.base) + return; + + if (display_width == 0) { + display_width = 0x10; + if ((root = of_find_node_by_path("/rtas"))) { + if ((p = of_get_property(root, + "ibm,display-line-length", NULL))) + display_width = *p; + if ((p = of_get_property(root, + "ibm,form-feed", NULL))) + form_feed = *p; + if ((p = of_get_property(root, + "ibm,display-number-of-lines", NULL))) + display_lines = *p; + row_width = of_get_property(root, + "ibm,display-truncation-length", NULL); + of_node_put(root); + } + display_character = rtas_token("display-character"); + set_indicator = rtas_token("set-indicator"); + } + + if (display_character == RTAS_UNKNOWN_SERVICE) { + /* use hex display if available */ + if (set_indicator != RTAS_UNKNOWN_SERVICE) + rtas_call(set_indicator, 3, 1, NULL, 6, 0, hex); + return; + } + + spin_lock(&progress_lock); + + /* + * Last write ended with newline, but we didn't print it since + * it would just clear the bottom line of output. Print it now + * instead. + * + * If no newline is pending and form feed is supported, clear the + * display with a form feed; otherwise, print a CR to start output + * at the beginning of the line. + */ + if (pending_newline) { + rtas_call(display_character, 1, 1, NULL, '\r'); + rtas_call(display_character, 1, 1, NULL, '\n'); + pending_newline = 0; + } else { + current_line = 0; + if (form_feed) + rtas_call(display_character, 1, 1, NULL, + (char)form_feed); + else + rtas_call(display_character, 1, 1, NULL, '\r'); + } + + if (row_width) + width = row_width[current_line]; + else + width = display_width; + os = s; + while (*os) { + if (*os == '\n' || *os == '\r') { + /* If newline is the last character, save it + * until next call to avoid bumping up the + * display output. 
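+			 * On a small operator panel printing the newline
+			 * right away would scroll the message and leave a
+			 * blank bottom line, so we print it at the start
+			 * of the next call instead.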
+ */ + if (*os == '\n' && !os[1]) { + pending_newline = 1; + current_line++; + if (current_line > display_lines-1) + current_line = display_lines-1; + spin_unlock(&progress_lock); + return; + } + + /* RTAS wants CR-LF, not just LF */ + + if (*os == '\n') { + rtas_call(display_character, 1, 1, NULL, '\r'); + rtas_call(display_character, 1, 1, NULL, '\n'); + } else { + /* CR might be used to re-draw a line, so we'll + * leave it alone and not add LF. + */ + rtas_call(display_character, 1, 1, NULL, *os); + } + + if (row_width) + width = row_width[current_line]; + else + width = display_width; + } else { + width--; + rtas_call(display_character, 1, 1, NULL, *os); + } + + os++; + + /* if we overwrite the screen length */ + if (width <= 0) + while ((*os != 0) && (*os != '\n') && (*os != '\r')) + os++; + } + + spin_unlock(&progress_lock); +} +EXPORT_SYMBOL(rtas_progress); /* needed by rtas_flash module */ + +int rtas_token(const char *service) +{ + const int *tokp; + if (rtas.dev == NULL) + return RTAS_UNKNOWN_SERVICE; + tokp = of_get_property(rtas.dev, service, NULL); + return tokp ? *tokp : RTAS_UNKNOWN_SERVICE; +} +EXPORT_SYMBOL(rtas_token); + +int rtas_service_present(const char *service) +{ + return rtas_token(service) != RTAS_UNKNOWN_SERVICE; +} +EXPORT_SYMBOL(rtas_service_present); + +#ifdef CONFIG_RTAS_ERROR_LOGGING +/* + * Return the firmware-specified size of the error log buffer + * for all rtas calls that require an error buffer argument. + * This includes 'check-exception' and 'rtas-last-error'. + */ +int rtas_get_error_log_max(void) +{ + static int rtas_error_log_max; + if (rtas_error_log_max) + return rtas_error_log_max; + + rtas_error_log_max = rtas_token ("rtas-error-log-max"); + if ((rtas_error_log_max == RTAS_UNKNOWN_SERVICE) || + (rtas_error_log_max > RTAS_ERROR_LOG_MAX)) { + printk (KERN_WARNING "RTAS: bad log buffer size %d\n", + rtas_error_log_max); + rtas_error_log_max = RTAS_ERROR_LOG_MAX; + } + return rtas_error_log_max; +} +EXPORT_SYMBOL(rtas_get_error_log_max); + + +static char rtas_err_buf[RTAS_ERROR_LOG_MAX]; +static int rtas_last_error_token; + +/** Return a copy of the detailed error text associated with the + * most recent failed call to rtas. Because the error text + * might go stale if there are any other intervening rtas calls, + * this routine must be called atomically with whatever produced + * the error (i.e. with rtas.lock still held from the previous call). + */ +static char *__fetch_rtas_last_error(char *altbuf) +{ + struct rtas_args err_args, save_args; + u32 bufsz; + char *buf = NULL; + + if (rtas_last_error_token == -1) + return NULL; + + bufsz = rtas_get_error_log_max(); + + err_args.token = rtas_last_error_token; + err_args.nargs = 2; + err_args.nret = 1; + err_args.args[0] = (rtas_arg_t)__pa(rtas_err_buf); + err_args.args[1] = bufsz; + err_args.args[2] = 0; + + save_args = rtas.args; + rtas.args = err_args; + + enter_rtas(__pa(&rtas.args)); + + err_args = rtas.args; + rtas.args = save_args; + + /* Log the error in the unlikely case that there was one. 
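+	 * rtas-last-error reports its own status in args[2] (the first
+	 * return word, since nargs == 2 here); zero means an error log
+	 * was deposited in rtas_err_buf, so copy it out for the caller.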
*/ + if (unlikely(err_args.args[2] == 0)) { + if (altbuf) { + buf = altbuf; + } else { + buf = rtas_err_buf; + if (mem_init_done) + buf = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC); + } + if (buf) + memcpy(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX); + } + + return buf; +} + +#define get_errorlog_buffer() kmalloc(RTAS_ERROR_LOG_MAX, GFP_KERNEL) + +#else /* CONFIG_RTAS_ERROR_LOGGING */ +#define __fetch_rtas_last_error(x) NULL +#define get_errorlog_buffer() NULL +#endif + +int rtas_call(int token, int nargs, int nret, int *outputs, ...) +{ + va_list list; + int i; + unsigned long s; + struct rtas_args *rtas_args; + char *buff_copy = NULL; + int ret; + + if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE) + return -1; + + s = lock_rtas(); + rtas_args = &rtas.args; + + rtas_args->token = token; + rtas_args->nargs = nargs; + rtas_args->nret = nret; + rtas_args->rets = (rtas_arg_t *)&(rtas_args->args[nargs]); + va_start(list, outputs); + for (i = 0; i < nargs; ++i) + rtas_args->args[i] = va_arg(list, rtas_arg_t); + va_end(list); + + for (i = 0; i < nret; ++i) + rtas_args->rets[i] = 0; + + enter_rtas(__pa(rtas_args)); + + /* A -1 return code indicates that the last command couldn't + be completed due to a hardware error. */ + if (rtas_args->rets[0] == -1) + buff_copy = __fetch_rtas_last_error(NULL); + + if (nret > 1 && outputs != NULL) + for (i = 0; i < nret-1; ++i) + outputs[i] = rtas_args->rets[i+1]; + ret = (nret > 0)? rtas_args->rets[0]: 0; + + unlock_rtas(s); + + if (buff_copy) { + log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0); + if (mem_init_done) + kfree(buff_copy); + } + return ret; +} +EXPORT_SYMBOL(rtas_call); + +/* For RTAS_BUSY (-2), delay for 1 millisecond. For an extended busy status + * code of 990n, perform the hinted delay of 10^n (last digit) milliseconds. + */ +unsigned int rtas_busy_delay_time(int status) +{ + int order; + unsigned int ms = 0; + + if (status == RTAS_BUSY) { + ms = 1; + } else if (status >= 9900 && status <= 9905) { + order = status - 9900; + for (ms = 1; order > 0; order--) + ms *= 10; + } + + return ms; +} +EXPORT_SYMBOL(rtas_busy_delay_time); + +/* For an RTAS busy status code, perform the hinted delay. 
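+ *
+ * Callers normally just retry until the busy status goes away, e.g.
+ * (this mirrors rtas_get_sensor() below):
+ *
+ *	do {
+ *		rc = rtas_call(token, 2, 2, state, sensor, index);
+ *	} while (rtas_busy_delay(rc));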
*/ +unsigned int rtas_busy_delay(int status) +{ + unsigned int ms; + + might_sleep(); + ms = rtas_busy_delay_time(status); + if (ms && need_resched()) + msleep(ms); + + return ms; +} +EXPORT_SYMBOL(rtas_busy_delay); + +static int rtas_error_rc(int rtas_rc) +{ + int rc; + + switch (rtas_rc) { + case -1: /* Hardware Error */ + rc = -EIO; + break; + case -3: /* Bad indicator/domain/etc */ + rc = -EINVAL; + break; + case -9000: /* Isolation error */ + rc = -EFAULT; + break; + case -9001: /* Outstanding TCE/PTE */ + rc = -EEXIST; + break; + case -9002: /* No usable slot */ + rc = -ENODEV; + break; + default: + printk(KERN_ERR "%s: unexpected RTAS error %d\n", + __func__, rtas_rc); + rc = -ERANGE; + break; + } + return rc; +} + +int rtas_get_power_level(int powerdomain, int *level) +{ + int token = rtas_token("get-power-level"); + int rc; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + while ((rc = rtas_call(token, 1, 2, level, powerdomain)) == RTAS_BUSY) + udelay(1); + + if (rc < 0) + return rtas_error_rc(rc); + return rc; +} +EXPORT_SYMBOL(rtas_get_power_level); + +int rtas_set_power_level(int powerdomain, int level, int *setlevel) +{ + int token = rtas_token("set-power-level"); + int rc; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + do { + rc = rtas_call(token, 2, 2, setlevel, powerdomain, level); + } while (rtas_busy_delay(rc)); + + if (rc < 0) + return rtas_error_rc(rc); + return rc; +} +EXPORT_SYMBOL(rtas_set_power_level); + +int rtas_get_sensor(int sensor, int index, int *state) +{ + int token = rtas_token("get-sensor-state"); + int rc; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + do { + rc = rtas_call(token, 2, 2, state, sensor, index); + } while (rtas_busy_delay(rc)); + + if (rc < 0) + return rtas_error_rc(rc); + return rc; +} +EXPORT_SYMBOL(rtas_get_sensor); + +bool rtas_indicator_present(int token, int *maxindex) +{ + int proplen, count, i; + const struct indicator_elem { + u32 token; + u32 maxindex; + } *indicators; + + indicators = of_get_property(rtas.dev, "rtas-indicators", &proplen); + if (!indicators) + return false; + + count = proplen / sizeof(struct indicator_elem); + + for (i = 0; i < count; i++) { + if (indicators[i].token != token) + continue; + if (maxindex) + *maxindex = indicators[i].maxindex; + return true; + } + + return false; +} +EXPORT_SYMBOL(rtas_indicator_present); + +int rtas_set_indicator(int indicator, int index, int new_value) +{ + int token = rtas_token("set-indicator"); + int rc; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + do { + rc = rtas_call(token, 3, 1, NULL, indicator, index, new_value); + } while (rtas_busy_delay(rc)); + + if (rc < 0) + return rtas_error_rc(rc); + return rc; +} +EXPORT_SYMBOL(rtas_set_indicator); + +/* + * Ignoring RTAS extended delay + */ +int rtas_set_indicator_fast(int indicator, int index, int new_value) +{ + int rc; + int token = rtas_token("set-indicator"); + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + rc = rtas_call(token, 3, 1, NULL, indicator, index, new_value); + + WARN_ON(rc == -2 || (rc >= 9900 && rc <= 9905)); + + if (rc < 0) + return rtas_error_rc(rc); + + return rc; +} + +void rtas_restart(char *cmd) +{ + if (rtas_flash_term_hook) + rtas_flash_term_hook(SYS_RESTART); + printk("RTAS system-reboot returned %d\n", + rtas_call(rtas_token("system-reboot"), 0, 1, NULL)); + for (;;); +} + +void rtas_power_off(void) +{ + if (rtas_flash_term_hook) + rtas_flash_term_hook(SYS_POWER_OFF); + /* allow power on only with power button press */ + printk("RTAS 
power-off returned %d\n", + rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); + for (;;); +} + +void rtas_halt(void) +{ + if (rtas_flash_term_hook) + rtas_flash_term_hook(SYS_HALT); + /* allow power on only with power button press */ + printk("RTAS power-off returned %d\n", + rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); + for (;;); +} + +/* Must be in the RMO region, so we place it here */ +static char rtas_os_term_buf[2048]; + +void rtas_os_term(char *str) +{ + int status; + + /* + * Firmware with the ibm,extended-os-term property is guaranteed + * to always return from an ibm,os-term call. Earlier versions without + * this property may terminate the partition which we want to avoid + * since it interferes with panic_timeout. + */ + if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term") || + RTAS_UNKNOWN_SERVICE == rtas_token("ibm,extended-os-term")) + return; + + snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str); + + do { + status = rtas_call(rtas_token("ibm,os-term"), 1, 1, NULL, + __pa(rtas_os_term_buf)); + } while (rtas_busy_delay(status)); + + if (status != 0) + printk(KERN_EMERG "ibm,os-term call failed %d\n", status); +} + +static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE; +#ifdef CONFIG_PPC_PSERIES +static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_when_done) +{ + u16 slb_size = mmu_slb_size; + int rc = H_MULTI_THREADS_ACTIVE; + int cpu; + + slb_set_size(SLB_MIN_SIZE); + printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id()); + + while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) && + !atomic_read(&data->error)) + rc = rtas_call(data->token, 0, 1, NULL); + + if (rc || atomic_read(&data->error)) { + printk(KERN_DEBUG "ibm,suspend-me returned %d\n", rc); + slb_set_size(slb_size); + } + + if (atomic_read(&data->error)) + rc = atomic_read(&data->error); + + atomic_set(&data->error, rc); + pSeries_coalesce_init(); + + if (wake_when_done) { + atomic_set(&data->done, 1); + + for_each_online_cpu(cpu) + plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); + } + + if (atomic_dec_return(&data->working) == 0) + complete(data->complete); + + return rc; +} + +int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data) +{ + atomic_inc(&data->working); + return __rtas_suspend_last_cpu(data, 0); +} + +static int __rtas_suspend_cpu(struct rtas_suspend_me_data *data, int wake_when_done) +{ + long rc = H_SUCCESS; + unsigned long msr_save; + int cpu; + + atomic_inc(&data->working); + + /* really need to ensure MSR.EE is off for H_JOIN */ + msr_save = mfmsr(); + mtmsr(msr_save & ~(MSR_EE)); + + while (rc == H_SUCCESS && !atomic_read(&data->done) && !atomic_read(&data->error)) + rc = plpar_hcall_norets(H_JOIN); + + mtmsr(msr_save); + + if (rc == H_SUCCESS) { + /* This cpu was prodded and the suspend is complete. */ + goto out; + } else if (rc == H_CONTINUE) { + /* All other cpus are in H_JOIN, this cpu does + * the suspend. + */ + return __rtas_suspend_last_cpu(data, wake_when_done); + } else { + printk(KERN_ERR "H_JOIN on cpu %i failed with rc = %ld\n", + smp_processor_id(), rc); + atomic_set(&data->error, rc); + } + + if (wake_when_done) { + atomic_set(&data->done, 1); + + /* This cpu did the suspend or got an error; in either case, + * we need to prod all other other cpus out of join state. + * Extra prods are harmless. 
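+	 * (H_PROD wakes a thread blocked in H_JOIN; prodding a cpu that
+	 * is not joined, or that has already returned, does no harm,
+	 * which is why we can blindly prod every online cpu here.)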
+ */ + for_each_online_cpu(cpu) + plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); + } +out: + if (atomic_dec_return(&data->working) == 0) + complete(data->complete); + return rc; +} + +int rtas_suspend_cpu(struct rtas_suspend_me_data *data) +{ + return __rtas_suspend_cpu(data, 0); +} + +static void rtas_percpu_suspend_me(void *info) +{ + __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); +} + +int rtas_ibm_suspend_me(struct rtas_args *args) +{ + long state; + long rc; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + struct rtas_suspend_me_data data; + DECLARE_COMPLETION_ONSTACK(done); + + if (!rtas_service_present("ibm,suspend-me")) + return -ENOSYS; + + /* Make sure the state is valid */ + rc = plpar_hcall(H_VASI_STATE, retbuf, + ((u64)args->args[0] << 32) | args->args[1]); + + state = retbuf[0]; + + if (rc) { + printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned %ld\n",rc); + return rc; + } else if (state == H_VASI_ENABLED) { + args->args[args->nargs] = RTAS_NOT_SUSPENDABLE; + return 0; + } else if (state != H_VASI_SUSPENDING) { + printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned state %ld\n", + state); + args->args[args->nargs] = -1; + return 0; + } + + atomic_set(&data.working, 0); + atomic_set(&data.done, 0); + atomic_set(&data.error, 0); + data.token = rtas_token("ibm,suspend-me"); + data.complete = &done; + stop_topology_update(); + + /* Call function on all CPUs. One of us will make the + * rtas call + */ + if (on_each_cpu(rtas_percpu_suspend_me, &data, 0)) + atomic_set(&data.error, -EINVAL); + + wait_for_completion(&done); + + if (atomic_read(&data.error) != 0) + printk(KERN_ERR "Error doing global join\n"); + + start_topology_update(); + + return atomic_read(&data.error); +} +#else /* CONFIG_PPC_PSERIES */ +int rtas_ibm_suspend_me(struct rtas_args *args) +{ + return -ENOSYS; +} +#endif + +/** + * Find a specific pseries error log in an RTAS extended event log. + * @log: RTAS error/event log + * @section_id: two character section identifier + * + * Returns a pointer to the specified errorlog or NULL if not found. + */ +struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, + uint16_t section_id) +{ + struct rtas_ext_event_log_v6 *ext_log = + (struct rtas_ext_event_log_v6 *)log->buffer; + struct pseries_errorlog *sect; + unsigned char *p, *log_end; + + /* Check that we understand the format */ + if (log->extended_log_length < sizeof(struct rtas_ext_event_log_v6) || + ext_log->log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG || + ext_log->company_id != RTAS_V6EXT_COMPANY_ID_IBM) + return NULL; + + log_end = log->buffer + log->extended_log_length; + p = ext_log->vendor_log; + + while (p < log_end) { + sect = (struct pseries_errorlog *)p; + if (sect->id == section_id) + return sect; + p += sect->length; + } + + return NULL; +} + +asmlinkage int ppc_rtas(struct rtas_args __user *uargs) +{ + struct rtas_args args; + unsigned long flags; + char *buff_copy, *errbuf = NULL; + int nargs; + int rc; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&args, uargs, 3 * sizeof(u32)) != 0) + return -EFAULT; + + nargs = args.nargs; + if (nargs > ARRAY_SIZE(args.args) + || args.nret > ARRAY_SIZE(args.args) + || nargs + args.nret > ARRAY_SIZE(args.args)) + return -EINVAL; + + /* Copy in args. 
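+	 * The syscall ABI is the rtas_args layout itself: userspace
+	 * (librtas, for instance) fills in token/nargs/nret plus nargs
+	 * argument words and traps in, roughly:
+	 *
+	 *	struct rtas_args a = { .token = tok, .nargs = 1, .nret = 1 };
+	 *	a.args[0] = arg0;
+	 *	syscall(__NR_rtas, &a);
+	 *
+	 * Only the first three words were copied above, so fetch the
+	 * argument words now that nargs has been validated.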
*/
+	if (copy_from_user(args.args, uargs->args,
+			nargs * sizeof(rtas_arg_t)) != 0)
+		return -EFAULT;
+
+	if (args.token == RTAS_UNKNOWN_SERVICE)
+		return -EINVAL;
+
+	args.rets = &args.args[nargs];
+	memset(args.rets, 0, args.nret * sizeof(rtas_arg_t));
+
+	/* Need to handle ibm,suspend-me call specially */
+	if (args.token == ibm_suspend_me_token) {
+		rc = rtas_ibm_suspend_me(&args);
+		if (rc)
+			return rc;
+		goto copy_return;
+	}
+
+	buff_copy = get_errorlog_buffer();
+
+	flags = lock_rtas();
+
+	rtas.args = args;
+	enter_rtas(__pa(&rtas.args));
+	args = rtas.args;
+
+	/* A -1 return code indicates that the last command couldn't
+	   be completed due to a hardware error. */
+	if (args.rets[0] == -1)
+		errbuf = __fetch_rtas_last_error(buff_copy);
+
+	unlock_rtas(flags);
+
+	if (buff_copy) {
+		if (errbuf)
+			log_error(errbuf, ERR_TYPE_RTAS_LOG, 0);
+		kfree(buff_copy);
+	}
+
+ copy_return:
+	/* Copy out args. */
+	if (copy_to_user(uargs->args + nargs,
+			 args.args + nargs,
+			 args.nret * sizeof(rtas_arg_t)) != 0)
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Call early during boot, before mem init or bootmem, to retrieve the RTAS
+ * information from the device-tree and allocate the RMO buffer for userland
+ * accesses.
+ */
+void __init rtas_initialize(void)
+{
+	unsigned long rtas_region = RTAS_INSTANTIATE_MAX;
+
+	/* Get RTAS dev node and fill up our "rtas" structure with info
+	 * about it.
+	 */
+	rtas.dev = of_find_node_by_name(NULL, "rtas");
+	if (rtas.dev) {
+		const u32 *basep, *entryp, *sizep;
+
+		basep = of_get_property(rtas.dev, "linux,rtas-base", NULL);
+		sizep = of_get_property(rtas.dev, "rtas-size", NULL);
+		if (basep != NULL && sizep != NULL) {
+			rtas.base = *basep;
+			rtas.size = *sizep;
+			entryp = of_get_property(rtas.dev,
+					"linux,rtas-entry", NULL);
+			if (entryp == NULL) /* Ugh */
+				rtas.entry = rtas.base;
+			else
+				rtas.entry = *entryp;
+		} else
+			rtas.dev = NULL;
+	}
+	if (!rtas.dev)
+		return;
+
+	/* If RTAS was found, allocate the RMO buffer for it and look for
+	 * the ibm,suspend-me token, if any
+	 */
+#ifdef CONFIG_PPC64
+	if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) {
+		rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX);
+		ibm_suspend_me_token = rtas_token("ibm,suspend-me");
+	}
+#endif
+	rtas_rmo_buf = memblock_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region);
+
+#ifdef CONFIG_RTAS_ERROR_LOGGING
+	rtas_last_error_token = rtas_token("rtas-last-error");
+#endif
+}
+
+int __init early_init_dt_scan_rtas(unsigned long node,
+		const char *uname, int depth, void *data)
+{
+	u32 *basep, *entryp, *sizep;
+
+	if (depth != 1 || strcmp(uname, "rtas") != 0)
+		return 0;
+
+	basep = of_get_flat_dt_prop(node, "linux,rtas-base", NULL);
+	entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL);
+	sizep = of_get_flat_dt_prop(node, "rtas-size", NULL);
+
+	if (basep && entryp && sizep) {
+		rtas.base = *basep;
+		rtas.entry = *entryp;
+		rtas.size = *sizep;
+	}
+
+#ifdef CONFIG_UDBG_RTAS_CONSOLE
+	basep = of_get_flat_dt_prop(node, "put-term-char", NULL);
+	if (basep)
+		rtas_putchar_token = *basep;
+
+	basep = of_get_flat_dt_prop(node, "get-term-char", NULL);
+	if (basep)
+		rtas_getchar_token = *basep;
+
+	if (rtas_putchar_token != RTAS_UNKNOWN_SERVICE &&
+	    rtas_getchar_token != RTAS_UNKNOWN_SERVICE)
+		udbg_init_rtas_console();
+
+#endif
+
+	/* break now */
+	return 1;
+}
+
+static arch_spinlock_t timebase_lock;
+static u64 timebase = 0;
+
+void __cpuinit rtas_give_timebase(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	hard_irq_disable();
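+	/*
+	 * Hand our timebase to a secondary that is coming up: freeze
+	 * the timebase, publish the frozen value through 'timebase',
+	 * spin until rtas_take_timebase() on the other cpu consumes it
+	 * (it writes 'timebase' back to 0), then thaw.
+	 */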
arch_spin_lock(&timebase_lock); + rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); + timebase = get_tb(); + arch_spin_unlock(&timebase_lock); + + while (timebase) + barrier(); + rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL); + local_irq_restore(flags); +} + +void __cpuinit rtas_take_timebase(void) +{ + while (!timebase) + barrier(); + arch_spin_lock(&timebase_lock); + set_tb(timebase >> 32, timebase & 0xffffffff); + timebase = 0; + arch_spin_unlock(&timebase_lock); +} diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c new file mode 100644 index 00000000..4174b4b2 --- /dev/null +++ b/arch/powerpc/kernel/rtas_flash.c @@ -0,0 +1,807 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * /proc/powerpc/rtas/firmware_flash interface + * + * This file implements a firmware_flash interface to pump a firmware + * image into the kernel. At reboot time rtas_restart() will see the + * firmware image and flash it as it reboots (see rtas.c). + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/proc_fs.h> +#include <linux/reboot.h> +#include <asm/delay.h> +#include <asm/uaccess.h> +#include <asm/rtas.h> +#include <asm/abs_addr.h> + +#define MODULE_VERS "1.0" +#define MODULE_NAME "rtas_flash" + +#define FIRMWARE_FLASH_NAME "firmware_flash" +#define FIRMWARE_UPDATE_NAME "firmware_update" +#define MANAGE_FLASH_NAME "manage_flash" +#define VALIDATE_FLASH_NAME "validate_flash" + +/* General RTAS Status Codes */ +#define RTAS_RC_SUCCESS 0 +#define RTAS_RC_HW_ERR -1 +#define RTAS_RC_BUSY -2 + +/* Flash image status values */ +#define FLASH_AUTH -9002 /* RTAS Not Service Authority Partition */ +#define FLASH_NO_OP -1099 /* No operation initiated by user */ +#define FLASH_IMG_SHORT -1005 /* Flash image shorter than expected */ +#define FLASH_IMG_BAD_LEN -1004 /* Bad length value in flash list block */ +#define FLASH_IMG_NULL_DATA -1003 /* Bad data value in flash list block */ +#define FLASH_IMG_READY 0 /* Firmware img ready for flash on reboot */ + +/* Manage image status values */ +#define MANAGE_AUTH -9002 /* RTAS Not Service Authority Partition */ +#define MANAGE_ACTIVE_ERR -9001 /* RTAS Cannot Overwrite Active Img */ +#define MANAGE_NO_OP -1099 /* No operation initiated by user */ +#define MANAGE_PARAM_ERR -3 /* RTAS Parameter Error */ +#define MANAGE_HW_ERR -1 /* RTAS Hardware Error */ + +/* Validate image status values */ +#define VALIDATE_AUTH -9002 /* RTAS Not Service Authority Partition */ +#define VALIDATE_NO_OP -1099 /* No operation initiated by the user */ +#define VALIDATE_INCOMPLETE -1002 /* User copied < VALIDATE_BUF_SIZE */ +#define VALIDATE_READY -1001 /* Firmware image ready for validation */ +#define VALIDATE_PARAM_ERR -3 /* RTAS Parameter Error */ +#define VALIDATE_HW_ERR -1 /* RTAS Hardware Error */ +#define VALIDATE_TMP_UPDATE 0 /* Validate Return Status */ +#define VALIDATE_FLASH_AUTH 1 /* Validate Return Status */ +#define VALIDATE_INVALID_IMG 2 /* Validate Return Status */ +#define VALIDATE_CUR_UNKNOWN 3 /* Validate Return Status */ +#define VALIDATE_TMP_COMMIT_DL 4 /* Validate Return Status */ +#define VALIDATE_TMP_COMMIT 5 /* Validate Return Status */ +#define VALIDATE_TMP_UPDATE_DL 6 /* Validate Return Status */ + +/* ibm,manage-flash-image 
operation tokens */
+#define RTAS_REJECT_TMP_IMG 0
+#define RTAS_COMMIT_TMP_IMG 1
+
+/* Array sizes */
+#define VALIDATE_BUF_SIZE 4096
+#define RTAS_MSG_MAXLEN 64
+
+/* Quirk - RTAS requires 4k list length and block size */
+#define RTAS_BLKLIST_LENGTH 4096
+#define RTAS_BLK_SIZE 4096
+
+struct flash_block {
+	char *data;
+	unsigned long length;
+};
+
+/* This struct is very similar but not identical to
+ * that needed by the rtas flash update.
+ * All we need to do for rtas is rewrite num_blocks
+ * into a version/length and translate the pointers
+ * to absolute.
+ */
+#define FLASH_BLOCKS_PER_NODE ((RTAS_BLKLIST_LENGTH - 16) / sizeof(struct flash_block))
+struct flash_block_list {
+	unsigned long num_blocks;
+	struct flash_block_list *next;
+	struct flash_block blocks[FLASH_BLOCKS_PER_NODE];
+};
+
+static struct flash_block_list *rtas_firmware_flash_list;
+
+/* Use slab cache to guarantee 4k alignment */
+static struct kmem_cache *flash_block_cache = NULL;
+
+#define FLASH_BLOCK_LIST_VERSION (1UL)
+
+/* Local copy of the flash block list.
+ * We only allow one open of the flash proc file and create this
+ * list as we go.  The rtas_firmware_flash_list variable will be
+ * set once the data is fully read.
+ *
+ * For convenience, as we build the list we use virtual addrs;
+ * we do not fill in the version number, and the length field
+ * is treated as the number of entries currently in the block
+ * (i.e. not a byte count).  This is all fixed up when calling
+ * the flash routine.
+ */
+
+/* Status int must be first member of struct */
+struct rtas_update_flash_t
+{
+	int status;			/* Flash update status */
+	struct flash_block_list *flist; /* Local copy of flash block list */
+};
+
+/* Status int must be first member of struct */
+struct rtas_manage_flash_t
+{
+	int status;			/* Returned status */
+	unsigned int op;		/* Reject or commit image */
+};
+
+/* Status int must be first member of struct */
+struct rtas_validate_flash_t
+{
+	int status;			/* Returned status */
+	char buf[VALIDATE_BUF_SIZE];	/* Candidate image buffer */
+	unsigned int buf_size;		/* Size of image buf */
+	unsigned int update_results;	/* Update results token */
+};
+
+static DEFINE_SPINLOCK(flash_file_open_lock);
+static struct proc_dir_entry *firmware_flash_pde;
+static struct proc_dir_entry *firmware_update_pde;
+static struct proc_dir_entry *validate_pde;
+static struct proc_dir_entry *manage_pde;
+
+/* Do simple sanity checks on the flash image. */
+static int flash_list_valid(struct flash_block_list *flist)
+{
+	struct flash_block_list *f;
+	int i;
+	unsigned long block_size, image_size;
+
+	/* Paranoid self test here.  We also collect the image size. 
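+	 * Every block must carry data and a sane length, or our own
+	 * list building was broken:
+	 *
+	 *	data != NULL                 (else FLASH_IMG_NULL_DATA)
+	 *	0 < length <= RTAS_BLK_SIZE  (else FLASH_IMG_BAD_LEN)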
*/ + image_size = 0; + for (f = flist; f; f = f->next) { + for (i = 0; i < f->num_blocks; i++) { + if (f->blocks[i].data == NULL) { + return FLASH_IMG_NULL_DATA; + } + block_size = f->blocks[i].length; + if (block_size <= 0 || block_size > RTAS_BLK_SIZE) { + return FLASH_IMG_BAD_LEN; + } + image_size += block_size; + } + } + + if (image_size < (256 << 10)) { + if (image_size < 2) + return FLASH_NO_OP; + } + + printk(KERN_INFO "FLASH: flash image with %ld bytes stored for hardware flash on reboot\n", image_size); + + return FLASH_IMG_READY; +} + +static void free_flash_list(struct flash_block_list *f) +{ + struct flash_block_list *next; + int i; + + while (f) { + for (i = 0; i < f->num_blocks; i++) + kmem_cache_free(flash_block_cache, f->blocks[i].data); + next = f->next; + kmem_cache_free(flash_block_cache, f); + f = next; + } +} + +static int rtas_flash_release(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct rtas_update_flash_t *uf; + + uf = (struct rtas_update_flash_t *) dp->data; + if (uf->flist) { + /* File was opened in write mode for a new flash attempt */ + /* Clear saved list */ + if (rtas_firmware_flash_list) { + free_flash_list(rtas_firmware_flash_list); + rtas_firmware_flash_list = NULL; + } + + if (uf->status != FLASH_AUTH) + uf->status = flash_list_valid(uf->flist); + + if (uf->status == FLASH_IMG_READY) + rtas_firmware_flash_list = uf->flist; + else + free_flash_list(uf->flist); + + uf->flist = NULL; + } + + atomic_dec(&dp->count); + return 0; +} + +static void get_flash_status_msg(int status, char *buf) +{ + char *msg; + + switch (status) { + case FLASH_AUTH: + msg = "error: this partition does not have service authority\n"; + break; + case FLASH_NO_OP: + msg = "info: no firmware image for flash\n"; + break; + case FLASH_IMG_SHORT: + msg = "error: flash image short\n"; + break; + case FLASH_IMG_BAD_LEN: + msg = "error: internal error bad length\n"; + break; + case FLASH_IMG_NULL_DATA: + msg = "error: internal error null data\n"; + break; + case FLASH_IMG_READY: + msg = "ready: firmware image ready for flash on reboot\n"; + break; + default: + sprintf(buf, "error: unexpected status value %d\n", status); + return; + } + + strcpy(buf, msg); +} + +/* Reading the proc file will show status (not the firmware contents) */ +static ssize_t rtas_flash_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct rtas_update_flash_t *uf; + char msg[RTAS_MSG_MAXLEN]; + + uf = dp->data; + + if (!strcmp(dp->name, FIRMWARE_FLASH_NAME)) { + get_flash_status_msg(uf->status, msg); + } else { /* FIRMWARE_UPDATE_NAME */ + sprintf(msg, "%d\n", uf->status); + } + + return simple_read_from_buffer(buf, count, ppos, msg, strlen(msg)); +} + +/* constructor for flash_block_cache */ +void rtas_block_ctor(void *ptr) +{ + memset(ptr, 0, RTAS_BLK_SIZE); +} + +/* We could be much more efficient here. But to keep this function + * simple we allocate a page to the block list no matter how small the + * count is. If the system is low on memory it will be just as well + * that we fail.... 
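+ *
+ * Concretely: each write consumes at most RTAS_BLK_SIZE bytes into a
+ * freshly allocated 4k slab block, and a new flash_block_list node is
+ * chained on once the current one holds FLASH_BLOCKS_PER_NODE entries.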
+ */ +static ssize_t rtas_flash_write(struct file *file, const char __user *buffer, + size_t count, loff_t *off) +{ + struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct rtas_update_flash_t *uf; + char *p; + int next_free; + struct flash_block_list *fl; + + uf = (struct rtas_update_flash_t *) dp->data; + + if (uf->status == FLASH_AUTH || count == 0) + return count; /* discard data */ + + /* In the case that the image is not ready for flashing, the memory + * allocated for the block list will be freed upon the release of the + * proc file + */ + if (uf->flist == NULL) { + uf->flist = kmem_cache_alloc(flash_block_cache, GFP_KERNEL); + if (!uf->flist) + return -ENOMEM; + } + + fl = uf->flist; + while (fl->next) + fl = fl->next; /* seek to last block_list for append */ + next_free = fl->num_blocks; + if (next_free == FLASH_BLOCKS_PER_NODE) { + /* Need to allocate another block_list */ + fl->next = kmem_cache_alloc(flash_block_cache, GFP_KERNEL); + if (!fl->next) + return -ENOMEM; + fl = fl->next; + next_free = 0; + } + + if (count > RTAS_BLK_SIZE) + count = RTAS_BLK_SIZE; + p = kmem_cache_alloc(flash_block_cache, GFP_KERNEL); + if (!p) + return -ENOMEM; + + if(copy_from_user(p, buffer, count)) { + kmem_cache_free(flash_block_cache, p); + return -EFAULT; + } + fl->blocks[next_free].data = p; + fl->blocks[next_free].length = count; + fl->num_blocks++; + + return count; +} + +static int rtas_excl_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *dp = PDE(inode); + + /* Enforce exclusive open with use count of PDE */ + spin_lock(&flash_file_open_lock); + if (atomic_read(&dp->count) > 2) { + spin_unlock(&flash_file_open_lock); + return -EBUSY; + } + + atomic_inc(&dp->count); + spin_unlock(&flash_file_open_lock); + + return 0; +} + +static int rtas_excl_release(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *dp = PDE(inode); + + atomic_dec(&dp->count); + + return 0; +} + +static void manage_flash(struct rtas_manage_flash_t *args_buf) +{ + s32 rc; + + do { + rc = rtas_call(rtas_token("ibm,manage-flash-image"), 1, + 1, NULL, args_buf->op); + } while (rtas_busy_delay(rc)); + + args_buf->status = rc; +} + +static ssize_t manage_flash_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct rtas_manage_flash_t *args_buf; + char msg[RTAS_MSG_MAXLEN]; + int msglen; + + args_buf = dp->data; + if (args_buf == NULL) + return 0; + + msglen = sprintf(msg, "%d\n", args_buf->status); + + return simple_read_from_buffer(buf, count, ppos, msg, msglen); +} + +static ssize_t manage_flash_write(struct file *file, const char __user *buf, + size_t count, loff_t *off) +{ + struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct rtas_manage_flash_t *args_buf; + const char reject_str[] = "0"; + const char commit_str[] = "1"; + char stkbuf[10]; + int op; + + args_buf = (struct rtas_manage_flash_t *) dp->data; + if ((args_buf->status == MANAGE_AUTH) || (count == 0)) + return count; + + op = -1; + if (buf) { + if (count > 9) count = 9; + if (copy_from_user (stkbuf, buf, count)) { + return -EFAULT; + } + if (strncmp(stkbuf, reject_str, strlen(reject_str)) == 0) + op = RTAS_REJECT_TMP_IMG; + else if (strncmp(stkbuf, commit_str, strlen(commit_str)) == 0) + op = RTAS_COMMIT_TMP_IMG; + } + + if (op == -1) /* buf is empty, or contains invalid string */ + return -EINVAL; + + args_buf->op = op; + manage_flash(args_buf); + + return count; +} + +static void 
validate_flash(struct rtas_validate_flash_t *args_buf) +{ + int token = rtas_token("ibm,validate-flash-image"); + int update_results; + s32 rc; + + rc = 0; + do { + spin_lock(&rtas_data_buf_lock); + memcpy(rtas_data_buf, args_buf->buf, VALIDATE_BUF_SIZE); + rc = rtas_call(token, 2, 2, &update_results, + (u32) __pa(rtas_data_buf), args_buf->buf_size); + memcpy(args_buf->buf, rtas_data_buf, VALIDATE_BUF_SIZE); + spin_unlock(&rtas_data_buf_lock); + } while (rtas_busy_delay(rc)); + + args_buf->status = rc; + args_buf->update_results = update_results; +} + +static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf, + char *msg) +{ + int n; + + if (args_buf->status >= VALIDATE_TMP_UPDATE) { + n = sprintf(msg, "%d\n", args_buf->update_results); + if ((args_buf->update_results >= VALIDATE_CUR_UNKNOWN) || + (args_buf->update_results == VALIDATE_TMP_UPDATE)) + n += sprintf(msg + n, "%s\n", args_buf->buf); + } else { + n = sprintf(msg, "%d\n", args_buf->status); + } + return n; +} + +static ssize_t validate_flash_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct rtas_validate_flash_t *args_buf; + char msg[RTAS_MSG_MAXLEN]; + int msglen; + + args_buf = dp->data; + + msglen = get_validate_flash_msg(args_buf, msg); + + return simple_read_from_buffer(buf, count, ppos, msg, msglen); +} + +static ssize_t validate_flash_write(struct file *file, const char __user *buf, + size_t count, loff_t *off) +{ + struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct rtas_validate_flash_t *args_buf; + int rc; + + args_buf = (struct rtas_validate_flash_t *) dp->data; + + if (dp->data == NULL) { + dp->data = kmalloc(sizeof(struct rtas_validate_flash_t), + GFP_KERNEL); + if (dp->data == NULL) + return -ENOMEM; + } + + /* We are only interested in the first 4K of the + * candidate image */ + if ((*off >= VALIDATE_BUF_SIZE) || + (args_buf->status == VALIDATE_AUTH)) { + *off += count; + return count; + } + + if (*off + count >= VALIDATE_BUF_SIZE) { + count = VALIDATE_BUF_SIZE - *off; + args_buf->status = VALIDATE_READY; + } else { + args_buf->status = VALIDATE_INCOMPLETE; + } + + if (!access_ok(VERIFY_READ, buf, count)) { + rc = -EFAULT; + goto done; + } + if (copy_from_user(args_buf->buf + *off, buf, count)) { + rc = -EFAULT; + goto done; + } + + *off += count; + rc = count; +done: + if (rc < 0) { + kfree(dp->data); + dp->data = NULL; + } + return rc; +} + +static int validate_flash_release(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct rtas_validate_flash_t *args_buf; + + args_buf = (struct rtas_validate_flash_t *) dp->data; + + if (args_buf->status == VALIDATE_READY) { + args_buf->buf_size = VALIDATE_BUF_SIZE; + validate_flash(args_buf); + } + + /* The matching atomic_inc was in rtas_excl_open() */ + atomic_dec(&dp->count); + + return 0; +} + +static void rtas_flash_firmware(int reboot_type) +{ + unsigned long image_size; + struct flash_block_list *f, *next, *flist; + unsigned long rtas_block_list; + int i, status, update_token; + + if (rtas_firmware_flash_list == NULL) + return; /* nothing to do */ + + if (reboot_type != SYS_RESTART) { + printk(KERN_ALERT "FLASH: firmware flash requires a reboot\n"); + printk(KERN_ALERT "FLASH: the firmware image will NOT be flashed\n"); + return; + } + + update_token = rtas_token("ibm,update-flash-64-and-reboot"); + if (update_token == RTAS_UNKNOWN_SERVICE) { + printk(KERN_ALERT "FLASH: 
ibm,update-flash-64-and-reboot "
+ "is not available -- not a service partition?\n");
+ printk(KERN_ALERT "FLASH: firmware will not be flashed\n");
+ return;
+ }
+
+ /*
+ * Just before starting the firmware flash, cancel the event scan work
+ * to avoid any soft lockup issues.
+ */
+ rtas_cancel_event_scan();
+
+ /*
+ * NOTE: the "first" block must be under 4GB, so we create
+ * an entry with no data blocks in the reserved buffer in
+ * the kernel data segment.
+ */
+ spin_lock(&rtas_data_buf_lock);
+ flist = (struct flash_block_list *)&rtas_data_buf[0];
+ flist->num_blocks = 0;
+ flist->next = rtas_firmware_flash_list;
+ rtas_block_list = virt_to_abs(flist);
+ if (rtas_block_list >= 4UL*1024*1024*1024) {
+ printk(KERN_ALERT "FLASH: kernel bug...flash list header addr above 4GB\n");
+ spin_unlock(&rtas_data_buf_lock);
+ return;
+ }
+
+ printk(KERN_ALERT "FLASH: preparing saved firmware image for flash\n");
+ /* Update the block_list in place. */
+ rtas_firmware_flash_list = NULL; /* too hard to back out on error */
+ image_size = 0;
+ for (f = flist; f; f = next) {
+ /* Translate data addrs to absolute */
+ for (i = 0; i < f->num_blocks; i++) {
+ f->blocks[i].data = (char *)virt_to_abs(f->blocks[i].data);
+ image_size += f->blocks[i].length;
+ }
+ next = f->next;
+ /* Don't translate NULL pointer for last entry */
+ if (f->next)
+ f->next = (struct flash_block_list *)virt_to_abs(f->next);
+ else
+ f->next = NULL;
+ /* make num_blocks into the version/length field */
+ f->num_blocks = (FLASH_BLOCK_LIST_VERSION << 56) | ((f->num_blocks+1)*16);
+ }
+
+ printk(KERN_ALERT "FLASH: flash image is %ld bytes\n", image_size);
+ printk(KERN_ALERT "FLASH: performing flash and reboot\n");
+ rtas_progress("Flashing \n", 0x0);
+ rtas_progress("Please Wait... ", 0x0);
+ printk(KERN_ALERT "FLASH: this will take several minutes. Do not power off!\n");
+ status = rtas_call(update_token, 1, 1, NULL, rtas_block_list);
+ switch (status) { /* should only get "bad" status */
+ case 0:
+ printk(KERN_ALERT "FLASH: success\n");
+ break;
+ case -1:
+ printk(KERN_ALERT "FLASH: hardware error. Firmware may not have been flashed\n");
+ break;
+ case -3:
+ printk(KERN_ALERT "FLASH: image is corrupt or not correct for this platform. Firmware not flashed\n");
+ break;
+ case -4:
+ printk(KERN_ALERT "FLASH: flash failed when partially complete. 
System may not reboot\n"); + break; + default: + printk(KERN_ALERT "FLASH: unknown flash return code %d\n", status); + break; + } + spin_unlock(&rtas_data_buf_lock); +} + +static void remove_flash_pde(struct proc_dir_entry *dp) +{ + if (dp) { + kfree(dp->data); + remove_proc_entry(dp->name, dp->parent); + } +} + +static int initialize_flash_pde_data(const char *rtas_call_name, + size_t buf_size, + struct proc_dir_entry *dp) +{ + int *status; + int token; + + dp->data = kzalloc(buf_size, GFP_KERNEL); + if (dp->data == NULL) { + remove_flash_pde(dp); + return -ENOMEM; + } + + /* + * This code assumes that the status int is the first member of the + * struct + */ + status = (int *) dp->data; + token = rtas_token(rtas_call_name); + if (token == RTAS_UNKNOWN_SERVICE) + *status = FLASH_AUTH; + else + *status = FLASH_NO_OP; + + return 0; +} + +static struct proc_dir_entry *create_flash_pde(const char *filename, + const struct file_operations *fops) +{ + return proc_create(filename, S_IRUSR | S_IWUSR, NULL, fops); +} + +static const struct file_operations rtas_flash_operations = { + .owner = THIS_MODULE, + .read = rtas_flash_read, + .write = rtas_flash_write, + .open = rtas_excl_open, + .release = rtas_flash_release, + .llseek = default_llseek, +}; + +static const struct file_operations manage_flash_operations = { + .owner = THIS_MODULE, + .read = manage_flash_read, + .write = manage_flash_write, + .open = rtas_excl_open, + .release = rtas_excl_release, + .llseek = default_llseek, +}; + +static const struct file_operations validate_flash_operations = { + .owner = THIS_MODULE, + .read = validate_flash_read, + .write = validate_flash_write, + .open = rtas_excl_open, + .release = validate_flash_release, + .llseek = default_llseek, +}; + +static int __init rtas_flash_init(void) +{ + int rc; + + if (rtas_token("ibm,update-flash-64-and-reboot") == + RTAS_UNKNOWN_SERVICE) { + printk(KERN_ERR "rtas_flash: no firmware flash support\n"); + return 1; + } + + firmware_flash_pde = create_flash_pde("powerpc/rtas/" + FIRMWARE_FLASH_NAME, + &rtas_flash_operations); + if (firmware_flash_pde == NULL) { + rc = -ENOMEM; + goto cleanup; + } + + rc = initialize_flash_pde_data("ibm,update-flash-64-and-reboot", + sizeof(struct rtas_update_flash_t), + firmware_flash_pde); + if (rc != 0) + goto cleanup; + + firmware_update_pde = create_flash_pde("powerpc/rtas/" + FIRMWARE_UPDATE_NAME, + &rtas_flash_operations); + if (firmware_update_pde == NULL) { + rc = -ENOMEM; + goto cleanup; + } + + rc = initialize_flash_pde_data("ibm,update-flash-64-and-reboot", + sizeof(struct rtas_update_flash_t), + firmware_update_pde); + if (rc != 0) + goto cleanup; + + validate_pde = create_flash_pde("powerpc/rtas/" VALIDATE_FLASH_NAME, + &validate_flash_operations); + if (validate_pde == NULL) { + rc = -ENOMEM; + goto cleanup; + } + + rc = initialize_flash_pde_data("ibm,validate-flash-image", + sizeof(struct rtas_validate_flash_t), + validate_pde); + if (rc != 0) + goto cleanup; + + manage_pde = create_flash_pde("powerpc/rtas/" MANAGE_FLASH_NAME, + &manage_flash_operations); + if (manage_pde == NULL) { + rc = -ENOMEM; + goto cleanup; + } + + rc = initialize_flash_pde_data("ibm,manage-flash-image", + sizeof(struct rtas_manage_flash_t), + manage_pde); + if (rc != 0) + goto cleanup; + + rtas_flash_term_hook = rtas_flash_firmware; + + flash_block_cache = kmem_cache_create("rtas_flash_cache", + RTAS_BLK_SIZE, RTAS_BLK_SIZE, 0, + rtas_block_ctor); + if (!flash_block_cache) { + printk(KERN_ERR "%s: failed to create block cache\n", + __func__); + rc = 
-ENOMEM; + goto cleanup; + } + return 0; + +cleanup: + remove_flash_pde(firmware_flash_pde); + remove_flash_pde(firmware_update_pde); + remove_flash_pde(validate_pde); + remove_flash_pde(manage_pde); + + return rc; +} + +static void __exit rtas_flash_cleanup(void) +{ + rtas_flash_term_hook = NULL; + + if (flash_block_cache) + kmem_cache_destroy(flash_block_cache); + + remove_flash_pde(firmware_flash_pde); + remove_flash_pde(firmware_update_pde); + remove_flash_pde(validate_pde); + remove_flash_pde(manage_pde); +} + +module_init(rtas_flash_init); +module_exit(rtas_flash_cleanup); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c new file mode 100644 index 00000000..179af906 --- /dev/null +++ b/arch/powerpc/kernel/rtas_pci.c @@ -0,0 +1,304 @@ +/* + * Copyright (C) 2001 Dave Engebretsen, IBM Corporation + * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM + * + * RTAS specific routines for PCI. + * + * Based on code from pci.c, chrp_pci.c and pSeries_pci.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/threads.h> +#include <linux/pci.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/bootmem.h> + +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/irq.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/iommu.h> +#include <asm/rtas.h> +#include <asm/mpic.h> +#include <asm/ppc-pci.h> +#include <asm/eeh.h> + +/* RTAS tokens */ +static int read_pci_config; +static int write_pci_config; +static int ibm_read_pci_config; +static int ibm_write_pci_config; + +static inline int config_access_valid(struct pci_dn *dn, int where) +{ + if (where < 256) + return 1; + if (where < 4096 && dn->pci_ext_config_space) + return 1; + + return 0; +} + +int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val) +{ + int returnval = -1; + unsigned long buid, addr; + int ret; + + if (!pdn) + return PCIBIOS_DEVICE_NOT_FOUND; + if (!config_access_valid(pdn, where)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + addr = rtas_config_addr(pdn->busno, pdn->devfn, where); + buid = pdn->phb->buid; + if (buid) { + ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval, + addr, BUID_HI(buid), BUID_LO(buid), size); + } else { + ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size); + } + *val = returnval; + + if (ret) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (returnval == EEH_IO_ERROR_VALUE(size) && + eeh_dn_check_failure (pdn->node, NULL)) + return PCIBIOS_DEVICE_NOT_FOUND; + + return PCIBIOS_SUCCESSFUL; +} + +static int rtas_pci_read_config(struct pci_bus *bus, + unsigned int devfn, + int where, int size, u32 *val) +{ + struct device_node *busdn, *dn; + + busdn = pci_bus_to_OF_node(bus); + + /* Search only direct children of the bus */ + for (dn = busdn->child; 
dn; dn = dn->sibling) { + struct pci_dn *pdn = PCI_DN(dn); + if (pdn && pdn->devfn == devfn + && of_device_is_available(dn)) + return rtas_read_config(pdn, where, size, val); + } + + return PCIBIOS_DEVICE_NOT_FOUND; +} + +int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val) +{ + unsigned long buid, addr; + int ret; + + if (!pdn) + return PCIBIOS_DEVICE_NOT_FOUND; + if (!config_access_valid(pdn, where)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + addr = rtas_config_addr(pdn->busno, pdn->devfn, where); + buid = pdn->phb->buid; + if (buid) { + ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr, + BUID_HI(buid), BUID_LO(buid), size, (ulong) val); + } else { + ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val); + } + + if (ret) + return PCIBIOS_DEVICE_NOT_FOUND; + + return PCIBIOS_SUCCESSFUL; +} + +static int rtas_pci_write_config(struct pci_bus *bus, + unsigned int devfn, + int where, int size, u32 val) +{ + struct device_node *busdn, *dn; + + busdn = pci_bus_to_OF_node(bus); + + /* Search only direct children of the bus */ + for (dn = busdn->child; dn; dn = dn->sibling) { + struct pci_dn *pdn = PCI_DN(dn); + if (pdn && pdn->devfn == devfn + && of_device_is_available(dn)) + return rtas_write_config(pdn, where, size, val); + } + return PCIBIOS_DEVICE_NOT_FOUND; +} + +static struct pci_ops rtas_pci_ops = { + .read = rtas_pci_read_config, + .write = rtas_pci_write_config, +}; + +static int is_python(struct device_node *dev) +{ + const char *model = of_get_property(dev, "model", NULL); + + if (model && strstr(model, "Python")) + return 1; + + return 0; +} + +static void python_countermeasures(struct device_node *dev) +{ + struct resource registers; + void __iomem *chip_regs; + volatile u32 val; + + if (of_address_to_resource(dev, 0, ®isters)) { + printk(KERN_ERR "Can't get address for Python workarounds !\n"); + return; + } + + /* Python's register file is 1 MB in size. 
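+ * Map all of it: the base is the resource start with the low 20
+ * address bits masked off, and the length passed to ioremap()
+ * below (0x100000) covers the full 1 MB window.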
*/ + chip_regs = ioremap(registers.start & ~(0xfffffUL), 0x100000); + + /* + * Firmware doesn't always clear this bit which is critical + * for good performance - Anton + */ + +#define PRG_CL_RESET_VALID 0x00010000 + + val = in_be32(chip_regs + 0xf6030); + if (val & PRG_CL_RESET_VALID) { + printk(KERN_INFO "Python workaround: "); + val &= ~PRG_CL_RESET_VALID; + out_be32(chip_regs + 0xf6030, val); + /* + * We must read it back for changes to + * take effect + */ + val = in_be32(chip_regs + 0xf6030); + printk("reg0: %x\n", val); + } + + iounmap(chip_regs); +} + +void __init init_pci_config_tokens (void) +{ + read_pci_config = rtas_token("read-pci-config"); + write_pci_config = rtas_token("write-pci-config"); + ibm_read_pci_config = rtas_token("ibm,read-pci-config"); + ibm_write_pci_config = rtas_token("ibm,write-pci-config"); +} + +unsigned long __devinit get_phb_buid (struct device_node *phb) +{ + struct resource r; + + if (ibm_read_pci_config == -1) + return 0; + if (of_address_to_resource(phb, 0, &r)) + return 0; + return r.start; +} + +static int phb_set_bus_ranges(struct device_node *dev, + struct pci_controller *phb) +{ + const int *bus_range; + unsigned int len; + + bus_range = of_get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + return 1; + } + + phb->first_busno = bus_range[0]; + phb->last_busno = bus_range[1]; + + return 0; +} + +int __devinit rtas_setup_phb(struct pci_controller *phb) +{ + struct device_node *dev = phb->dn; + + if (is_python(dev)) + python_countermeasures(dev); + + if (phb_set_bus_ranges(dev, phb)) + return 1; + + phb->ops = &rtas_pci_ops; + phb->buid = get_phb_buid(dev); + + return 0; +} + +void __init find_and_init_phbs(void) +{ + struct device_node *node; + struct pci_controller *phb; + struct device_node *root = of_find_node_by_path("/"); + + for_each_child_of_node(root, node) { + if (node->type == NULL || (strcmp(node->type, "pci") != 0 && + strcmp(node->type, "pciex") != 0)) + continue; + + phb = pcibios_alloc_controller(node); + if (!phb) + continue; + rtas_setup_phb(phb); + pci_process_bridge_OF_ranges(phb, node, 0); + isa_bridge_find_early(phb); + } + + of_node_put(root); + pci_devs_phb_init(); + + /* Create EEH devices for all PHBs */ + eeh_dev_phb_init(); + + /* + * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties + * in chosen. + */ + if (of_chosen) { + const int *prop; + + prop = of_get_property(of_chosen, + "linux,pci-probe-only", NULL); + if (prop) { + if (*prop) + pci_add_flags(PCI_PROBE_ONLY); + else + pci_clear_flags(PCI_PROBE_ONLY); + } + +#ifdef CONFIG_PPC32 /* Will be made generic soon */ + prop = of_get_property(of_chosen, + "linux,pci-assign-all-buses", NULL); + if (prop && *prop) + pci_add_flags(PCI_REASSIGN_ALL_BUS); +#endif /* CONFIG_PPC32 */ + } +} diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c new file mode 100644 index 00000000..1045ff49 --- /dev/null +++ b/arch/powerpc/kernel/rtasd.c @@ -0,0 +1,555 @@ +/* + * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Communication to userspace based on kernel/printk.c + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/poll.h> +#include <linux/proc_fs.h> +#include <linux/init.h> +#include <linux/vmalloc.h> +#include <linux/spinlock.h> +#include <linux/cpu.h> +#include <linux/workqueue.h> +#include <linux/slab.h> + +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/rtas.h> +#include <asm/prom.h> +#include <asm/nvram.h> +#include <linux/atomic.h> +#include <asm/machdep.h> + + +static DEFINE_SPINLOCK(rtasd_log_lock); + +static DECLARE_WAIT_QUEUE_HEAD(rtas_log_wait); + +static char *rtas_log_buf; +static unsigned long rtas_log_start; +static unsigned long rtas_log_size; + +static int surveillance_timeout = -1; + +static unsigned int rtas_error_log_max; +static unsigned int rtas_error_log_buffer_max; + +/* RTAS service tokens */ +static unsigned int event_scan; +static unsigned int rtas_event_scan_rate; + +static int full_rtas_msgs = 0; + +/* Stop logging to nvram after first fatal error */ +static int logging_enabled; /* Until we initialize everything, + * make sure we don't try logging + * anything */ +static int error_log_cnt; + +/* + * Since we use 32 bit RTAS, the physical address of this must be below + * 4G or else bad things happen. Allocate this in the kernel data and + * make it big enough. + */ +static unsigned char logdata[RTAS_ERROR_LOG_MAX]; + +static char *rtas_type[] = { + "Unknown", "Retry", "TCE Error", "Internal Device Failure", + "Timeout", "Data Parity", "Address Parity", "Cache Parity", + "Address Invalid", "ECC Uncorrected", "ECC Corrupted", +}; + +static char *rtas_event_type(int type) +{ + if ((type > 0) && (type < 11)) + return rtas_type[type]; + + switch (type) { + case RTAS_TYPE_EPOW: + return "EPOW"; + case RTAS_TYPE_PLATFORM: + return "Platform Error"; + case RTAS_TYPE_IO: + return "I/O Event"; + case RTAS_TYPE_INFO: + return "Platform Information Event"; + case RTAS_TYPE_DEALLOC: + return "Resource Deallocation Event"; + case RTAS_TYPE_DUMP: + return "Dump Notification Event"; + } + + return rtas_type[0]; +} + +/* To see this info, grep RTAS /var/log/messages and each entry + * will be collected together with obvious begin/end. + * There will be a unique identifier on the begin and end lines. + * This will persist across reboots. + * + * format of error logs returned from RTAS: + * bytes (size) : contents + * -------------------------------------------------------- + * 0-7 (8) : rtas_error_log + * 8-47 (40) : extended info + * 48-51 (4) : vendor id + * 52-1023 (vendor specific) : location code and debug data + */ +static void printk_log_rtas(char *buf, int len) +{ + + int i,j,n = 0; + int perline = 16; + char buffer[64]; + char * str = "RTAS event"; + + if (full_rtas_msgs) { + printk(RTAS_DEBUG "%d -------- %s begin --------\n", + error_log_cnt, str); + + /* + * Print perline bytes on each line, each line will start + * with RTAS and a changing number, so syslogd will + * print lines that are otherwise the same. Separate every + * 4 bytes with a space. 
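+ * A line thus looks roughly like (hypothetical bytes):
+ * RTAS 1: 04420020 00000000 ...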
+ */ + for (i = 0; i < len; i++) { + j = i % perline; + if (j == 0) { + memset(buffer, 0, sizeof(buffer)); + n = sprintf(buffer, "RTAS %d:", i/perline); + } + + if ((i % 4) == 0) + n += sprintf(buffer+n, " "); + + n += sprintf(buffer+n, "%02x", (unsigned char)buf[i]); + + if (j == (perline-1)) + printk(KERN_DEBUG "%s\n", buffer); + } + if ((i % perline) != 0) + printk(KERN_DEBUG "%s\n", buffer); + + printk(RTAS_DEBUG "%d -------- %s end ----------\n", + error_log_cnt, str); + } else { + struct rtas_error_log *errlog = (struct rtas_error_log *)buf; + + printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n", + error_log_cnt, rtas_event_type(errlog->type), + errlog->severity); + } +} + +static int log_rtas_len(char * buf) +{ + int len; + struct rtas_error_log *err; + + /* rtas fixed header */ + len = 8; + err = (struct rtas_error_log *)buf; + if (err->extended && err->extended_log_length) { + + /* extended header */ + len += err->extended_log_length; + } + + if (rtas_error_log_max == 0) + rtas_error_log_max = rtas_get_error_log_max(); + + if (len > rtas_error_log_max) + len = rtas_error_log_max; + + return len; +} + +/* + * First write to nvram, if fatal error, that is the only + * place we log the info. The error will be picked up + * on the next reboot by rtasd. If not fatal, run the + * method for the type of error. Currently, only RTAS + * errors have methods implemented, but in the future + * there might be a need to store data in nvram before a + * call to panic(). + * + * XXX We write to nvram periodically, to indicate error has + * been written and sync'd, but there is a possibility + * that if we don't shutdown correctly, a duplicate error + * record will be created on next reboot. + */ +void pSeries_log_error(char *buf, unsigned int err_type, int fatal) +{ + unsigned long offset; + unsigned long s; + int len = 0; + + pr_debug("rtasd: logging event\n"); + if (buf == NULL) + return; + + spin_lock_irqsave(&rtasd_log_lock, s); + + /* get length and increase count */ + switch (err_type & ERR_TYPE_MASK) { + case ERR_TYPE_RTAS_LOG: + len = log_rtas_len(buf); + if (!(err_type & ERR_FLAG_BOOT)) + error_log_cnt++; + break; + case ERR_TYPE_KERNEL_PANIC: + default: + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + spin_unlock_irqrestore(&rtasd_log_lock, s); + return; + } + +#ifdef CONFIG_PPC64 + /* Write error to NVRAM */ + if (logging_enabled && !(err_type & ERR_FLAG_BOOT)) + nvram_write_error_log(buf, len, err_type, error_log_cnt); +#endif /* CONFIG_PPC64 */ + + /* + * rtas errors can occur during boot, and we do want to capture + * those somewhere, even if nvram isn't ready (why not?), and even + * if rtasd isn't ready. Put them into the boot log, at least. 
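+ * (printk_log_rtas() below pushes them through printk, so they at
+ * least show up in dmesg even before the proc interface is usable.)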
+ */ + if ((err_type & ERR_TYPE_MASK) == ERR_TYPE_RTAS_LOG) + printk_log_rtas(buf, len); + + /* Check to see if we need to or have stopped logging */ + if (fatal || !logging_enabled) { + logging_enabled = 0; + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + spin_unlock_irqrestore(&rtasd_log_lock, s); + return; + } + + /* call type specific method for error */ + switch (err_type & ERR_TYPE_MASK) { + case ERR_TYPE_RTAS_LOG: + offset = rtas_error_log_buffer_max * + ((rtas_log_start+rtas_log_size) & LOG_NUMBER_MASK); + + /* First copy over sequence number */ + memcpy(&rtas_log_buf[offset], (void *) &error_log_cnt, sizeof(int)); + + /* Second copy over error log data */ + offset += sizeof(int); + memcpy(&rtas_log_buf[offset], buf, len); + + if (rtas_log_size < LOG_NUMBER) + rtas_log_size += 1; + else + rtas_log_start += 1; + + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + spin_unlock_irqrestore(&rtasd_log_lock, s); + wake_up_interruptible(&rtas_log_wait); + break; + case ERR_TYPE_KERNEL_PANIC: + default: + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + spin_unlock_irqrestore(&rtasd_log_lock, s); + return; + } + +} + +static int rtas_log_open(struct inode * inode, struct file * file) +{ + return 0; +} + +static int rtas_log_release(struct inode * inode, struct file * file) +{ + return 0; +} + +/* This will check if all events are logged, if they are then, we + * know that we can safely clear the events in NVRAM. + * Next we'll sit and wait for something else to log. + */ +static ssize_t rtas_log_read(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + int error; + char *tmp; + unsigned long s; + unsigned long offset; + + if (!buf || count < rtas_error_log_buffer_max) + return -EINVAL; + + count = rtas_error_log_buffer_max; + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + tmp = kmalloc(count, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + spin_lock_irqsave(&rtasd_log_lock, s); + + /* if it's 0, then we know we got the last one (the one in NVRAM) */ + while (rtas_log_size == 0) { + if (file->f_flags & O_NONBLOCK) { + spin_unlock_irqrestore(&rtasd_log_lock, s); + error = -EAGAIN; + goto out; + } + + if (!logging_enabled) { + spin_unlock_irqrestore(&rtasd_log_lock, s); + error = -ENODATA; + goto out; + } +#ifdef CONFIG_PPC64 + nvram_clear_error_log(); +#endif /* CONFIG_PPC64 */ + + spin_unlock_irqrestore(&rtasd_log_lock, s); + error = wait_event_interruptible(rtas_log_wait, rtas_log_size); + if (error) + goto out; + spin_lock_irqsave(&rtasd_log_lock, s); + } + + offset = rtas_error_log_buffer_max * (rtas_log_start & LOG_NUMBER_MASK); + memcpy(tmp, &rtas_log_buf[offset], count); + + rtas_log_start += 1; + rtas_log_size -= 1; + spin_unlock_irqrestore(&rtasd_log_lock, s); + + error = copy_to_user(buf, tmp, count) ? 
-EFAULT : count;
+out:
+ kfree(tmp);
+ return error;
+}
+
+static unsigned int rtas_log_poll(struct file *file, poll_table *wait)
+{
+ poll_wait(file, &rtas_log_wait, wait);
+ if (rtas_log_size)
+ return POLLIN | POLLRDNORM;
+ return 0;
+}
+
+static const struct file_operations proc_rtas_log_operations = {
+ .read = rtas_log_read,
+ .poll = rtas_log_poll,
+ .open = rtas_log_open,
+ .release = rtas_log_release,
+ .llseek = noop_llseek,
+};
+
+static int enable_surveillance(int timeout)
+{
+ int error;
+
+ error = rtas_set_indicator(SURVEILLANCE_TOKEN, 0, timeout);
+
+ if (error == 0)
+ return 0;
+
+ if (error == -EINVAL) {
+ printk(KERN_DEBUG "rtasd: surveillance not supported\n");
+ return 0;
+ }
+
+ printk(KERN_ERR "rtasd: could not update surveillance\n");
+ return -1;
+}
+
+static void do_event_scan(void)
+{
+ int error;
+ do {
+ memset(logdata, 0, rtas_error_log_max);
+ error = rtas_call(event_scan, 4, 1, NULL,
+ RTAS_EVENT_SCAN_ALL_EVENTS, 0,
+ __pa(logdata), rtas_error_log_max);
+ if (error == -1) {
+ printk(KERN_ERR "event-scan failed\n");
+ break;
+ }
+
+ if (error == 0)
+ pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, 0);
+
+ } while (error == 0);
+}
+
+static void rtas_event_scan(struct work_struct *w);
+DECLARE_DELAYED_WORK(event_scan_work, rtas_event_scan);
+
+/*
+ * Delay should be at least one second since some machines have problems if
+ * we call event-scan too quickly.
+ */
+static unsigned long event_scan_delay = 1*HZ;
+static int first_pass = 1;
+
+static void rtas_event_scan(struct work_struct *w)
+{
+ unsigned int cpu;
+
+ do_event_scan();
+
+ get_online_cpus();
+
+ /* raw_ OK because just using CPU as starting point. */
+ cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
+ if (cpu >= nr_cpu_ids) {
+ cpu = cpumask_first(cpu_online_mask);
+
+ if (first_pass) {
+ first_pass = 0;
+ event_scan_delay = 30*HZ/rtas_event_scan_rate;
+
+ if (surveillance_timeout != -1) {
+ pr_debug("rtasd: enabling surveillance\n");
+ enable_surveillance(surveillance_timeout);
+ pr_debug("rtasd: surveillance enabled\n");
+ }
+ }
+ }
+
+ schedule_delayed_work_on(cpu, &event_scan_work,
+ __round_jiffies_relative(event_scan_delay, cpu));
+
+ put_online_cpus();
+}
+
+#ifdef CONFIG_PPC64
+static void retrieve_nvram_error_log(void)
+{
+ unsigned int err_type;
+ int rc;
+
+ /* See if we have any error stored in NVRAM */
+ memset(logdata, 0, rtas_error_log_max);
+ rc = nvram_read_error_log(logdata, rtas_error_log_max,
+ &err_type, &error_log_cnt);
+ /* We can use rtas_log_buf now */
+ logging_enabled = 1;
+ if (!rc) {
+ if (err_type != ERR_FLAG_ALREADY_LOGGED) {
+ pSeries_log_error(logdata, err_type | ERR_FLAG_BOOT, 0);
+ }
+ }
+}
+#else /* CONFIG_PPC64 */
+static void retrieve_nvram_error_log(void)
+{
+}
+#endif /* CONFIG_PPC64 */
+
+static void start_event_scan(void)
+{
+ printk(KERN_DEBUG "RTAS daemon started\n");
+ pr_debug("rtasd: will sleep for %d milliseconds\n",
+ (30000 / rtas_event_scan_rate));
+
+ /* Retrieve errors from nvram if any */
+ retrieve_nvram_error_log();
+
+ schedule_delayed_work_on(cpumask_first(cpu_online_mask),
+ &event_scan_work, event_scan_delay);
+}
+
+/* Cancel the rtas event scan work */
+void rtas_cancel_event_scan(void)
+{
+ cancel_delayed_work_sync(&event_scan_work);
+}
+EXPORT_SYMBOL_GPL(rtas_cancel_event_scan);
+
+static int __init rtas_init(void)
+{
+ struct proc_dir_entry *entry;
+
+ if (!machine_is(pseries) && !machine_is(chrp))
+ return 0;
+
+ /* No RTAS */
+ event_scan = rtas_token("event-scan");
+ if (event_scan == RTAS_UNKNOWN_SERVICE) { + 
printk(KERN_INFO "rtasd: No event-scan on system\n"); + return -ENODEV; + } + + rtas_event_scan_rate = rtas_token("rtas-event-scan-rate"); + if (rtas_event_scan_rate == RTAS_UNKNOWN_SERVICE) { + printk(KERN_ERR "rtasd: no rtas-event-scan-rate on system\n"); + return -ENODEV; + } + + if (!rtas_event_scan_rate) { + /* Broken firmware: take a rate of zero to mean don't scan */ + printk(KERN_DEBUG "rtasd: scan rate is 0, not scanning\n"); + return 0; + } + + /* Make room for the sequence number */ + rtas_error_log_max = rtas_get_error_log_max(); + rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int); + + rtas_log_buf = vmalloc(rtas_error_log_buffer_max*LOG_NUMBER); + if (!rtas_log_buf) { + printk(KERN_ERR "rtasd: no memory\n"); + return -ENOMEM; + } + + entry = proc_create("powerpc/rtas/error_log", S_IRUSR, NULL, + &proc_rtas_log_operations); + if (!entry) + printk(KERN_ERR "Failed to create error_log proc entry\n"); + + start_event_scan(); + + return 0; +} +__initcall(rtas_init); + +static int __init surveillance_setup(char *str) +{ + int i; + + /* We only do surveillance on pseries */ + if (!machine_is(pseries)) + return 0; + + if (get_option(&str,&i)) { + if (i >= 0 && i <= 255) + surveillance_timeout = i; + } + + return 1; +} +__setup("surveillance=", surveillance_setup); + +static int __init rtasmsgs_setup(char *str) +{ + if (strcmp(str, "on") == 0) + full_rtas_msgs = 1; + else if (strcmp(str, "off") == 0) + full_rtas_msgs = 0; + + return 1; +} +__setup("rtasmsgs=", rtasmsgs_setup); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c new file mode 100644 index 00000000..afd4f051 --- /dev/null +++ b/arch/powerpc/kernel/setup-common.c @@ -0,0 +1,733 @@ +/* + * Common boot and setup code for both 32-bit and 64-bit. + * Extracted from arch/powerpc/kernel/setup_64.c. + * + * Copyright (C) 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <linux/export.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/reboot.h> +#include <linux/delay.h> +#include <linux/initrd.h> +#include <linux/platform_device.h> +#include <linux/seq_file.h> +#include <linux/ioport.h> +#include <linux/console.h> +#include <linux/screen_info.h> +#include <linux/root_dev.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/unistd.h> +#include <linux/serial.h> +#include <linux/serial_8250.h> +#include <linux/debugfs.h> +#include <linux/percpu.h> +#include <linux/memblock.h> +#include <linux/of_platform.h> +#include <asm/io.h> +#include <asm/paca.h> +#include <asm/prom.h> +#include <asm/processor.h> +#include <asm/vdso_datapage.h> +#include <asm/pgtable.h> +#include <asm/smp.h> +#include <asm/elf.h> +#include <asm/machdep.h> +#include <asm/time.h> +#include <asm/cputable.h> +#include <asm/sections.h> +#include <asm/firmware.h> +#include <asm/btext.h> +#include <asm/nvram.h> +#include <asm/setup.h> +#include <asm/rtas.h> +#include <asm/iommu.h> +#include <asm/serial.h> +#include <asm/cache.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/xmon.h> +#include <asm/cputhreads.h> +#include <mm/mmu_decl.h> +#include <asm/fadump.h> + +#include "setup.h" + +#ifdef DEBUG +#include <asm/udbg.h> +#define DBG(fmt...) 
udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +/* The main machine-dep calls structure + */ +struct machdep_calls ppc_md; +EXPORT_SYMBOL(ppc_md); +struct machdep_calls *machine_id; +EXPORT_SYMBOL(machine_id); + +unsigned long klimit = (unsigned long) _end; + +char cmd_line[COMMAND_LINE_SIZE]; + +/* + * This still seems to be needed... -- paulus + */ +struct screen_info screen_info = { + .orig_x = 0, + .orig_y = 25, + .orig_video_cols = 80, + .orig_video_lines = 25, + .orig_video_isVGA = 1, + .orig_video_points = 16 +}; + +/* Variables required to store legacy IO irq routing */ +int of_i8042_kbd_irq; +EXPORT_SYMBOL_GPL(of_i8042_kbd_irq); +int of_i8042_aux_irq; +EXPORT_SYMBOL_GPL(of_i8042_aux_irq); + +#ifdef __DO_IRQ_CANON +/* XXX should go elsewhere eventually */ +int ppc_do_canonicalize_irqs; +EXPORT_SYMBOL(ppc_do_canonicalize_irqs); +#endif + +/* also used by kexec */ +void machine_shutdown(void) +{ +#ifdef CONFIG_FA_DUMP + /* + * if fadump is active, cleanup the fadump registration before we + * shutdown. + */ + fadump_cleanup(); +#endif + + if (ppc_md.machine_shutdown) + ppc_md.machine_shutdown(); +} + +void machine_restart(char *cmd) +{ + machine_shutdown(); + if (ppc_md.restart) + ppc_md.restart(cmd); +#ifdef CONFIG_SMP + smp_send_stop(); +#endif + printk(KERN_EMERG "System Halted, OK to turn off power\n"); + local_irq_disable(); + while (1) ; +} + +void machine_power_off(void) +{ + machine_shutdown(); + if (ppc_md.power_off) + ppc_md.power_off(); +#ifdef CONFIG_SMP + smp_send_stop(); +#endif + printk(KERN_EMERG "System Halted, OK to turn off power\n"); + local_irq_disable(); + while (1) ; +} +/* Used by the G5 thermal driver */ +EXPORT_SYMBOL_GPL(machine_power_off); + +void (*pm_power_off)(void) = machine_power_off; +EXPORT_SYMBOL_GPL(pm_power_off); + +void machine_halt(void) +{ + machine_shutdown(); + if (ppc_md.halt) + ppc_md.halt(); +#ifdef CONFIG_SMP + smp_send_stop(); +#endif + printk(KERN_EMERG "System Halted, OK to turn off power\n"); + local_irq_disable(); + while (1) ; +} + + +#ifdef CONFIG_TAU +extern u32 cpu_temp(unsigned long cpu); +extern u32 cpu_temp_both(unsigned long cpu); +#endif /* CONFIG_TAU */ + +#ifdef CONFIG_SMP +DEFINE_PER_CPU(unsigned int, cpu_pvr); +#endif + +static void show_cpuinfo_summary(struct seq_file *m) +{ + struct device_node *root; + const char *model = NULL; +#if defined(CONFIG_SMP) && defined(CONFIG_PPC32) + unsigned long bogosum = 0; + int i; + for_each_online_cpu(i) + bogosum += loops_per_jiffy; + seq_printf(m, "total bogomips\t: %lu.%02lu\n", + bogosum/(500000/HZ), bogosum/(5000/HZ) % 100); +#endif /* CONFIG_SMP && CONFIG_PPC32 */ + seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); + if (ppc_md.name) + seq_printf(m, "platform\t: %s\n", ppc_md.name); + root = of_find_node_by_path("/"); + if (root) + model = of_get_property(root, "model", NULL); + if (model) + seq_printf(m, "model\t\t: %s\n", model); + of_node_put(root); + + if (ppc_md.show_cpuinfo != NULL) + ppc_md.show_cpuinfo(m); + +#ifdef CONFIG_PPC32 + /* Display the amount of memory */ + seq_printf(m, "Memory\t\t: %d MB\n", + (unsigned int)(total_memory / (1024 * 1024))); +#endif +} + +static int show_cpuinfo(struct seq_file *m, void *v) +{ + unsigned long cpu_id = (unsigned long)v - 1; + unsigned int pvr; + unsigned short maj; + unsigned short min; + + /* We only show online cpus: disable preempt (overzealous, I + * knew) to prevent cpu going down. 
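+ * CPU hot-unplug goes through stop_machine(), which cannot proceed
+ * while we sit here with preemption disabled, so the cpu_online()
+ * result below stays valid for the duration.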
*/ + preempt_disable(); + if (!cpu_online(cpu_id)) { + preempt_enable(); + return 0; + } + +#ifdef CONFIG_SMP + pvr = per_cpu(cpu_pvr, cpu_id); +#else + pvr = mfspr(SPRN_PVR); +#endif + maj = (pvr >> 8) & 0xFF; + min = pvr & 0xFF; + + seq_printf(m, "processor\t: %lu\n", cpu_id); + seq_printf(m, "cpu\t\t: "); + + if (cur_cpu_spec->pvr_mask) + seq_printf(m, "%s", cur_cpu_spec->cpu_name); + else + seq_printf(m, "unknown (%08x)", pvr); + +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + seq_printf(m, ", altivec supported"); +#endif /* CONFIG_ALTIVEC */ + + seq_printf(m, "\n"); + +#ifdef CONFIG_TAU + if (cur_cpu_spec->cpu_features & CPU_FTR_TAU) { +#ifdef CONFIG_TAU_AVERAGE + /* more straightforward, but potentially misleading */ + seq_printf(m, "temperature \t: %u C (uncalibrated)\n", + cpu_temp(cpu_id)); +#else + /* show the actual temp sensor range */ + u32 temp; + temp = cpu_temp_both(cpu_id); + seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n", + temp & 0xff, temp >> 16); +#endif + } +#endif /* CONFIG_TAU */ + + /* + * Assume here that all clock rates are the same in a + * smp system. -- Cort + */ + if (ppc_proc_freq) + seq_printf(m, "clock\t\t: %lu.%06luMHz\n", + ppc_proc_freq / 1000000, ppc_proc_freq % 1000000); + + if (ppc_md.show_percpuinfo != NULL) + ppc_md.show_percpuinfo(m, cpu_id); + + /* If we are a Freescale core do a simple check so + * we dont have to keep adding cases in the future */ + if (PVR_VER(pvr) & 0x8000) { + switch (PVR_VER(pvr)) { + case 0x8000: /* 7441/7450/7451, Voyager */ + case 0x8001: /* 7445/7455, Apollo 6 */ + case 0x8002: /* 7447/7457, Apollo 7 */ + case 0x8003: /* 7447A, Apollo 7 PM */ + case 0x8004: /* 7448, Apollo 8 */ + case 0x800c: /* 7410, Nitro */ + maj = ((pvr >> 8) & 0xF); + min = PVR_MIN(pvr); + break; + default: /* e500/book-e */ + maj = PVR_MAJ(pvr); + min = PVR_MIN(pvr); + break; + } + } else { + switch (PVR_VER(pvr)) { + case 0x0020: /* 403 family */ + maj = PVR_MAJ(pvr) + 1; + min = PVR_MIN(pvr); + break; + case 0x1008: /* 740P/750P ?? */ + maj = ((pvr >> 8) & 0xFF) - 1; + min = pvr & 0xFF; + break; + default: + maj = (pvr >> 8) & 0xFF; + min = pvr & 0xFF; + break; + } + } + + seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n", + maj, min, PVR_VER(pvr), PVR_REV(pvr)); + +#ifdef CONFIG_PPC32 + seq_printf(m, "bogomips\t: %lu.%02lu\n", + loops_per_jiffy / (500000/HZ), + (loops_per_jiffy / (5000/HZ)) % 100); +#endif + +#ifdef CONFIG_SMP + seq_printf(m, "\n"); +#endif + + preempt_enable(); + + /* If this is the last cpu, print the summary */ + if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids) + show_cpuinfo_summary(m); + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + if (*pos == 0) /* just in case, cpu 0 is not the first */ + *pos = cpumask_first(cpu_online_mask); + else + *pos = cpumask_next(*pos - 1, cpu_online_mask); + if ((*pos) < nr_cpu_ids) + return (void *)(unsigned long)(*pos + 1); + return NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + (*pos)++; + return c_start(m, pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start =c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + +void __init check_for_initrd(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + DBG(" -> check_for_initrd() initrd_start=0x%lx initrd_end=0x%lx\n", + initrd_start, initrd_end); + + /* If we were passed an initrd, set the ROOT_DEV properly if the values + * look sensible. If not, clear initrd reference. 
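+ * ("Sensible" means both addresses are kernel addresses and the end
+ * lies after the start, as checked below.)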
+ */ + if (is_kernel_addr(initrd_start) && is_kernel_addr(initrd_end) && + initrd_end > initrd_start) + ROOT_DEV = Root_RAM0; + else + initrd_start = initrd_end = 0; + + if (initrd_start) + printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end); + + DBG(" <- check_for_initrd()\n"); +#endif /* CONFIG_BLK_DEV_INITRD */ +} + +#ifdef CONFIG_SMP + +int threads_per_core, threads_shift; +cpumask_t threads_core_mask; +EXPORT_SYMBOL_GPL(threads_per_core); +EXPORT_SYMBOL_GPL(threads_shift); +EXPORT_SYMBOL_GPL(threads_core_mask); + +static void __init cpu_init_thread_core_maps(int tpc) +{ + int i; + + threads_per_core = tpc; + cpumask_clear(&threads_core_mask); + + /* This implementation only supports power of 2 number of threads + * for simplicity and performance + */ + threads_shift = ilog2(tpc); + BUG_ON(tpc != (1 << threads_shift)); + + for (i = 0; i < tpc; i++) + cpumask_set_cpu(i, &threads_core_mask); + + printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n", + tpc, tpc > 1 ? "s" : ""); + printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift); +} + + +/** + * setup_cpu_maps - initialize the following cpu maps: + * cpu_possible_mask + * cpu_present_mask + * + * Having the possible map set up early allows us to restrict allocations + * of things like irqstacks to nr_cpu_ids rather than NR_CPUS. + * + * We do not initialize the online map here; cpus set their own bits in + * cpu_online_mask as they come up. + * + * This function is valid only for Open Firmware systems. finish_device_tree + * must be called before using this. + * + * While we're here, we may as well set the "physical" cpu ids in the paca. + * + * NOTE: This must match the parsing done in early_init_dt_scan_cpus. + */ +void __init smp_setup_cpu_maps(void) +{ + struct device_node *dn = NULL; + int cpu = 0; + int nthreads = 1; + + DBG("smp_setup_cpu_maps()\n"); + + while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) { + const int *intserv; + int j, len; + + DBG(" * %s...\n", dn->full_name); + + intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", + &len); + if (intserv) { + nthreads = len / sizeof(int); + DBG(" ibm,ppc-interrupt-server#s -> %d threads\n", + nthreads); + } else { + DBG(" no ibm,ppc-interrupt-server#s -> 1 thread\n"); + intserv = of_get_property(dn, "reg", NULL); + if (!intserv) + intserv = &cpu; /* assume logical == phys */ + } + + for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { + DBG(" thread %d -> cpu %d (hard id %d)\n", + j, cpu, intserv[j]); + set_cpu_present(cpu, true); + set_hard_smp_processor_id(cpu, intserv[j]); + set_cpu_possible(cpu, true); + cpu++; + } + } + + /* If no SMT supported, nthreads is forced to 1 */ + if (!cpu_has_feature(CPU_FTR_SMT)) { + DBG(" SMT disabled ! nthreads forced to 1\n"); + nthreads = 1; + } + +#ifdef CONFIG_PPC64 + /* + * On pSeries LPAR, we need to know how many cpus + * could possibly be added to this partition. 
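+ * That limit is read from the ibm,lrdr-capacity property of the
+ * /rtas node below, and scaled by the thread count on SMT systems.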
+ */ + if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR) && + (dn = of_find_node_by_path("/rtas"))) { + int num_addr_cell, num_size_cell, maxcpus; + const unsigned int *ireg; + + num_addr_cell = of_n_addr_cells(dn); + num_size_cell = of_n_size_cells(dn); + + ireg = of_get_property(dn, "ibm,lrdr-capacity", NULL); + + if (!ireg) + goto out; + + maxcpus = ireg[num_addr_cell + num_size_cell]; + + /* Double maxcpus for processors which have SMT capability */ + if (cpu_has_feature(CPU_FTR_SMT)) + maxcpus *= nthreads; + + if (maxcpus > nr_cpu_ids) { + printk(KERN_WARNING + "Partition configured for %d cpus, " + "operating system maximum is %d.\n", + maxcpus, nr_cpu_ids); + maxcpus = nr_cpu_ids; + } else + printk(KERN_INFO "Partition configured for %d cpus.\n", + maxcpus); + + for (cpu = 0; cpu < maxcpus; cpu++) + set_cpu_possible(cpu, true); + out: + of_node_put(dn); + } + vdso_data->processorCount = num_present_cpus(); +#endif /* CONFIG_PPC64 */ + + /* Initialize CPU <=> thread mapping/ + * + * WARNING: We assume that the number of threads is the same for + * every CPU in the system. If that is not the case, then some code + * here will have to be reworked + */ + cpu_init_thread_core_maps(nthreads); + + /* Now that possible cpus are set, set nr_cpu_ids for later use */ + setup_nr_cpu_ids(); + + free_unused_pacas(); +} +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_PCSPKR_PLATFORM +static __init int add_pcspkr(void) +{ + struct device_node *np; + struct platform_device *pd; + int ret; + + np = of_find_compatible_node(NULL, NULL, "pnpPNP,100"); + of_node_put(np); + if (!np) + return -ENODEV; + + pd = platform_device_alloc("pcspkr", -1); + if (!pd) + return -ENOMEM; + + ret = platform_device_add(pd); + if (ret) + platform_device_put(pd); + + return ret; +} +device_initcall(add_pcspkr); +#endif /* CONFIG_PCSPKR_PLATFORM */ + +void probe_machine(void) +{ + extern struct machdep_calls __machine_desc_start; + extern struct machdep_calls __machine_desc_end; + + /* + * Iterate all ppc_md structures until we find the proper + * one for the current machine type + */ + DBG("Probing machine type ...\n"); + + for (machine_id = &__machine_desc_start; + machine_id < &__machine_desc_end; + machine_id++) { + DBG(" %s ...", machine_id->name); + memcpy(&ppc_md, machine_id, sizeof(struct machdep_calls)); + if (ppc_md.probe()) { + DBG(" match !\n"); + break; + } + DBG("\n"); + } + /* What can we do if we didn't find ? */ + if (machine_id >= &__machine_desc_end) { + DBG("No suitable machine found !\n"); + for (;;); + } + + printk(KERN_INFO "Using %s machine description\n", ppc_md.name); +} + +/* Match a class of boards, not a specific device configuration. 
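+ * check_legacy_ioport() returns 0 only when the matching device tree
+ * node has an "isa" parent; e.g. check_legacy_ioport(I8042_DATA_REG)
+ * before poking at the keyboard controller.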
*/
+int check_legacy_ioport(unsigned long base_port)
+{
+ struct device_node *parent, *np = NULL;
+ int ret = -ENODEV;
+
+ switch (base_port) {
+ case I8042_DATA_REG:
+ if (!(np = of_find_compatible_node(NULL, NULL, "pnpPNP,303")))
+ np = of_find_compatible_node(NULL, NULL, "pnpPNP,f03");
+ if (np) {
+ parent = of_get_parent(np);
+
+ of_i8042_kbd_irq = irq_of_parse_and_map(parent, 0);
+ if (!of_i8042_kbd_irq)
+ of_i8042_kbd_irq = 1;
+
+ of_i8042_aux_irq = irq_of_parse_and_map(parent, 1);
+ if (!of_i8042_aux_irq)
+ of_i8042_aux_irq = 12;
+
+ of_node_put(np);
+ np = parent;
+ break;
+ }
+ np = of_find_node_by_type(NULL, "8042");
+ /* Pegasos has no device_type on its 8042 node, look for the
+ * name instead */
+ if (!np)
+ np = of_find_node_by_name(NULL, "8042");
+ if (np) {
+ of_i8042_kbd_irq = 1;
+ of_i8042_aux_irq = 12;
+ }
+ break;
+ case FDC_BASE: /* FDC1 */
+ np = of_find_node_by_type(NULL, "fdc");
+ break;
+#ifdef CONFIG_PPC_PREP
+ case _PIDXR:
+ case _PNPWRP:
+ case PNPBIOS_BASE:
+ /* implement me */
+#endif
+ default:
+ /* ipmi is supposed to fail here */
+ break;
+ }
+ if (!np)
+ return ret;
+ parent = of_get_parent(np);
+ if (parent) {
+ if (strcmp(parent->type, "isa") == 0)
+ ret = 0;
+ of_node_put(parent);
+ }
+ of_node_put(np);
+ return ret;
+}
+EXPORT_SYMBOL(check_legacy_ioport);
+
+static int ppc_panic_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ /*
+ * If firmware-assisted dump has been registered then trigger
+ * firmware-assisted dump and let firmware handle everything else.
+ */
+ crash_fadump(NULL, ptr);
+ ppc_md.panic(ptr); /* May not return */
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ppc_panic_block = {
+ .notifier_call = ppc_panic_event,
+ .priority = INT_MIN /* may not return; must be done last */
+};
+
+void __init setup_panic(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
+}
+
+#ifdef CONFIG_CHECK_CACHE_COHERENCY
+/*
+ * For platforms that have configurable cache-coherency. This function
+ * checks that the cache coherency setting of the kernel matches the setting
+ * left by the firmware, as indicated in the device tree. Since a mismatch
+ * will eventually result in DMA failures, we print an error and call
+ * BUG() in that case.
+ */
+
+#ifdef CONFIG_NOT_COHERENT_CACHE
+#define KERNEL_COHERENCY 0
+#else
+#define KERNEL_COHERENCY 1
+#endif
+
+static int __init check_cache_coherency(void)
+{
+ struct device_node *np;
+ const void *prop;
+ int devtree_coherency;
+
+ np = of_find_node_by_path("/");
+ prop = of_get_property(np, "coherency-off", NULL);
+ of_node_put(np);
+
+ devtree_coherency = prop ? 0 : 1;
+
+ if (devtree_coherency != KERNEL_COHERENCY) {
+ printk(KERN_ERR
+ "kernel coherency:%s != device tree coherency:%s\n",
+ KERNEL_COHERENCY ? "on" : "off",
+ devtree_coherency ? 
"on" : "off"); + BUG(); + } + + return 0; +} + +late_initcall(check_cache_coherency); +#endif /* CONFIG_CHECK_CACHE_COHERENCY */ + +#ifdef CONFIG_DEBUG_FS +struct dentry *powerpc_debugfs_root; +EXPORT_SYMBOL(powerpc_debugfs_root); + +static int powerpc_debugfs_init(void) +{ + powerpc_debugfs_root = debugfs_create_dir("powerpc", NULL); + + return powerpc_debugfs_root == NULL; +} +arch_initcall(powerpc_debugfs_init); +#endif + +void ppc_printk_progress(char *s, unsigned short hex) +{ + pr_info("%s\n", s); +} + +void arch_setup_pdev_archdata(struct platform_device *pdev) +{ + pdev->archdata.dma_mask = DMA_BIT_MASK(32); + pdev->dev.dma_mask = &pdev->archdata.dma_mask; + set_dma_ops(&pdev->dev, &dma_direct_ops); +} diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h new file mode 100644 index 00000000..4c67ad7f --- /dev/null +++ b/arch/powerpc/kernel/setup.h @@ -0,0 +1,9 @@ +#ifndef _POWERPC_KERNEL_SETUP_H +#define _POWERPC_KERNEL_SETUP_H + +void check_for_initrd(void); +void do_init_bootmem(void); +void setup_panic(void); +extern int do_early_xmon; + +#endif /* _POWERPC_KERNEL_SETUP_H */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c new file mode 100644 index 00000000..ec8a53fa --- /dev/null +++ b/arch/powerpc/kernel/setup_32.c @@ -0,0 +1,354 @@ +/* + * Common prep/pmac/chrp boot and setup code. + */ + +#include <linux/module.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/reboot.h> +#include <linux/delay.h> +#include <linux/initrd.h> +#include <linux/tty.h> +#include <linux/bootmem.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/cpu.h> +#include <linux/console.h> +#include <linux/memblock.h> + +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/processor.h> +#include <asm/pgtable.h> +#include <asm/setup.h> +#include <asm/smp.h> +#include <asm/elf.h> +#include <asm/cputable.h> +#include <asm/bootx.h> +#include <asm/btext.h> +#include <asm/machdep.h> +#include <asm/uaccess.h> +#include <asm/pmac_feature.h> +#include <asm/sections.h> +#include <asm/nvram.h> +#include <asm/xmon.h> +#include <asm/time.h> +#include <asm/serial.h> +#include <asm/udbg.h> +#include <asm/mmu_context.h> + +#include "setup.h" + +#define DBG(fmt...) + +extern void bootx_init(unsigned long r4, unsigned long phys); + +int boot_cpuid = -1; +EXPORT_SYMBOL_GPL(boot_cpuid); +int boot_cpuid_phys; +EXPORT_SYMBOL_GPL(boot_cpuid_phys); + +int smp_hw_index[NR_CPUS]; + +unsigned long ISA_DMA_THRESHOLD; +unsigned int DMA_MODE_READ; +unsigned int DMA_MODE_WRITE; + +#ifdef CONFIG_VGA_CONSOLE +unsigned long vgacon_remap_base; +EXPORT_SYMBOL(vgacon_remap_base); +#endif + +/* + * These are used in binfmt_elf.c to put aux entries on the stack + * for each elf executable being started. + */ +int dcache_bsize; +int icache_bsize; +int ucache_bsize; + +/* + * We're called here very early in the boot. We determine the machine + * type and call the appropriate low-level setup functions. + * -- Cort <cort@fsmlabs.com> + * + * Note that the kernel may be running at an address which is different + * from the address that it was linked at, so we must use RELOC/PTRRELOC + * to access static data (including strings). 
-- paulus + */ +notrace unsigned long __init early_init(unsigned long dt_ptr) +{ + unsigned long offset = reloc_offset(); + struct cpu_spec *spec; + + /* First zero the BSS -- use memset_io, some platforms don't have + * caches on yet */ + memset_io((void __iomem *)PTRRELOC(&__bss_start), 0, + __bss_stop - __bss_start); + + /* + * Identify the CPU type and fix up code sections + * that depend on which cpu we have. + */ + spec = identify_cpu(offset, mfspr(SPRN_PVR)); + + do_feature_fixups(spec->cpu_features, + PTRRELOC(&__start___ftr_fixup), + PTRRELOC(&__stop___ftr_fixup)); + + do_feature_fixups(spec->mmu_features, + PTRRELOC(&__start___mmu_ftr_fixup), + PTRRELOC(&__stop___mmu_ftr_fixup)); + + do_lwsync_fixups(spec->cpu_features, + PTRRELOC(&__start___lwsync_fixup), + PTRRELOC(&__stop___lwsync_fixup)); + + do_final_fixups(); + + return KERNELBASE + offset; +} + + +/* + * Find out what kind of machine we're on and save any data we need + * from the early boot process (devtree is copied on pmac by prom_init()). + * This is called very early on the boot process, after a minimal + * MMU environment has been set up but before MMU_init is called. + */ +notrace void __init machine_init(u64 dt_ptr) +{ + lockdep_init(); + + /* Enable early debugging if any specified (see udbg.h) */ + udbg_early_init(); + + /* Do some early initialization based on the flat device tree */ + early_init_devtree(__va(dt_ptr)); + + early_init_mmu(); + + probe_machine(); + + setup_kdump_trampoline(); + +#ifdef CONFIG_6xx + if (cpu_has_feature(CPU_FTR_CAN_DOZE) || + cpu_has_feature(CPU_FTR_CAN_NAP)) + ppc_md.power_save = ppc6xx_idle; +#endif + +#ifdef CONFIG_E500 + if (cpu_has_feature(CPU_FTR_CAN_DOZE) || + cpu_has_feature(CPU_FTR_CAN_NAP)) + ppc_md.power_save = e500_idle; +#endif + if (ppc_md.progress) + ppc_md.progress("id mach(): done", 0x200); +} + +#ifdef CONFIG_BOOKE_WDT +extern u32 booke_wdt_enabled; +extern u32 booke_wdt_period; + +/* Checks wdt=x and wdt_period=xx command-line option */ +notrace int __init early_parse_wdt(char *p) +{ + if (p && strncmp(p, "0", 1) != 0) + booke_wdt_enabled = 1; + + return 0; +} +early_param("wdt", early_parse_wdt); + +int __init early_parse_wdt_period (char *p) +{ + if (p) + booke_wdt_period = simple_strtoul(p, NULL, 0); + + return 0; +} +early_param("wdt_period", early_parse_wdt_period); +#endif /* CONFIG_BOOKE_WDT */ + +/* Checks "l2cr=xxxx" command-line option */ +int __init ppc_setup_l2cr(char *str) +{ + if (cpu_has_feature(CPU_FTR_L2CR)) { + unsigned long val = simple_strtoul(str, NULL, 0); + printk(KERN_INFO "l2cr set to %lx\n", val); + _set_L2CR(0); /* force invalidate by disable cache */ + _set_L2CR(val); /* and enable it */ + } + return 1; +} +__setup("l2cr=", ppc_setup_l2cr); + +/* Checks "l3cr=xxxx" command-line option */ +int __init ppc_setup_l3cr(char *str) +{ + if (cpu_has_feature(CPU_FTR_L3CR)) { + unsigned long val = simple_strtoul(str, NULL, 0); + printk(KERN_INFO "l3cr set to %lx\n", val); + _set_L3CR(val); /* and enable it */ + } + return 1; +} +__setup("l3cr=", ppc_setup_l3cr); + +#ifdef CONFIG_GENERIC_NVRAM + +/* Generic nvram hooks used by drivers/char/gen_nvram.c */ +unsigned char nvram_read_byte(int addr) +{ + if (ppc_md.nvram_read_val) + return ppc_md.nvram_read_val(addr); + return 0xff; +} +EXPORT_SYMBOL(nvram_read_byte); + +void nvram_write_byte(unsigned char val, int addr) +{ + if (ppc_md.nvram_write_val) + ppc_md.nvram_write_val(addr, val); +} +EXPORT_SYMBOL(nvram_write_byte); + +ssize_t nvram_get_size(void) +{ + if (ppc_md.nvram_size) + return 
ppc_md.nvram_size(); + return -1; +} +EXPORT_SYMBOL(nvram_get_size); + +void nvram_sync(void) +{ + if (ppc_md.nvram_sync) + ppc_md.nvram_sync(); +} +EXPORT_SYMBOL(nvram_sync); + +#endif /* CONFIG_NVRAM */ + +int __init ppc_init(void) +{ + /* clear the progress line */ + if (ppc_md.progress) + ppc_md.progress(" ", 0xffff); + + /* call platform init */ + if (ppc_md.init != NULL) { + ppc_md.init(); + } + return 0; +} + +arch_initcall(ppc_init); + +static void __init irqstack_early_init(void) +{ + unsigned int i; + + /* interrupt stacks must be in lowmem, we get that for free on ppc32 + * as the memblock is limited to lowmem by default */ + for_each_possible_cpu(i) { + softirq_ctx[i] = (struct thread_info *) + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); + hardirq_ctx[i] = (struct thread_info *) + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); + } +} + +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +static void __init exc_lvl_early_init(void) +{ + unsigned int i, hw_cpu; + + /* interrupt stacks must be in lowmem, we get that for free on ppc32 + * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */ + for_each_possible_cpu(i) { + hw_cpu = get_hard_smp_processor_id(i); + critirq_ctx[hw_cpu] = (struct thread_info *) + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); +#ifdef CONFIG_BOOKE + dbgirq_ctx[hw_cpu] = (struct thread_info *) + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); + mcheckirq_ctx[hw_cpu] = (struct thread_info *) + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); +#endif + } +} +#else +#define exc_lvl_early_init() +#endif + +/* Warning, IO base is not yet inited */ +void __init setup_arch(char **cmdline_p) +{ + *cmdline_p = cmd_line; + + /* so udelay does something sensible, assume <= 1000 bogomips */ + loops_per_jiffy = 500000000 / HZ; + + unflatten_device_tree(); + check_for_initrd(); + + if (ppc_md.init_early) + ppc_md.init_early(); + + find_legacy_serial_ports(); + + smp_setup_cpu_maps(); + + /* Register early console */ + register_early_udbg_console(); + + xmon_setup(); + + /* + * Set cache line size based on type of cpu as a default. + * Systems with OF can look in the properties on the cpu node(s) + * for a possibly more accurate value. + */ + dcache_bsize = cur_cpu_spec->dcache_bsize; + icache_bsize = cur_cpu_spec->icache_bsize; + ucache_bsize = 0; + if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) + ucache_bsize = icache_bsize = dcache_bsize; + + /* reboot on panic */ + panic_timeout = 180; + + if (ppc_md.panic) + setup_panic(); + + init_mm.start_code = (unsigned long)_stext; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; + init_mm.brk = klimit; + + exc_lvl_early_init(); + + irqstack_early_init(); + + /* set up the bootmem stuff with available memory */ + do_init_bootmem(); + if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab); + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + + if (ppc_md.setup_arch) + ppc_md.setup_arch(); + if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab); + + paging_init(); + + /* Initialize the MMU context management stuff */ + mmu_context_init(); + +} diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c new file mode 100644 index 00000000..389bd4f0 --- /dev/null +++ b/arch/powerpc/kernel/setup_64.c @@ -0,0 +1,691 @@ +/* + * + * Common boot and setup code. 
+ * + * Copyright (C) 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <linux/export.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/reboot.h> +#include <linux/delay.h> +#include <linux/initrd.h> +#include <linux/seq_file.h> +#include <linux/ioport.h> +#include <linux/console.h> +#include <linux/utsname.h> +#include <linux/tty.h> +#include <linux/root_dev.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/unistd.h> +#include <linux/serial.h> +#include <linux/serial_8250.h> +#include <linux/bootmem.h> +#include <linux/pci.h> +#include <linux/lockdep.h> +#include <linux/memblock.h> +#include <linux/hugetlb.h> + +#include <asm/io.h> +#include <asm/kdump.h> +#include <asm/prom.h> +#include <asm/processor.h> +#include <asm/pgtable.h> +#include <asm/smp.h> +#include <asm/elf.h> +#include <asm/machdep.h> +#include <asm/paca.h> +#include <asm/time.h> +#include <asm/cputable.h> +#include <asm/sections.h> +#include <asm/btext.h> +#include <asm/nvram.h> +#include <asm/setup.h> +#include <asm/rtas.h> +#include <asm/iommu.h> +#include <asm/serial.h> +#include <asm/cache.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/firmware.h> +#include <asm/xmon.h> +#include <asm/udbg.h> +#include <asm/kexec.h> +#include <asm/mmu_context.h> +#include <asm/code-patching.h> +#include <asm/kvm_ppc.h> +#include <asm/hugetlb.h> + +#include "setup.h" + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +int boot_cpuid = 0; +int __initdata spinning_secondaries; +u64 ppc64_pft_size; + +/* Pick defaults since we might want to patch instructions + * before we've read this from the device tree. + */ +struct ppc64_caches ppc64_caches = { + .dline_size = 0x40, + .log_dline_size = 6, + .iline_size = 0x40, + .log_iline_size = 6 +}; +EXPORT_SYMBOL_GPL(ppc64_caches); + +/* + * These are used in binfmt_elf.c to put aux entries on the stack + * for each elf executable being started. 
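+ * (They feed the AT_DCACHEBSIZE, AT_ICACHEBSIZE and AT_UCACHEBSIZE
+ * aux vector entries, presumably via ARCH_DLINFO.)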
+ */ +int dcache_bsize; +int icache_bsize; +int ucache_bsize; + +#ifdef CONFIG_SMP + +static char *smt_enabled_cmdline; + +/* Look for ibm,smt-enabled OF option */ +static void check_smt_enabled(void) +{ + struct device_node *dn; + const char *smt_option; + + /* Default to enabling all threads */ + smt_enabled_at_boot = threads_per_core; + + /* Allow the command line to overrule the OF option */ + if (smt_enabled_cmdline) { + if (!strcmp(smt_enabled_cmdline, "on")) + smt_enabled_at_boot = threads_per_core; + else if (!strcmp(smt_enabled_cmdline, "off")) + smt_enabled_at_boot = 0; + else { + long smt; + int rc; + + rc = strict_strtol(smt_enabled_cmdline, 10, &smt); + if (!rc) + smt_enabled_at_boot = + min(threads_per_core, (int)smt); + } + } else { + dn = of_find_node_by_path("/options"); + if (dn) { + smt_option = of_get_property(dn, "ibm,smt-enabled", + NULL); + + if (smt_option) { + if (!strcmp(smt_option, "on")) + smt_enabled_at_boot = threads_per_core; + else if (!strcmp(smt_option, "off")) + smt_enabled_at_boot = 0; + } + + of_node_put(dn); + } + } +} + +/* Look for smt-enabled= cmdline option */ +static int __init early_smt_enabled(char *p) +{ + smt_enabled_cmdline = p; + return 0; +} +early_param("smt-enabled", early_smt_enabled); + +#else +#define check_smt_enabled() +#endif /* CONFIG_SMP */ + +/* + * Early initialization entry point. This is called by head.S + * with MMU translation disabled. We rely on the "feature" of + * the CPU that ignores the top 2 bits of the address in real + * mode so we can access kernel globals normally provided we + * only toy with things in the RMO region. From here, we do + * some early parsing of the device-tree to setup out MEMBLOCK + * data structures, and allocate & initialize the hash table + * and segment tables so we can start running with translation + * enabled. + * + * It is this function which will call the probe() callback of + * the various platform types and copy the matching one to the + * global ppc_md structure. Your platform can eventually do + * some very early initializations from the probe() routine, but + * this is not recommended, be very careful as, for example, the + * device-tree is not accessible via normal means at this point. + */ + +void __init early_setup(unsigned long dt_ptr) +{ + /* -------- printk is _NOT_ safe to use here ! ------- */ + + /* Identify CPU type */ + identify_cpu(0, mfspr(SPRN_PVR)); + + /* Assume we're on cpu 0 for now. Don't write to the paca yet! */ + initialise_paca(&boot_paca, 0); + setup_paca(&boot_paca); + + /* Initialize lockdep early or else spinlocks will blow */ + lockdep_init(); + + /* -------- printk is now safe to use ------- */ + + /* Enable early debugging if any specified (see udbg.h) */ + udbg_early_init(); + + DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr); + + /* + * Do early initialization using the flattened device + * tree, such as retrieving the physical memory map or + * calculating/retrieving the hash table size. + */ + early_init_devtree(__va(dt_ptr)); + + /* Now we know the logical id of our boot cpu, setup the paca. */ + setup_paca(&paca[boot_cpuid]); + + /* Fix up paca fields required for the boot cpu */ + get_paca()->cpu_start = 1; + + /* Probe the machine type */ + probe_machine(); + + setup_kdump_trampoline(); + + DBG("Found, Initializing memory management...\n"); + + /* Initialize the hash table or TLB handling */ + early_init_mmu(); + + /* + * Reserve any gigantic pages requested on the command line. 
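+ * (Editor's aside, an assumption about usage rather than text from
+ * the original patch: such a reservation is requested with boot
+ * parameters like "hugepagesz=16G hugepages=2" and has to be carved
+ * out of memblock here, long before the buddy allocator exists.)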
+ * memblock needs to have been initialized by the time this is + * called since this will reserve memory. + */ + reserve_hugetlb_gpages(); + + DBG(" <- early_setup()\n"); +} + +#ifdef CONFIG_SMP +void early_setup_secondary(void) +{ + /* Mark interrupts enabled in PACA */ + get_paca()->soft_enabled = 0; + + /* Initialize the hash table or TLB handling */ + early_init_mmu_secondary(); +} + +#endif /* CONFIG_SMP */ + +#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC) +void smp_release_cpus(void) +{ + unsigned long *ptr; + int i; + + DBG(" -> smp_release_cpus()\n"); + + /* All secondary cpus are spinning on a common spinloop, release them + * all now so they can start to spin on their individual paca + * spinloops. For non SMP kernels, the secondary cpus never get out + * of the common spinloop. + */ + + ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop + - PHYSICAL_START); + *ptr = __pa(generic_secondary_smp_init); + + /* And wait a bit for them to catch up */ + for (i = 0; i < 100000; i++) { + mb(); + HMT_low(); + if (spinning_secondaries == 0) + break; + udelay(1); + } + DBG("spinning_secondaries = %d\n", spinning_secondaries); + + DBG(" <- smp_release_cpus()\n"); +} +#endif /* CONFIG_SMP || CONFIG_KEXEC */ + +/* + * Initialize some remaining members of the ppc64_caches and systemcfg + * structures + * (at least until we get rid of them completely). This is mostly some + * cache informations about the CPU that will be used by cache flush + * routines and/or provided to userland + */ +static void __init initialize_cache_info(void) +{ + struct device_node *np; + unsigned long num_cpus = 0; + + DBG(" -> initialize_cache_info()\n"); + + for_each_node_by_type(np, "cpu") { + num_cpus += 1; + + /* + * We're assuming *all* of the CPUs have the same + * d-cache and i-cache sizes... -Peter + */ + if (num_cpus == 1) { + const u32 *sizep, *lsizep; + u32 size, lsize; + + size = 0; + lsize = cur_cpu_spec->dcache_bsize; + sizep = of_get_property(np, "d-cache-size", NULL); + if (sizep != NULL) + size = *sizep; + lsizep = of_get_property(np, "d-cache-block-size", + NULL); + /* fallback if block size missing */ + if (lsizep == NULL) + lsizep = of_get_property(np, + "d-cache-line-size", + NULL); + if (lsizep != NULL) + lsize = *lsizep; + if (sizep == 0 || lsizep == 0) + DBG("Argh, can't find dcache properties ! " + "sizep: %p, lsizep: %p\n", sizep, lsizep); + + ppc64_caches.dsize = size; + ppc64_caches.dline_size = lsize; + ppc64_caches.log_dline_size = __ilog2(lsize); + ppc64_caches.dlines_per_page = PAGE_SIZE / lsize; + + size = 0; + lsize = cur_cpu_spec->icache_bsize; + sizep = of_get_property(np, "i-cache-size", NULL); + if (sizep != NULL) + size = *sizep; + lsizep = of_get_property(np, "i-cache-block-size", + NULL); + if (lsizep == NULL) + lsizep = of_get_property(np, + "i-cache-line-size", + NULL); + if (lsizep != NULL) + lsize = *lsizep; + if (sizep == 0 || lsizep == 0) + DBG("Argh, can't find icache properties ! " + "sizep: %p, lsizep: %p\n", sizep, lsizep); + + ppc64_caches.isize = size; + ppc64_caches.iline_size = lsize; + ppc64_caches.log_iline_size = __ilog2(lsize); + ppc64_caches.ilines_per_page = PAGE_SIZE / lsize; + } + } + + DBG(" <- initialize_cache_info()\n"); +} + + +/* + * Do some initial setup of the system. The parameters are those which + * were passed in from the bootloader. 
+ */
+void __init setup_system(void)
+{
+	DBG(" -> setup_system()\n");
+
+	/* Apply the CPU-specific and firmware-specific fixups to kernel
+	 * text (nop out sections not relevant to this CPU or this firmware)
+	 */
+	do_feature_fixups(cur_cpu_spec->cpu_features,
+			  &__start___ftr_fixup, &__stop___ftr_fixup);
+	do_feature_fixups(cur_cpu_spec->mmu_features,
+			  &__start___mmu_ftr_fixup, &__stop___mmu_ftr_fixup);
+	do_feature_fixups(powerpc_firmware_features,
+			  &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
+	do_lwsync_fixups(cur_cpu_spec->cpu_features,
+			 &__start___lwsync_fixup, &__stop___lwsync_fixup);
+	do_final_fixups();
+
+	/*
+	 * Unflatten the device-tree passed by prom_init or kexec
+	 */
+	unflatten_device_tree();
+
+	/*
+	 * Fill the ppc64_caches & systemcfg structures with information
+	 * retrieved from the device-tree.
+	 */
+	initialize_cache_info();
+
+#ifdef CONFIG_PPC_RTAS
+	/*
+	 * Initialize RTAS if available
+	 */
+	rtas_initialize();
+#endif /* CONFIG_PPC_RTAS */
+
+	/*
+	 * Check if we have an initrd provided via the device-tree
+	 */
+	check_for_initrd();
+
+	/*
+	 * Do some platform-specific early initializations, which includes
+	 * setting up the hash table pointers. It also sets up some
+	 * interrupt-mapping related options that will be used by
+	 * finish_device_tree()
+	 */
+	if (ppc_md.init_early)
+		ppc_md.init_early();
+
+	/*
+	 * We can discover serial ports now since the above did set up the
+	 * hash table management for us, thus ioremap works. We do that early
+	 * so that further code can be debugged
+	 */
+	find_legacy_serial_ports();
+
+	/*
+	 * Register early console
+	 */
+	register_early_udbg_console();
+
+	/*
+	 * Initialize xmon
+	 */
+	xmon_setup();
+
+	smp_setup_cpu_maps();
+	check_smt_enabled();
+
+#ifdef CONFIG_SMP
+	/* Release secondary cpus out of their spinloops at 0x60 now that
+	 * we can map physical -> logical CPU ids
+	 */
+	smp_release_cpus();
+#endif
+
+	printk("Starting Linux PPC64 %s\n", init_utsname()->version);
+
+	printk("-----------------------------------------------------\n");
+	printk("ppc64_pft_size                = 0x%llx\n", ppc64_pft_size);
+	printk("physicalMemorySize            = 0x%llx\n", memblock_phys_mem_size());
+	if (ppc64_caches.dline_size != 0x80)
+		printk("ppc64_caches.dcache_line_size = 0x%x\n",
+		       ppc64_caches.dline_size);
+	if (ppc64_caches.iline_size != 0x80)
+		printk("ppc64_caches.icache_line_size = 0x%x\n",
+		       ppc64_caches.iline_size);
+#ifdef CONFIG_PPC_STD_MMU_64
+	if (htab_address)
+		printk("htab_address                  = 0x%p\n", htab_address);
+	printk("htab_hash_mask                = 0x%lx\n", htab_hash_mask);
+#endif /* CONFIG_PPC_STD_MMU_64 */
+	if (PHYSICAL_START > 0)
+		printk("physical_start                = 0x%llx\n",
+		       (unsigned long long)PHYSICAL_START);
+	printk("-----------------------------------------------------\n");
+
+	DBG(" <- setup_system()\n");
+}
+
+/* This returns the limit below which memory accesses to the linear
+ * mapping are guaranteed not to cause a TLB or SLB miss. This is
+ * used to allocate interrupt or emergency stacks for which our
+ * exception entry path doesn't deal with being interrupted.
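+ *
+ * (Editor's worked example, not part of the original patch: on Book3S
+ * with 256MB segments the bolted region is 1UL << SID_SHIFT, i.e.
+ * 1 << 28 = 256MB; with 1T segments it is 1UL << SID_SHIFT_1T, i.e.
+ * 1 << 40 = 1TB. The stacks allocated below must sit under the
+ * returned limit.)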
+ */ +static u64 safe_stack_limit(void) +{ +#ifdef CONFIG_PPC_BOOK3E + /* Freescale BookE bolts the entire linear mapping */ + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) + return linear_map_top; + /* Other BookE, we assume the first GB is bolted */ + return 1ul << 30; +#else + /* BookS, the first segment is bolted */ + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) + return 1UL << SID_SHIFT_1T; + return 1UL << SID_SHIFT; +#endif +} + +static void __init irqstack_early_init(void) +{ + u64 limit = safe_stack_limit(); + unsigned int i; + + /* + * Interrupt stacks must be in the first segment since we + * cannot afford to take SLB misses on them. + */ + for_each_possible_cpu(i) { + softirq_ctx[i] = (struct thread_info *) + __va(memblock_alloc_base(THREAD_SIZE, + THREAD_SIZE, limit)); + hardirq_ctx[i] = (struct thread_info *) + __va(memblock_alloc_base(THREAD_SIZE, + THREAD_SIZE, limit)); + } +} + +#ifdef CONFIG_PPC_BOOK3E +static void __init exc_lvl_early_init(void) +{ + extern unsigned int interrupt_base_book3e; + extern unsigned int exc_debug_debug_book3e; + + unsigned int i; + + for_each_possible_cpu(i) { + critirq_ctx[i] = (struct thread_info *) + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); + dbgirq_ctx[i] = (struct thread_info *) + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); + mcheckirq_ctx[i] = (struct thread_info *) + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); + } + + if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) + patch_branch(&interrupt_base_book3e + (0x040 / 4) + 1, + (unsigned long)&exc_debug_debug_book3e, 0); +} +#else +#define exc_lvl_early_init() +#endif + +/* + * Stack space used when we detect a bad kernel stack pointer, and + * early in SMP boots before relocation is enabled. + */ +static void __init emergency_stack_init(void) +{ + u64 limit; + unsigned int i; + + /* + * Emergency stacks must be under 256MB, we cannot afford to take + * SLB misses on them. The ABI also requires them to be 128-byte + * aligned. + * + * Since we use these as temporary stacks during secondary CPU + * bringup, we need to get at them in real mode. This means they + * must also be within the RMO region. + */ + limit = min(safe_stack_limit(), ppc64_rma_size); + + for_each_possible_cpu(i) { + unsigned long sp; + sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); + sp += THREAD_SIZE; + paca[i].emergency_sp = __va(sp); + } +} + +/* + * Called into from start_kernel this initializes bootmem, which is used + * to manage page allocation until mem_init is called. + */ +void __init setup_arch(char **cmdline_p) +{ + ppc64_boot_msg(0x12, "Setup Arch"); + + *cmdline_p = cmd_line; + + /* + * Set cache line size based on type of cpu as a default. + * Systems with OF can look in the properties on the cpu node(s) + * for a possibly more accurate value. 
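+ *
+ * (Editor's sketch of the device-tree shape this refers to; the
+ * values are illustrative, not from the original patch:
+ *
+ *	cpu@0 {
+ *		d-cache-block-size = <0x80>;
+ *		i-cache-block-size = <0x80>;
+ *		d-cache-size = <0x8000>;
+ *		i-cache-size = <0x8000>;
+ *	};
+ *
+ * these are the same properties initialize_cache_info() parses.)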
+ */ + dcache_bsize = ppc64_caches.dline_size; + icache_bsize = ppc64_caches.iline_size; + + /* reboot on panic */ + panic_timeout = 180; + + if (ppc_md.panic) + setup_panic(); + + init_mm.start_code = (unsigned long)_stext; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; + init_mm.brk = klimit; + + irqstack_early_init(); + exc_lvl_early_init(); + emergency_stack_init(); + +#ifdef CONFIG_PPC_STD_MMU_64 + stabs_alloc(); +#endif + /* set up the bootmem stuff with available memory */ + do_init_bootmem(); + sparse_init(); + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + + if (ppc_md.setup_arch) + ppc_md.setup_arch(); + + paging_init(); + + /* Initialize the MMU context management stuff */ + mmu_context_init(); + + kvm_linear_init(); + + ppc64_boot_msg(0x15, "Setup Done"); +} + + +/* ToDo: do something useful if ppc_md is not yet setup. */ +#define PPC64_LINUX_FUNCTION 0x0f000000 +#define PPC64_IPL_MESSAGE 0xc0000000 +#define PPC64_TERM_MESSAGE 0xb0000000 + +static void ppc64_do_msg(unsigned int src, const char *msg) +{ + if (ppc_md.progress) { + char buf[128]; + + sprintf(buf, "%08X\n", src); + ppc_md.progress(buf, 0); + snprintf(buf, 128, "%s", msg); + ppc_md.progress(buf, 0); + } +} + +/* Print a boot progress message. */ +void ppc64_boot_msg(unsigned int src, const char *msg) +{ + ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_IPL_MESSAGE|src, msg); + printk("[boot]%04x %s\n", src, msg); +} + +#ifdef CONFIG_SMP +#define PCPU_DYN_SIZE () + +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) +{ + return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align, + __pa(MAX_DMA_ADDRESS)); +} + +static void __init pcpu_fc_free(void *ptr, size_t size) +{ + free_bootmem(__pa(ptr), size); +} + +static int pcpu_cpu_distance(unsigned int from, unsigned int to) +{ + if (cpu_to_node(from) == cpu_to_node(to)) + return LOCAL_DISTANCE; + else + return REMOTE_DISTANCE; +} + +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); + +void __init setup_per_cpu_areas(void) +{ + const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; + size_t atom_size; + unsigned long delta; + unsigned int cpu; + int rc; + + /* + * Linear mapping is one of 4K, 1M and 16M. For 4K, no need + * to group units. For larger mappings, use 1M atom which + * should be large enough to contain a number of units. + */ + if (mmu_linear_psize == MMU_PAGE_4K) + atom_size = PAGE_SIZE; + else + atom_size = 1 << 20; + + rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, + pcpu_fc_alloc, pcpu_fc_free); + if (rc < 0) + panic("cannot initialize percpu area (err=%d)", rc); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) { + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; + paca[cpu].data_offset = __per_cpu_offset[cpu]; + } +} +#endif + + +#ifdef CONFIG_PPC_INDIRECT_IO +struct ppc_pci_io ppc_pci_io; +EXPORT_SYMBOL(ppc_pci_io); +#endif /* CONFIG_PPC_INDIRECT_IO */ + diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c new file mode 100644 index 00000000..651c5963 --- /dev/null +++ b/arch/powerpc/kernel/signal.c @@ -0,0 +1,206 @@ +/* + * Common signal handling code for both 32 and 64 bits + * + * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration + * Extracted from signal_32.c and signal_64.c + * + * This file is subject to the terms and conditions of the GNU General + * Public License. 
See the file README.legal in the main directory of + * this archive for more details. + */ + +#include <linux/tracehook.h> +#include <linux/signal.h> +#include <linux/key.h> +#include <asm/hw_breakpoint.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> +#include <asm/debug.h> + +#include "signal.h" + +/* Log an error when sending an unhandled signal to a process. Controlled + * through debug.exception-trace sysctl. + */ + +int show_unhandled_signals = 0; + +/* + * Allocate space for the signal frame + */ +void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, + size_t frame_size, int is_32) +{ + unsigned long oldsp, newsp; + + /* Default to using normal stack */ + oldsp = get_clean_sp(regs, is_32); + + /* Check for alt stack */ + if ((ka->sa.sa_flags & SA_ONSTACK) && + current->sas_ss_size && !on_sig_stack(oldsp)) + oldsp = (current->sas_ss_sp + current->sas_ss_size); + + /* Get aligned frame */ + newsp = (oldsp - frame_size) & ~0xFUL; + + /* Check access */ + if (!access_ok(VERIFY_WRITE, (void __user *)newsp, oldsp - newsp)) + return NULL; + + return (void __user *)newsp; +} + + +/* + * Restore the user process's signal mask + */ +void restore_sigmask(sigset_t *set) +{ + sigdelsetmask(set, ~_BLOCKABLE); + set_current_blocked(set); +} + +static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, + int has_handler) +{ + unsigned long ret = regs->gpr[3]; + int restart = 1; + + /* syscall ? */ + if (TRAP(regs) != 0x0C00) + return; + + /* error signalled ? */ + if (!(regs->ccr & 0x10000000)) + return; + + switch (ret) { + case ERESTART_RESTARTBLOCK: + case ERESTARTNOHAND: + /* ERESTARTNOHAND means that the syscall should only be + * restarted if there was no handler for the signal, and since + * we only get here if there is a handler, we dont restart. + */ + restart = !has_handler; + break; + case ERESTARTSYS: + /* ERESTARTSYS means to restart the syscall if there is no + * handler or the handler was registered with SA_RESTART + */ + restart = !has_handler || (ka->sa.sa_flags & SA_RESTART) != 0; + break; + case ERESTARTNOINTR: + /* ERESTARTNOINTR means that the syscall should be + * called again after the signal handler returns. + */ + break; + default: + return; + } + if (restart) { + if (ret == ERESTART_RESTARTBLOCK) + regs->gpr[0] = __NR_restart_syscall; + else + regs->gpr[3] = regs->orig_gpr3; + regs->nip -= 4; + regs->result = 0; + } else { + regs->result = -EINTR; + regs->gpr[3] = EINTR; + regs->ccr |= 0x10000000; + } +} + +static int do_signal(struct pt_regs *regs) +{ + sigset_t *oldset; + siginfo_t info; + int signr; + struct k_sigaction ka; + int ret; + int is32 = is_32bit_task(); + + if (current_thread_info()->local_flags & _TLF_RESTORE_SIGMASK) + oldset = ¤t->saved_sigmask; + else + oldset = ¤t->blocked; + + signr = get_signal_to_deliver(&info, &ka, regs, NULL); + + /* Is there any syscall restart business here ? */ + check_syscall_restart(regs, &ka, signr > 0); + + if (signr <= 0) { + struct thread_info *ti = current_thread_info(); + /* No signal to deliver -- put the saved sigmask back */ + if (ti->local_flags & _TLF_RESTORE_SIGMASK) { + ti->local_flags &= ~_TLF_RESTORE_SIGMASK; + sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); + } + regs->trap = 0; + return 0; /* no signals delivered */ + } + +#ifndef CONFIG_PPC_ADV_DEBUG_REGS + /* + * Reenable the DABR before delivering the signal to + * user space. The DABR will have been cleared if it + * triggered inside the kernel. 
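+ *
+ * (Editor's note, not part of the original patch: DABR is the Data
+ * Address Breakpoint Register. Reinstalling it from
+ * current->thread.dabr means a watchpoint set by a debugger survives
+ * signal delivery even though the kernel cleared the hardware
+ * register on its own behalf.)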
+ */
+	if (current->thread.dabr)
+		set_dabr(current->thread.dabr);
+#endif
+	/* Re-enable the breakpoints for the signal stack */
+	thread_change_pc(current, regs);
+
+	if (is32) {
+		if (ka.sa.sa_flags & SA_SIGINFO)
+			ret = handle_rt_signal32(signr, &ka, &info, oldset,
+					regs);
+		else
+			ret = handle_signal32(signr, &ka, &info, oldset,
+					regs);
+	} else {
+		ret = handle_rt_signal64(signr, &ka, &info, oldset, regs);
+	}
+
+	regs->trap = 0;
+	if (ret) {
+		block_sigmask(&ka, signr);
+
+		/*
+		 * A signal was successfully delivered; the saved sigmask is in
+		 * its frame, and we can clear the TLF_RESTORE_SIGMASK flag.
+		 */
+		current_thread_info()->local_flags &= ~_TLF_RESTORE_SIGMASK;
+
+		/*
+		 * Let tracing know that we've done the handler setup.
+		 */
+		tracehook_signal_handler(signr, &info, &ka, regs,
+					 test_thread_flag(TIF_SINGLESTEP));
+	}
+
+	return ret;
+}
+
+void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
+{
+	if (thread_info_flags & _TIF_SIGPENDING)
+		do_signal(regs);
+
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
+	}
+}
+
+long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+		unsigned long r5, unsigned long r6, unsigned long r7,
+		unsigned long r8, struct pt_regs *regs)
+{
+	return do_sigaltstack(uss, uoss, regs->gpr[1]);
+}
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
new file mode 100644
index 00000000..8dde973a
--- /dev/null
+++ b/arch/powerpc/kernel/signal.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation
+ * Extracted from signal_32.c and signal_64.c
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file README.legal in the main directory of
+ * this archive for more details.
+ */ + +#ifndef _POWERPC_ARCH_SIGNAL_H +#define _POWERPC_ARCH_SIGNAL_H + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags); + +extern void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, + size_t frame_size, int is_32); +extern void restore_sigmask(sigset_t *set); + +extern int handle_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, + struct pt_regs *regs); + +extern int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, + struct pt_regs *regs); + +extern unsigned long copy_fpr_to_user(void __user *to, + struct task_struct *task); +extern unsigned long copy_fpr_from_user(struct task_struct *task, + void __user *from); +#ifdef CONFIG_VSX +extern unsigned long copy_vsx_to_user(void __user *to, + struct task_struct *task); +extern unsigned long copy_vsx_from_user(struct task_struct *task, + void __user *from); +#endif + +#ifdef CONFIG_PPC64 + +extern int handle_rt_signal64(int signr, struct k_sigaction *ka, + siginfo_t *info, sigset_t *set, + struct pt_regs *regs); + +#else /* CONFIG_PPC64 */ + +static inline int handle_rt_signal64(int signr, struct k_sigaction *ka, + siginfo_t *info, sigset_t *set, + struct pt_regs *regs) +{ + return -EFAULT; +} + +#endif /* !defined(CONFIG_PPC64) */ + +#endif /* _POWERPC_ARCH_SIGNAL_H */ diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c new file mode 100644 index 00000000..45eb9985 --- /dev/null +++ b/arch/powerpc/kernel/signal_32.c @@ -0,0 +1,1306 @@ +/* + * Signal handling for 32bit PPC and 32bit tasks on 64bit PPC + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Copyright (C) 2001 IBM + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * + * Derived from "arch/i386/kernel/signal.c" + * Copyright (C) 1991, 1992 Linus Torvalds + * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/elf.h> +#include <linux/ptrace.h> +#include <linux/ratelimit.h> +#ifdef CONFIG_PPC64 +#include <linux/syscalls.h> +#include <linux/compat.h> +#else +#include <linux/wait.h> +#include <linux/unistd.h> +#include <linux/stddef.h> +#include <linux/tty.h> +#include <linux/binfmts.h> +#include <linux/freezer.h> +#endif + +#include <asm/uaccess.h> +#include <asm/cacheflush.h> +#include <asm/syscalls.h> +#include <asm/sigcontext.h> +#include <asm/vdso.h> +#include <asm/switch_to.h> +#ifdef CONFIG_PPC64 +#include "ppc32.h" +#include <asm/unistd.h> +#else +#include <asm/ucontext.h> +#include <asm/pgtable.h> +#endif + +#include "signal.h" + +#undef DEBUG_SIG + +#ifdef CONFIG_PPC64 +#define sys_sigsuspend compat_sys_sigsuspend +#define sys_rt_sigsuspend compat_sys_rt_sigsuspend +#define sys_rt_sigreturn compat_sys_rt_sigreturn +#define sys_sigaction compat_sys_sigaction +#define sys_swapcontext compat_sys_swapcontext +#define sys_sigreturn compat_sys_sigreturn + +#define old_sigaction old_sigaction32 +#define sigcontext sigcontext32 +#define mcontext mcontext32 +#define ucontext ucontext32 + +/* + * Userspace code may pass a ucontext which doesn't include VSX added + * at the end. We need to check for this case. + */ +#define UCONTEXTSIZEWITHOUTVSX \ + (sizeof(struct ucontext) - sizeof(elf_vsrreghalf_t32)) + +/* + * Returning 0 means we return to userspace via + * ret_from_except and thus restore all user + * registers from *regs. This is what we need + * to do when a signal has been delivered. + */ + +#define GP_REGS_SIZE min(sizeof(elf_gregset_t32), sizeof(struct pt_regs32)) +#undef __SIGNAL_FRAMESIZE +#define __SIGNAL_FRAMESIZE __SIGNAL_FRAMESIZE32 +#undef ELF_NVRREG +#define ELF_NVRREG ELF_NVRREG32 + +/* + * Functions for flipping sigsets (thanks to brain dead generic + * implementation that makes things simple for little endian only) + */ +static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) +{ + compat_sigset_t cset; + + switch (_NSIG_WORDS) { + case 4: cset.sig[6] = set->sig[3] & 0xffffffffull; + cset.sig[7] = set->sig[3] >> 32; + case 3: cset.sig[4] = set->sig[2] & 0xffffffffull; + cset.sig[5] = set->sig[2] >> 32; + case 2: cset.sig[2] = set->sig[1] & 0xffffffffull; + cset.sig[3] = set->sig[1] >> 32; + case 1: cset.sig[0] = set->sig[0] & 0xffffffffull; + cset.sig[1] = set->sig[0] >> 32; + } + return copy_to_user(uset, &cset, sizeof(*uset)); +} + +static inline int get_sigset_t(sigset_t *set, + const compat_sigset_t __user *uset) +{ + compat_sigset_t s32; + + if (copy_from_user(&s32, uset, sizeof(*uset))) + return -EFAULT; + + /* + * Swap the 2 words of the 64-bit sigset_t (they are stored + * in the "wrong" endian in 32-bit user storage). 
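+ *
+ * (Editor's worked example, not from the original patch: a 64-bit
+ * sig[0] of 0x00000000deadbeefULL is presented to 32-bit userspace as
+ * cset.sig[0] = 0xdeadbeef (low word) and cset.sig[1] = 0x00000000
+ * (high word), which is exactly what the masks and shifts above and
+ * below implement.)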
+ */ + switch (_NSIG_WORDS) { + case 4: set->sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); + case 3: set->sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); + case 2: set->sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); + case 1: set->sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); + } + return 0; +} + +static inline int get_old_sigaction(struct k_sigaction *new_ka, + struct old_sigaction __user *act) +{ + compat_old_sigset_t mask; + compat_uptr_t handler, restorer; + + if (get_user(handler, &act->sa_handler) || + __get_user(restorer, &act->sa_restorer) || + __get_user(new_ka->sa.sa_flags, &act->sa_flags) || + __get_user(mask, &act->sa_mask)) + return -EFAULT; + new_ka->sa.sa_handler = compat_ptr(handler); + new_ka->sa.sa_restorer = compat_ptr(restorer); + siginitset(&new_ka->sa.sa_mask, mask); + return 0; +} + +#define to_user_ptr(p) ptr_to_compat(p) +#define from_user_ptr(p) compat_ptr(p) + +static inline int save_general_regs(struct pt_regs *regs, + struct mcontext __user *frame) +{ + elf_greg_t64 *gregs = (elf_greg_t64 *)regs; + int i; + + WARN_ON(!FULL_REGS(regs)); + + for (i = 0; i <= PT_RESULT; i ++) { + if (i == 14 && !FULL_REGS(regs)) + i = 32; + if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i])) + return -EFAULT; + } + return 0; +} + +static inline int restore_general_regs(struct pt_regs *regs, + struct mcontext __user *sr) +{ + elf_greg_t64 *gregs = (elf_greg_t64 *)regs; + int i; + + for (i = 0; i <= PT_RESULT; i++) { + if ((i == PT_MSR) || (i == PT_SOFTE)) + continue; + if (__get_user(gregs[i], &sr->mc_gregs[i])) + return -EFAULT; + } + return 0; +} + +#else /* CONFIG_PPC64 */ + +#define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) + +static inline int put_sigset_t(sigset_t __user *uset, sigset_t *set) +{ + return copy_to_user(uset, set, sizeof(*uset)); +} + +static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset) +{ + return copy_from_user(set, uset, sizeof(*uset)); +} + +static inline int get_old_sigaction(struct k_sigaction *new_ka, + struct old_sigaction __user *act) +{ + old_sigset_t mask; + + if (!access_ok(VERIFY_READ, act, sizeof(*act)) || + __get_user(new_ka->sa.sa_handler, &act->sa_handler) || + __get_user(new_ka->sa.sa_restorer, &act->sa_restorer)) + return -EFAULT; + __get_user(new_ka->sa.sa_flags, &act->sa_flags); + __get_user(mask, &act->sa_mask); + siginitset(&new_ka->sa.sa_mask, mask); + return 0; +} + +#define to_user_ptr(p) ((unsigned long)(p)) +#define from_user_ptr(p) ((void __user *)(p)) + +static inline int save_general_regs(struct pt_regs *regs, + struct mcontext __user *frame) +{ + WARN_ON(!FULL_REGS(regs)); + return __copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE); +} + +static inline int restore_general_regs(struct pt_regs *regs, + struct mcontext __user *sr) +{ + /* copy up to but not including MSR */ + if (__copy_from_user(regs, &sr->mc_gregs, + PT_MSR * sizeof(elf_greg_t))) + return -EFAULT; + /* copy from orig_r3 (the word after the MSR) up to the end */ + if (__copy_from_user(®s->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3], + GP_REGS_SIZE - PT_ORIG_R3 * sizeof(elf_greg_t))) + return -EFAULT; + return 0; +} + +#endif /* CONFIG_PPC64 */ + +/* + * Atomically swap in the new signal mask, and wait for a signal. 
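+ *
+ * (Editor's usage sketch, an assumption about a typical caller and
+ * not part of the original patch:
+ *
+ *	sigset_t block, old;
+ *	sigemptyset(&block);
+ *	sigaddset(&block, SIGUSR1);
+ *	sigprocmask(SIG_BLOCK, &block, &old);
+ *	... critical section: SIGUSR1 stays pending here ...
+ *	sigsuspend(&old);	returns -1 with errno == EINTR
+ *				once a handler has run
+ *
+ * The implementation below supplies the "atomically" guarantee.)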
+ */ +long sys_sigsuspend(old_sigset_t mask) +{ + sigset_t blocked; + + current->saved_sigmask = current->blocked; + + mask &= _BLOCKABLE; + siginitset(&blocked, mask); + set_current_blocked(&blocked); + + current->state = TASK_INTERRUPTIBLE; + schedule(); + set_restore_sigmask(); + return -ERESTARTNOHAND; +} + +long sys_sigaction(int sig, struct old_sigaction __user *act, + struct old_sigaction __user *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + +#ifdef CONFIG_PPC64 + if (sig < 0) + sig = -sig; +#endif + + if (act) { + if (get_old_sigaction(&new_ka, act)) + return -EFAULT; + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + if (!ret && oact) { + if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(to_user_ptr(old_ka.sa.sa_handler), + &oact->sa_handler) || + __put_user(to_user_ptr(old_ka.sa.sa_restorer), + &oact->sa_restorer) || + __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) + return -EFAULT; + } + + return ret; +} + +/* + * When we have signals to deliver, we set up on the + * user stack, going down from the original stack pointer: + * an ABI gap of 56 words + * an mcontext struct + * a sigcontext struct + * a gap of __SIGNAL_FRAMESIZE bytes + * + * Each of these things must be a multiple of 16 bytes in size. The following + * structure represent all of this except the __SIGNAL_FRAMESIZE gap + * + */ +struct sigframe { + struct sigcontext sctx; /* the sigcontext */ + struct mcontext mctx; /* all the register values */ + /* + * Programs using the rs6000/xcoff abi can save up to 19 gp + * regs and 18 fp regs below sp before decrementing it. + */ + int abigap[56]; +}; + +/* We use the mc_pad field for the signal return trampoline. */ +#define tramp mc_pad + +/* + * When we have rt signals to deliver, we set up on the + * user stack, going down from the original stack pointer: + * one rt_sigframe struct (siginfo + ucontext + ABI gap) + * a gap of __SIGNAL_FRAMESIZE+16 bytes + * (the +16 is to get the siginfo and ucontext in the same + * positions as in older kernels). + * + * Each of these things must be a multiple of 16 bytes in size. + * + */ +struct rt_sigframe { +#ifdef CONFIG_PPC64 + compat_siginfo_t info; +#else + struct siginfo info; +#endif + struct ucontext uc; + /* + * Programs using the rs6000/xcoff abi can save up to 19 gp + * regs and 18 fp regs below sp before decrementing it. 
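+ *
+ * (Editor's arithmetic check, not part of the original patch:
+ * 19 GPRs * 4 bytes + 18 FPRs * 8 bytes = 220 bytes, which fits the
+ * 56-word (224-byte) abigap declared below while keeping the frame
+ * size a multiple of 16.)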
+ */ + int abigap[56]; +}; + +#ifdef CONFIG_VSX +unsigned long copy_fpr_to_user(void __user *to, + struct task_struct *task) +{ + double buf[ELF_NFPREG]; + int i; + + /* save FPR copy to local buffer then write to the thread_struct */ + for (i = 0; i < (ELF_NFPREG - 1) ; i++) + buf[i] = task->thread.TS_FPR(i); + memcpy(&buf[i], &task->thread.fpscr, sizeof(double)); + return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double)); +} + +unsigned long copy_fpr_from_user(struct task_struct *task, + void __user *from) +{ + double buf[ELF_NFPREG]; + int i; + + if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double))) + return 1; + for (i = 0; i < (ELF_NFPREG - 1) ; i++) + task->thread.TS_FPR(i) = buf[i]; + memcpy(&task->thread.fpscr, &buf[i], sizeof(double)); + + return 0; +} + +unsigned long copy_vsx_to_user(void __user *to, + struct task_struct *task) +{ + double buf[ELF_NVSRHALFREG]; + int i; + + /* save FPR copy to local buffer then write to the thread_struct */ + for (i = 0; i < ELF_NVSRHALFREG; i++) + buf[i] = task->thread.fpr[i][TS_VSRLOWOFFSET]; + return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double)); +} + +unsigned long copy_vsx_from_user(struct task_struct *task, + void __user *from) +{ + double buf[ELF_NVSRHALFREG]; + int i; + + if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double))) + return 1; + for (i = 0; i < ELF_NVSRHALFREG ; i++) + task->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + return 0; +} +#else +inline unsigned long copy_fpr_to_user(void __user *to, + struct task_struct *task) +{ + return __copy_to_user(to, task->thread.fpr, + ELF_NFPREG * sizeof(double)); +} + +inline unsigned long copy_fpr_from_user(struct task_struct *task, + void __user *from) +{ + return __copy_from_user(task->thread.fpr, from, + ELF_NFPREG * sizeof(double)); +} +#endif + +/* + * Save the current user registers on the user stack. + * We only save the altivec/spe registers if the process has used + * altivec/spe instructions at some point. + */ +static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, + int sigret, int ctx_has_vsx_region) +{ + unsigned long msr = regs->msr; + + /* Make sure floating point registers are stored in regs */ + flush_fp_to_thread(current); + + /* save general registers */ + if (save_general_regs(regs, frame)) + return 1; + +#ifdef CONFIG_ALTIVEC + /* save altivec registers */ + if (current->thread.used_vr) { + flush_altivec_to_thread(current); + if (__copy_to_user(&frame->mc_vregs, current->thread.vr, + ELF_NVRREG * sizeof(vector128))) + return 1; + /* set MSR_VEC in the saved MSR value to indicate that + frame->mc_vregs contains valid data */ + msr |= MSR_VEC; + } + /* else assert((regs->msr & MSR_VEC) == 0) */ + + /* We always copy to/from vrsave, it's 0 if we don't have or don't + * use altivec. Since VSCR only contains 32 bits saved in the least + * significant bits of a vector, we "cheat" and stuff VRSAVE in the + * most significant bits of that same vector. --BenH + */ + if (__put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32])) + return 1; +#endif /* CONFIG_ALTIVEC */ + if (copy_fpr_to_user(&frame->mc_fregs, current)) + return 1; +#ifdef CONFIG_VSX + /* + * Copy VSR 0-31 upper half from thread_struct to local + * buffer, then write that to userspace. 
Also set MSR_VSX in + * the saved MSR value to indicate that frame->mc_vregs + * contains valid data + */ + if (current->thread.used_vsr && ctx_has_vsx_region) { + __giveup_vsx(current); + if (copy_vsx_to_user(&frame->mc_vsregs, current)) + return 1; + msr |= MSR_VSX; + } +#endif /* CONFIG_VSX */ +#ifdef CONFIG_SPE + /* save spe registers */ + if (current->thread.used_spe) { + flush_spe_to_thread(current); + if (__copy_to_user(&frame->mc_vregs, current->thread.evr, + ELF_NEVRREG * sizeof(u32))) + return 1; + /* set MSR_SPE in the saved MSR value to indicate that + frame->mc_vregs contains valid data */ + msr |= MSR_SPE; + } + /* else assert((regs->msr & MSR_SPE) == 0) */ + + /* We always copy to/from spefscr */ + if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG)) + return 1; +#endif /* CONFIG_SPE */ + + if (__put_user(msr, &frame->mc_gregs[PT_MSR])) + return 1; + if (sigret) { + /* Set up the sigreturn trampoline: li r0,sigret; sc */ + if (__put_user(0x38000000UL + sigret, &frame->tramp[0]) + || __put_user(0x44000002UL, &frame->tramp[1])) + return 1; + flush_icache_range((unsigned long) &frame->tramp[0], + (unsigned long) &frame->tramp[2]); + } + + return 0; +} + +/* + * Restore the current user register values from the user stack, + * (except for MSR). + */ +static long restore_user_regs(struct pt_regs *regs, + struct mcontext __user *sr, int sig) +{ + long err; + unsigned int save_r2 = 0; + unsigned long msr; +#ifdef CONFIG_VSX + int i; +#endif + + /* + * restore general registers but not including MSR or SOFTE. Also + * take care of keeping r2 (TLS) intact if not a signal + */ + if (!sig) + save_r2 = (unsigned int)regs->gpr[2]; + err = restore_general_regs(regs, sr); + regs->trap = 0; + err |= __get_user(msr, &sr->mc_gregs[PT_MSR]); + if (!sig) + regs->gpr[2] = (unsigned long) save_r2; + if (err) + return 1; + + /* if doing signal return, restore the previous little-endian mode */ + if (sig) + regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); + + /* + * Do this before updating the thread state in + * current->thread.fpr/vr/evr. That way, if we get preempted + * and another task grabs the FPU/Altivec/SPE, it won't be + * tempted to save the current CPU state into the thread_struct + * and corrupt what we are writing there. + */ + discard_lazy_cpu_state(); + +#ifdef CONFIG_ALTIVEC + /* + * Force the process to reload the altivec registers from + * current->thread when it next does altivec instructions + */ + regs->msr &= ~MSR_VEC; + if (msr & MSR_VEC) { + /* restore altivec registers from the stack */ + if (__copy_from_user(current->thread.vr, &sr->mc_vregs, + sizeof(sr->mc_vregs))) + return 1; + } else if (current->thread.used_vr) + memset(current->thread.vr, 0, ELF_NVRREG * sizeof(vector128)); + + /* Always get VRSAVE back */ + if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32])) + return 1; +#endif /* CONFIG_ALTIVEC */ + if (copy_fpr_from_user(current, &sr->mc_fregs)) + return 1; + +#ifdef CONFIG_VSX + /* + * Force the process to reload the VSX registers from + * current->thread when it next does VSX instruction. 
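+ *
+ * (Editor's note, not part of the original patch: with MSR_VSX
+ * cleared, the task's next VSX instruction raises a VSX-unavailable
+ * exception and the registers are reloaded from current->thread;
+ * the same lazy-restore scheme is applied to FP, Altivec and SPE
+ * elsewhere in this function.)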
+ */ + regs->msr &= ~MSR_VSX; + if (msr & MSR_VSX) { + /* + * Restore altivec registers from the stack to a local + * buffer, then write this out to the thread_struct + */ + if (copy_vsx_from_user(current, &sr->mc_vsregs)) + return 1; + } else if (current->thread.used_vsr) + for (i = 0; i < 32 ; i++) + current->thread.fpr[i][TS_VSRLOWOFFSET] = 0; +#endif /* CONFIG_VSX */ + /* + * force the process to reload the FP registers from + * current->thread when it next does FP instructions + */ + regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); + +#ifdef CONFIG_SPE + /* force the process to reload the spe registers from + current->thread when it next does spe instructions */ + regs->msr &= ~MSR_SPE; + if (msr & MSR_SPE) { + /* restore spe registers from the stack */ + if (__copy_from_user(current->thread.evr, &sr->mc_vregs, + ELF_NEVRREG * sizeof(u32))) + return 1; + } else if (current->thread.used_spe) + memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); + + /* Always get SPEFSCR back */ + if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG)) + return 1; +#endif /* CONFIG_SPE */ + + return 0; +} + +#ifdef CONFIG_PPC64 +long compat_sys_rt_sigaction(int sig, const struct sigaction32 __user *act, + struct sigaction32 __user *oact, size_t sigsetsize) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(compat_sigset_t)) + return -EINVAL; + + if (act) { + compat_uptr_t handler; + + ret = get_user(handler, &act->sa_handler); + new_ka.sa.sa_handler = compat_ptr(handler); + ret |= get_sigset_t(&new_ka.sa.sa_mask, &act->sa_mask); + ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); + if (ret) + return -EFAULT; + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + if (!ret && oact) { + ret = put_user(to_user_ptr(old_ka.sa.sa_handler), &oact->sa_handler); + ret |= put_sigset_t(&oact->sa_mask, &old_ka.sa.sa_mask); + ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + } + return ret; +} + +/* + * Note: it is necessary to treat how as an unsigned int, with the + * corresponding cast to a signed int to insure that the proper + * conversion (sign extension) between the register representation + * of a signed int (msr in 32-bit mode) and the register representation + * of a signed int (msr in 64-bit mode) is performed. + */ +long compat_sys_rt_sigprocmask(u32 how, compat_sigset_t __user *set, + compat_sigset_t __user *oset, size_t sigsetsize) +{ + sigset_t s; + sigset_t __user *up; + int ret; + mm_segment_t old_fs = get_fs(); + + if (set) { + if (get_sigset_t(&s, set)) + return -EFAULT; + } + + set_fs(KERNEL_DS); + /* This is valid because of the set_fs() */ + up = (sigset_t __user *) &s; + ret = sys_rt_sigprocmask((int)how, set ? up : NULL, oset ? 
up : NULL, + sigsetsize); + set_fs(old_fs); + if (ret) + return ret; + if (oset) { + if (put_sigset_t(oset, &s)) + return -EFAULT; + } + return 0; +} + +long compat_sys_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize) +{ + sigset_t s; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + /* The __user pointer cast is valid because of the set_fs() */ + ret = sys_rt_sigpending((sigset_t __user *) &s, sigsetsize); + set_fs(old_fs); + if (!ret) { + if (put_sigset_t(set, &s)) + return -EFAULT; + } + return ret; +} + + +int copy_siginfo_to_user32(struct compat_siginfo __user *d, siginfo_t *s) +{ + int err; + + if (!access_ok (VERIFY_WRITE, d, sizeof(*d))) + return -EFAULT; + + /* If you change siginfo_t structure, please be sure + * this code is fixed accordingly. + * It should never copy any pad contained in the structure + * to avoid security leaks, but must copy the generic + * 3 ints plus the relevant union member. + * This routine must convert siginfo from 64bit to 32bit as well + * at the same time. + */ + err = __put_user(s->si_signo, &d->si_signo); + err |= __put_user(s->si_errno, &d->si_errno); + err |= __put_user((short)s->si_code, &d->si_code); + if (s->si_code < 0) + err |= __copy_to_user(&d->_sifields._pad, &s->_sifields._pad, + SI_PAD_SIZE32); + else switch(s->si_code >> 16) { + case __SI_CHLD >> 16: + err |= __put_user(s->si_pid, &d->si_pid); + err |= __put_user(s->si_uid, &d->si_uid); + err |= __put_user(s->si_utime, &d->si_utime); + err |= __put_user(s->si_stime, &d->si_stime); + err |= __put_user(s->si_status, &d->si_status); + break; + case __SI_FAULT >> 16: + err |= __put_user((unsigned int)(unsigned long)s->si_addr, + &d->si_addr); + break; + case __SI_POLL >> 16: + err |= __put_user(s->si_band, &d->si_band); + err |= __put_user(s->si_fd, &d->si_fd); + break; + case __SI_TIMER >> 16: + err |= __put_user(s->si_tid, &d->si_tid); + err |= __put_user(s->si_overrun, &d->si_overrun); + err |= __put_user(s->si_int, &d->si_int); + break; + case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ + case __SI_MESGQ >> 16: + err |= __put_user(s->si_int, &d->si_int); + /* fallthrough */ + case __SI_KILL >> 16: + default: + err |= __put_user(s->si_pid, &d->si_pid); + err |= __put_user(s->si_uid, &d->si_uid); + break; + } + return err; +} + +#define copy_siginfo_to_user copy_siginfo_to_user32 + +int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) +{ + memset(to, 0, sizeof *to); + + if (copy_from_user(to, from, 3*sizeof(int)) || + copy_from_user(to->_sifields._pad, + from->_sifields._pad, SI_PAD_SIZE32)) + return -EFAULT; + + return 0; +} + +/* + * Note: it is necessary to treat pid and sig as unsigned ints, with the + * corresponding cast to a signed int to insure that the proper conversion + * (sign extension) between the register representation of a signed int + * (msr in 32-bit mode) and the register representation of a signed int + * (msr in 64-bit mode) is performed. 
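+ *
+ * (Editor's example, not from the original patch: a 32-bit task
+ * passing pid = -1 arrives here as the u32 0xffffffff; the (int)
+ * casts below recover the signed value -1 rather than 4294967295,
+ * which is precisely the sign extension this note is about.)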
+ */
+long compat_sys_rt_sigqueueinfo(u32 pid, u32 sig, compat_siginfo_t __user *uinfo)
+{
+	siginfo_t info;
+	int ret;
+	mm_segment_t old_fs = get_fs();
+
+	ret = copy_siginfo_from_user32(&info, uinfo);
+	if (unlikely(ret))
+		return ret;
+
+	set_fs(KERNEL_DS);
+	/* The __user pointer cast is valid because of the set_fs() */
+	ret = sys_rt_sigqueueinfo((int)pid, (int)sig, (siginfo_t __user *) &info);
+	set_fs(old_fs);
+	return ret;
+}
+
+/*
+ * Start of alternate signal stack support
+ *
+ * System Calls
+ *	sigaltstack	compat_sys_sigaltstack
+ */
+
+int compat_sys_sigaltstack(u32 __new, u32 __old, int r5,
+		int r6, int r7, int r8, struct pt_regs *regs)
+{
+	stack_32_t __user * newstack = compat_ptr(__new);
+	stack_32_t __user * oldstack = compat_ptr(__old);
+	stack_t uss, uoss;
+	int ret;
+	mm_segment_t old_fs;
+	unsigned long sp;
+	compat_uptr_t ss_sp;
+
+	/*
+	 * set sp to the user stack on entry to the system call
+	 * the system call router sets R9 to the saved registers
+	 */
+	sp = regs->gpr[1];
+
+	/* Put new stack info in local 64 bit stack struct */
+	if (newstack) {
+		if (get_user(ss_sp, &newstack->ss_sp) ||
+		    __get_user(uss.ss_flags, &newstack->ss_flags) ||
+		    __get_user(uss.ss_size, &newstack->ss_size))
+			return -EFAULT;
+		uss.ss_sp = compat_ptr(ss_sp);
+	}
+
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	/* The __user pointer casts are valid because of the set_fs() */
+	ret = do_sigaltstack(
+			newstack ? (stack_t __user *) &uss : NULL,
+			oldstack ? (stack_t __user *) &uoss : NULL,
+			sp);
+	set_fs(old_fs);
+	/* Copy the stack information to the user output buffer */
+	if (!ret && oldstack &&
+	    (put_user(ptr_to_compat(uoss.ss_sp), &oldstack->ss_sp) ||
+	     __put_user(uoss.ss_flags, &oldstack->ss_flags) ||
+	     __put_user(uoss.ss_size, &oldstack->ss_size)))
+		return -EFAULT;
+	return ret;
+}
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Set up a signal frame for a "real-time" signal handler
+ * (one which gets siginfo).
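+ *
+ * (Editor's note, an illustrative assumption: the register setup at
+ * the end of this function mirrors the SA_SIGINFO handler prototype
+ *
+ *	void handler(int sig, siginfo_t *info, void *ucontext);
+ *
+ * with r3, r4 and r5 carrying those three arguments.)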
+ */ +int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, + struct pt_regs *regs) +{ + struct rt_sigframe __user *rt_sf; + struct mcontext __user *frame; + void __user *addr; + unsigned long newsp = 0; + + /* Set up Signal Frame */ + /* Put a Real Time Context onto stack */ + rt_sf = get_sigframe(ka, regs, sizeof(*rt_sf), 1); + addr = rt_sf; + if (unlikely(rt_sf == NULL)) + goto badframe; + + /* Put the siginfo & fill in most of the ucontext */ + if (copy_siginfo_to_user(&rt_sf->info, info) + || __put_user(0, &rt_sf->uc.uc_flags) + || __put_user(0, &rt_sf->uc.uc_link) + || __put_user(current->sas_ss_sp, &rt_sf->uc.uc_stack.ss_sp) + || __put_user(sas_ss_flags(regs->gpr[1]), + &rt_sf->uc.uc_stack.ss_flags) + || __put_user(current->sas_ss_size, &rt_sf->uc.uc_stack.ss_size) + || __put_user(to_user_ptr(&rt_sf->uc.uc_mcontext), + &rt_sf->uc.uc_regs) + || put_sigset_t(&rt_sf->uc.uc_sigmask, oldset)) + goto badframe; + + /* Save user registers on the stack */ + frame = &rt_sf->uc.uc_mcontext; + addr = frame; + if (vdso32_rt_sigtramp && current->mm->context.vdso_base) { + if (save_user_regs(regs, frame, 0, 1)) + goto badframe; + regs->link = current->mm->context.vdso_base + vdso32_rt_sigtramp; + } else { + if (save_user_regs(regs, frame, __NR_rt_sigreturn, 1)) + goto badframe; + regs->link = (unsigned long) frame->tramp; + } + + current->thread.fpscr.val = 0; /* turn off all fp exceptions */ + + /* create a stack frame for the caller of the handler */ + newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16); + addr = (void __user *)regs->gpr[1]; + if (put_user(regs->gpr[1], (u32 __user *)newsp)) + goto badframe; + + /* Fill registers for signal handler */ + regs->gpr[1] = newsp; + regs->gpr[3] = sig; + regs->gpr[4] = (unsigned long) &rt_sf->info; + regs->gpr[5] = (unsigned long) &rt_sf->uc; + regs->gpr[6] = (unsigned long) rt_sf; + regs->nip = (unsigned long) ka->sa.sa_handler; + /* enter the signal handler in big-endian mode */ + regs->msr &= ~MSR_LE; + return 1; + +badframe: +#ifdef DEBUG_SIG + printk("badframe in handle_rt_signal, regs=%p frame=%p newsp=%lx\n", + regs, frame, newsp); +#endif + if (show_unhandled_signals) + printk_ratelimited(KERN_INFO + "%s[%d]: bad frame in handle_rt_signal32: " + "%p nip %08lx lr %08lx\n", + current->comm, current->pid, + addr, regs->nip, regs->link); + + force_sigsegv(sig, current); + return 0; +} + +static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int sig) +{ + sigset_t set; + struct mcontext __user *mcp; + + if (get_sigset_t(&set, &ucp->uc_sigmask)) + return -EFAULT; +#ifdef CONFIG_PPC64 + { + u32 cmcp; + + if (__get_user(cmcp, &ucp->uc_regs)) + return -EFAULT; + mcp = (struct mcontext __user *)(u64)cmcp; + /* no need to check access_ok(mcp), since mcp < 4GB */ + } +#else + if (__get_user(mcp, &ucp->uc_regs)) + return -EFAULT; + if (!access_ok(VERIFY_READ, mcp, sizeof(*mcp))) + return -EFAULT; +#endif + restore_sigmask(&set); + if (restore_user_regs(regs, mcp, sig)) + return -EFAULT; + + return 0; +} + +long sys_swapcontext(struct ucontext __user *old_ctx, + struct ucontext __user *new_ctx, + int ctx_size, int r6, int r7, int r8, struct pt_regs *regs) +{ + unsigned char tmp; + int ctx_has_vsx_region = 0; + +#ifdef CONFIG_PPC64 + unsigned long new_msr = 0; + + if (new_ctx) { + struct mcontext __user *mcp; + u32 cmcp; + + /* + * Get pointer to the real mcontext. No need for + * access_ok since we are dealing with compat + * pointers. 
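+ *
+ * (Editor's aside, not part of the original patch: a compat pointer
+ * is a u32, so the widened address is below 4GB and therefore below
+ * TASK_SIZE, which makes an explicit access_ok() check a foregone
+ * conclusion; the "mcp < 4GB" comment in do_setcontext() makes the
+ * same argument.)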
+ */ + if (__get_user(cmcp, &new_ctx->uc_regs)) + return -EFAULT; + mcp = (struct mcontext __user *)(u64)cmcp; + if (__get_user(new_msr, &mcp->mc_gregs[PT_MSR])) + return -EFAULT; + } + /* + * Check that the context is not smaller than the original + * size (with VMX but without VSX) + */ + if (ctx_size < UCONTEXTSIZEWITHOUTVSX) + return -EINVAL; + /* + * If the new context state sets the MSR VSX bits but + * it doesn't provide VSX state. + */ + if ((ctx_size < sizeof(struct ucontext)) && + (new_msr & MSR_VSX)) + return -EINVAL; + /* Does the context have enough room to store VSX data? */ + if (ctx_size >= sizeof(struct ucontext)) + ctx_has_vsx_region = 1; +#else + /* Context size is for future use. Right now, we only make sure + * we are passed something we understand + */ + if (ctx_size < sizeof(struct ucontext)) + return -EINVAL; +#endif + if (old_ctx != NULL) { + struct mcontext __user *mctx; + + /* + * old_ctx might not be 16-byte aligned, in which + * case old_ctx->uc_mcontext won't be either. + * Because we have the old_ctx->uc_pad2 field + * before old_ctx->uc_mcontext, we need to round down + * from &old_ctx->uc_mcontext to a 16-byte boundary. + */ + mctx = (struct mcontext __user *) + ((unsigned long) &old_ctx->uc_mcontext & ~0xfUL); + if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size) + || save_user_regs(regs, mctx, 0, ctx_has_vsx_region) + || put_sigset_t(&old_ctx->uc_sigmask, ¤t->blocked) + || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs)) + return -EFAULT; + } + if (new_ctx == NULL) + return 0; + if (!access_ok(VERIFY_READ, new_ctx, ctx_size) + || __get_user(tmp, (u8 __user *) new_ctx) + || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1)) + return -EFAULT; + + /* + * If we get a fault copying the context into the kernel's + * image of the user's registers, we can't just return -EFAULT + * because the user's registers will be corrupted. For instance + * the NIP value may have been updated but not some of the + * other registers. Given that we have done the access_ok + * and successfully read the first and last bytes of the region + * above, this should only happen in an out-of-memory situation + * or if another thread unmaps the region containing the context. + * We kill the task with a SIGSEGV in this situation. + */ + if (do_setcontext(new_ctx, regs, 0)) + do_exit(SIGSEGV); + + set_thread_flag(TIF_RESTOREALL); + return 0; +} + +long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, + struct pt_regs *regs) +{ + struct rt_sigframe __user *rt_sf; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + rt_sf = (struct rt_sigframe __user *) + (regs->gpr[1] + __SIGNAL_FRAMESIZE + 16); + if (!access_ok(VERIFY_READ, rt_sf, sizeof(*rt_sf))) + goto bad; + if (do_setcontext(&rt_sf->uc, regs, 1)) + goto bad; + + /* + * It's not clear whether or why it is desirable to save the + * sigaltstack setting on signal delivery and restore it on + * signal return. But other architectures do this and we have + * always done it up until now so it is probably better not to + * change it. -- paulus + */ +#ifdef CONFIG_PPC64 + /* + * We use the compat_sys_ version that does the 32/64 bits conversion + * and takes userland pointer directly. What about error checking ? + * nobody does any... 
+ */ + compat_sys_sigaltstack((u32)(u64)&rt_sf->uc.uc_stack, 0, 0, 0, 0, 0, regs); +#else + do_sigaltstack(&rt_sf->uc.uc_stack, NULL, regs->gpr[1]); +#endif + set_thread_flag(TIF_RESTOREALL); + return 0; + + bad: + if (show_unhandled_signals) + printk_ratelimited(KERN_INFO + "%s[%d]: bad frame in sys_rt_sigreturn: " + "%p nip %08lx lr %08lx\n", + current->comm, current->pid, + rt_sf, regs->nip, regs->link); + + force_sig(SIGSEGV, current); + return 0; +} + +#ifdef CONFIG_PPC32 +int sys_debug_setcontext(struct ucontext __user *ctx, + int ndbg, struct sig_dbg_op __user *dbg, + int r6, int r7, int r8, + struct pt_regs *regs) +{ + struct sig_dbg_op op; + int i; + unsigned char tmp; + unsigned long new_msr = regs->msr; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + unsigned long new_dbcr0 = current->thread.dbcr0; +#endif + + for (i=0; i<ndbg; i++) { + if (copy_from_user(&op, dbg + i, sizeof(op))) + return -EFAULT; + switch (op.dbg_type) { + case SIG_DBG_SINGLE_STEPPING: +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + if (op.dbg_value) { + new_msr |= MSR_DE; + new_dbcr0 |= (DBCR0_IDM | DBCR0_IC); + } else { + new_dbcr0 &= ~DBCR0_IC; + if (!DBCR_ACTIVE_EVENTS(new_dbcr0, + current->thread.dbcr1)) { + new_msr &= ~MSR_DE; + new_dbcr0 &= ~DBCR0_IDM; + } + } +#else + if (op.dbg_value) + new_msr |= MSR_SE; + else + new_msr &= ~MSR_SE; +#endif + break; + case SIG_DBG_BRANCH_TRACING: +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + return -EINVAL; +#else + if (op.dbg_value) + new_msr |= MSR_BE; + else + new_msr &= ~MSR_BE; +#endif + break; + + default: + return -EINVAL; + } + } + + /* We wait until here to actually install the values in the + registers so if we fail in the above loop, it will not + affect the contents of these registers. After this point, + failure is a problem, anyway, and it's very unlikely unless + the user is really doing something wrong. */ + regs->msr = new_msr; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + current->thread.dbcr0 = new_dbcr0; +#endif + + if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx)) + || __get_user(tmp, (u8 __user *) ctx) + || __get_user(tmp, (u8 __user *) (ctx + 1) - 1)) + return -EFAULT; + + /* + * If we get a fault copying the context into the kernel's + * image of the user's registers, we can't just return -EFAULT + * because the user's registers will be corrupted. For instance + * the NIP value may have been updated but not some of the + * other registers. Given that we have done the access_ok + * and successfully read the first and last bytes of the region + * above, this should only happen in an out-of-memory situation + * or if another thread unmaps the region containing the context. + * We kill the task with a SIGSEGV in this situation. + */ + if (do_setcontext(ctx, regs, 1)) { + if (show_unhandled_signals) + printk_ratelimited(KERN_INFO "%s[%d]: bad frame in " + "sys_debug_setcontext: %p nip %08lx " + "lr %08lx\n", + current->comm, current->pid, + ctx, regs->nip, regs->link); + + force_sig(SIGSEGV, current); + goto out; + } + + /* + * It's not clear whether or why it is desirable to save the + * sigaltstack setting on signal delivery and restore it on + * signal return. But other architectures do this and we have + * always done it up until now so it is probably better not to + * change it. 
-- paulus + */ + do_sigaltstack(&ctx->uc_stack, NULL, regs->gpr[1]); + + set_thread_flag(TIF_RESTOREALL); + out: + return 0; +} +#endif + +/* + * OK, we're invoking a handler + */ +int handle_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) +{ + struct sigcontext __user *sc; + struct sigframe __user *frame; + unsigned long newsp = 0; + + /* Set up Signal Frame */ + frame = get_sigframe(ka, regs, sizeof(*frame), 1); + if (unlikely(frame == NULL)) + goto badframe; + sc = (struct sigcontext __user *) &frame->sctx; + +#if _NSIG != 64 +#error "Please adjust handle_signal()" +#endif + if (__put_user(to_user_ptr(ka->sa.sa_handler), &sc->handler) + || __put_user(oldset->sig[0], &sc->oldmask) +#ifdef CONFIG_PPC64 + || __put_user((oldset->sig[0] >> 32), &sc->_unused[3]) +#else + || __put_user(oldset->sig[1], &sc->_unused[3]) +#endif + || __put_user(to_user_ptr(&frame->mctx), &sc->regs) + || __put_user(sig, &sc->signal)) + goto badframe; + + if (vdso32_sigtramp && current->mm->context.vdso_base) { + if (save_user_regs(regs, &frame->mctx, 0, 1)) + goto badframe; + regs->link = current->mm->context.vdso_base + vdso32_sigtramp; + } else { + if (save_user_regs(regs, &frame->mctx, __NR_sigreturn, 1)) + goto badframe; + regs->link = (unsigned long) frame->mctx.tramp; + } + + current->thread.fpscr.val = 0; /* turn off all fp exceptions */ + + /* create a stack frame for the caller of the handler */ + newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; + if (put_user(regs->gpr[1], (u32 __user *)newsp)) + goto badframe; + + regs->gpr[1] = newsp; + regs->gpr[3] = sig; + regs->gpr[4] = (unsigned long) sc; + regs->nip = (unsigned long) ka->sa.sa_handler; + /* enter the signal handler in big-endian mode */ + regs->msr &= ~MSR_LE; + + return 1; + +badframe: +#ifdef DEBUG_SIG + printk("badframe in handle_signal, regs=%p frame=%p newsp=%lx\n", + regs, frame, newsp); +#endif + if (show_unhandled_signals) + printk_ratelimited(KERN_INFO + "%s[%d]: bad frame in handle_signal32: " + "%p nip %08lx lr %08lx\n", + current->comm, current->pid, + frame, regs->nip, regs->link); + + force_sigsegv(sig, current); + return 0; +} + +/* + * Do a signal return; undo the signal stack. 
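+ *
+ * Layout note, mirroring handle_signal32(): the sigcontext sits just
+ * above the dummy caller frame, at regs->gpr[1] + __SIGNAL_FRAMESIZE,
+ * and the second word of the sigmask travels in sigctx._unused[3].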
+ */ +long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, + struct pt_regs *regs) +{ + struct sigcontext __user *sc; + struct sigcontext sigctx; + struct mcontext __user *sr; + void __user *addr; + sigset_t set; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + sc = (struct sigcontext __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); + addr = sc; + if (copy_from_user(&sigctx, sc, sizeof(sigctx))) + goto badframe; + +#ifdef CONFIG_PPC64 + /* + * Note that PPC32 puts the upper 32 bits of the sigmask in the + * unused part of the signal stackframe + */ + set.sig[0] = sigctx.oldmask + ((long)(sigctx._unused[3]) << 32); +#else + set.sig[0] = sigctx.oldmask; + set.sig[1] = sigctx._unused[3]; +#endif + restore_sigmask(&set); + + sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); + addr = sr; + if (!access_ok(VERIFY_READ, sr, sizeof(*sr)) + || restore_user_regs(regs, sr, 1)) + goto badframe; + + set_thread_flag(TIF_RESTOREALL); + return 0; + +badframe: + if (show_unhandled_signals) + printk_ratelimited(KERN_INFO + "%s[%d]: bad frame in sys_sigreturn: " + "%p nip %08lx lr %08lx\n", + current->comm, current->pid, + addr, regs->nip, regs->link); + + force_sig(SIGSEGV, current); + return 0; +} diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c new file mode 100644 index 00000000..2692efdb --- /dev/null +++ b/arch/powerpc/kernel/signal_64.c @@ -0,0 +1,479 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/i386/kernel/signal.c" + * Copyright (C) 1991, 1992 Linus Torvalds + * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/unistd.h> +#include <linux/stddef.h> +#include <linux/elf.h> +#include <linux/ptrace.h> +#include <linux/ratelimit.h> + +#include <asm/sigcontext.h> +#include <asm/ucontext.h> +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/unistd.h> +#include <asm/cacheflush.h> +#include <asm/syscalls.h> +#include <asm/vdso.h> +#include <asm/switch_to.h> + +#include "signal.h" + +#define DEBUG_SIG 0 + +#define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) +#define FP_REGS_SIZE sizeof(elf_fpregset_t) + +#define TRAMP_TRACEBACK 3 +#define TRAMP_SIZE 6 + +/* + * When we have signals to deliver, we set up on the user stack, + * going down from the original stack pointer: + * 1) a rt_sigframe struct which contains the ucontext + * 2) a gap of __SIGNAL_FRAMESIZE bytes which acts as a dummy caller + * frame for the signal handler. + */ + +struct rt_sigframe { + /* sys_rt_sigreturn requires the ucontext be the first field */ + struct ucontext uc; + unsigned long _unused[2]; + unsigned int tramp[TRAMP_SIZE]; + struct siginfo __user *pinfo; + void __user *puc; + struct siginfo info; + /* 64 bit ABI allows for 288 bytes below sp before decrementing it. 
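+	 * This "red zone" may hold live data of leaf functions that did
+	 * not allocate a frame, so the signal frame must be placed below
+	 * it rather than on top of it.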
*/ + char abigap[288]; +} __attribute__ ((aligned (16))); + +static const char fmt32[] = KERN_INFO \ + "%s[%d]: bad frame in %s: %08lx nip %08lx lr %08lx\n"; +static const char fmt64[] = KERN_INFO \ + "%s[%d]: bad frame in %s: %016lx nip %016lx lr %016lx\n"; + +/* + * Set up the sigcontext for the signal frame. + */ + +static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, + int signr, sigset_t *set, unsigned long handler, + int ctx_has_vsx_region) +{ + /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the + * process never used altivec yet (MSR_VEC is zero in pt_regs of + * the context). This is very important because we must ensure we + * don't lose the VRSAVE content that may have been set prior to + * the process doing its first vector operation + * Userland shall check AT_HWCAP to know wether it can rely on the + * v_regs pointer or not + */ +#ifdef CONFIG_ALTIVEC + elf_vrreg_t __user *v_regs = (elf_vrreg_t __user *)(((unsigned long)sc->vmx_reserve + 15) & ~0xful); +#endif + unsigned long msr = regs->msr; + long err = 0; + + flush_fp_to_thread(current); + +#ifdef CONFIG_ALTIVEC + err |= __put_user(v_regs, &sc->v_regs); + + /* save altivec registers */ + if (current->thread.used_vr) { + flush_altivec_to_thread(current); + /* Copy 33 vec registers (vr0..31 and vscr) to the stack */ + err |= __copy_to_user(v_regs, current->thread.vr, 33 * sizeof(vector128)); + /* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg) + * contains valid data. + */ + msr |= MSR_VEC; + } + /* We always copy to/from vrsave, it's 0 if we don't have or don't + * use altivec. + */ + err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); +#else /* CONFIG_ALTIVEC */ + err |= __put_user(0, &sc->v_regs); +#endif /* CONFIG_ALTIVEC */ + flush_fp_to_thread(current); + /* copy fpr regs and fpscr */ + err |= copy_fpr_to_user(&sc->fp_regs, current); +#ifdef CONFIG_VSX + /* + * Copy VSX low doubleword to local buffer for formatting, + * then out to userspace. Update v_regs to point after the + * VMX data. + */ + if (current->thread.used_vsr && ctx_has_vsx_region) { + __giveup_vsx(current); + v_regs += ELF_NVRREG; + err |= copy_vsx_to_user(v_regs, current); + /* set MSR_VSX in the MSR value in the frame to + * indicate that sc->vs_reg) contains valid data. + */ + msr |= MSR_VSX; + } +#endif /* CONFIG_VSX */ + err |= __put_user(&sc->gp_regs, &sc->regs); + WARN_ON(!FULL_REGS(regs)); + err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE); + err |= __put_user(msr, &sc->gp_regs[PT_MSR]); + err |= __put_user(signr, &sc->signal); + err |= __put_user(handler, &sc->handler); + if (set != NULL) + err |= __put_user(set->sig[0], &sc->oldmask); + + return err; +} + +/* + * Restore the sigcontext from the signal frame. 
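+ *
+ * Note the 'sig' flag below: on a real signal return we also transfer
+ * the user-visible MSR_LE bit from the frame, while for swapcontext
+ * style calls (sig == 0) r13, the TLS pointer, is preserved across
+ * the switch instead of being taken from the saved GPRs.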
+ */ + +static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, + struct sigcontext __user *sc) +{ +#ifdef CONFIG_ALTIVEC + elf_vrreg_t __user *v_regs; +#endif + unsigned long err = 0; + unsigned long save_r13 = 0; + unsigned long msr; +#ifdef CONFIG_VSX + int i; +#endif + + /* If this is not a signal return, we preserve the TLS in r13 */ + if (!sig) + save_r13 = regs->gpr[13]; + + /* copy the GPRs */ + err |= __copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr)); + err |= __get_user(regs->nip, &sc->gp_regs[PT_NIP]); + /* get MSR separately, transfer the LE bit if doing signal return */ + err |= __get_user(msr, &sc->gp_regs[PT_MSR]); + if (sig) + regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); + err |= __get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3]); + err |= __get_user(regs->ctr, &sc->gp_regs[PT_CTR]); + err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]); + err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]); + err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]); + /* skip SOFTE */ + regs->trap = 0; + err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); + err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); + err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); + + if (!sig) + regs->gpr[13] = save_r13; + if (set != NULL) + err |= __get_user(set->sig[0], &sc->oldmask); + + /* + * Do this before updating the thread state in + * current->thread.fpr/vr. That way, if we get preempted + * and another task grabs the FPU/Altivec, it won't be + * tempted to save the current CPU state into the thread_struct + * and corrupt what we are writing there. + */ + discard_lazy_cpu_state(); + + /* + * Force reload of FP/VEC. + * This has to be done before copying stuff into current->thread.fpr/vr + * for the reasons explained in the previous comment. + */ + regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX); + +#ifdef CONFIG_ALTIVEC + err |= __get_user(v_regs, &sc->v_regs); + if (err) + return err; + if (v_regs && !access_ok(VERIFY_READ, v_regs, 34 * sizeof(vector128))) + return -EFAULT; + /* Copy 33 vec registers (vr0..31 and vscr) from the stack */ + if (v_regs != 0 && (msr & MSR_VEC) != 0) + err |= __copy_from_user(current->thread.vr, v_regs, + 33 * sizeof(vector128)); + else if (current->thread.used_vr) + memset(current->thread.vr, 0, 33 * sizeof(vector128)); + /* Always get VRSAVE back */ + if (v_regs != 0) + err |= __get_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); + else + current->thread.vrsave = 0; +#endif /* CONFIG_ALTIVEC */ + /* restore floating point */ + err |= copy_fpr_from_user(current, &sc->fp_regs); +#ifdef CONFIG_VSX + /* + * Get additional VSX data. Update v_regs to point after the + * VMX data. Copy VSX low doubleword from userspace to local + * buffer for formatting, then into the taskstruct. 
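+	 * The VSX doublewords sit immediately after the 34 VMX slots
+	 * (vr0..vr31, VSCR, VRSAVE), hence the v_regs += ELF_NVRREG
+	 * adjustment below.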
+ */
+	v_regs += ELF_NVRREG;
+	if ((msr & MSR_VSX) != 0)
+		err |= copy_vsx_from_user(current, v_regs);
+	else
+		for (i = 0; i < 32 ; i++)
+			current->thread.fpr[i][TS_VSRLOWOFFSET] = 0;
+#endif
+	return err;
+}
+
+/*
+ * Setup the trampoline code on the stack
+ */
+static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
+{
+	int i;
+	long err = 0;
+
+	/* addi r1, r1, __SIGNAL_FRAMESIZE  # Pop the dummy stackframe */
+	err |= __put_user(0x38210000UL | (__SIGNAL_FRAMESIZE & 0xffff), &tramp[0]);
+	/* li r0, __NR_[rt_]sigreturn */
+	err |= __put_user(0x38000000UL | (syscall & 0xffff), &tramp[1]);
+	/* sc */
+	err |= __put_user(0x44000002UL, &tramp[2]);
+
+	/* Minimal traceback info */
+	for (i = TRAMP_TRACEBACK; i < TRAMP_SIZE; i++)
+		err |= __put_user(0, &tramp[i]);
+
+	if (!err)
+		flush_icache_range((unsigned long) &tramp[0],
+			   (unsigned long) &tramp[TRAMP_SIZE]);
+
+	return err;
+}
+
+/*
+ * Userspace code may pass a ucontext which doesn't include VSX added
+ * at the end. We need to check for this case.
+ */
+#define UCONTEXTSIZEWITHOUTVSX \
+		(sizeof(struct ucontext) - 32*sizeof(long))
+
+/*
+ * Handle {get,set,swap}_context operations
+ */
+int sys_swapcontext(struct ucontext __user *old_ctx,
+		    struct ucontext __user *new_ctx,
+		    long ctx_size, long r6, long r7, long r8, struct pt_regs *regs)
+{
+	unsigned char tmp;
+	sigset_t set;
+	unsigned long new_msr = 0;
+	int ctx_has_vsx_region = 0;
+
+	if (new_ctx &&
+	    get_user(new_msr, &new_ctx->uc_mcontext.gp_regs[PT_MSR]))
+		return -EFAULT;
+	/*
+	 * Check that the context is not smaller than the original
+	 * size (with VMX but without VSX)
+	 */
+	if (ctx_size < UCONTEXTSIZEWITHOUTVSX)
+		return -EINVAL;
+	/*
+	 * If the new context state sets the MSR VSX bits but
+	 * it doesn't provide VSX state.
+	 */
+	if ((ctx_size < sizeof(struct ucontext)) &&
+	    (new_msr & MSR_VSX))
+		return -EINVAL;
+	/* Does the context have enough room to store VSX data? */
+	if (ctx_size >= sizeof(struct ucontext))
+		ctx_has_vsx_region = 1;
+
+	if (old_ctx != NULL) {
+		if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size)
+		    || setup_sigcontext(&old_ctx->uc_mcontext, regs, 0, NULL, 0,
+					ctx_has_vsx_region)
+		    || __copy_to_user(&old_ctx->uc_sigmask,
+				      &current->blocked, sizeof(sigset_t)))
+			return -EFAULT;
+	}
+	if (new_ctx == NULL)
+		return 0;
+	if (!access_ok(VERIFY_READ, new_ctx, ctx_size)
+	    || __get_user(tmp, (u8 __user *) new_ctx)
+	    || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1))
+		return -EFAULT;
+
+	/*
+	 * If we get a fault copying the context into the kernel's
+	 * image of the user's registers, we can't just return -EFAULT
+	 * because the user's registers will be corrupted. For instance
+	 * the NIP value may have been updated but not some of the
+	 * other registers. Given that we have done the access_ok
+	 * and successfully read the first and last bytes of the region
+	 * above, this should only happen in an out-of-memory situation
+	 * or if another thread unmaps the region containing the context.
+	 * We kill the task with a SIGSEGV in this situation.
+	 */
+
+	if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
+		do_exit(SIGSEGV);
+	restore_sigmask(&set);
+	if (restore_sigcontext(regs, NULL, 0, &new_ctx->uc_mcontext))
+		do_exit(SIGSEGV);
+
+	/* This returns like rt_sigreturn */
+	set_thread_flag(TIF_RESTOREALL);
+	return 0;
+}
+
+
+/*
+ * Do a signal return; undo the signal stack.
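+ *
+ * By this point the trampoline (or its vDSO equivalent) has popped the
+ * dummy caller frame with
+ *
+ *	addi r1,r1,__SIGNAL_FRAMESIZE
+ *
+ * so regs->gpr[1] points straight at the rt_sigframe, whose first
+ * field is the ucontext read below.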
+ */ + +int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7, unsigned long r8, + struct pt_regs *regs) +{ + struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1]; + sigset_t set; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + if (!access_ok(VERIFY_READ, uc, sizeof(*uc))) + goto badframe; + + if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set))) + goto badframe; + restore_sigmask(&set); + if (restore_sigcontext(regs, NULL, 1, &uc->uc_mcontext)) + goto badframe; + + /* do_sigaltstack expects a __user pointer and won't modify + * what's in there anyway + */ + do_sigaltstack(&uc->uc_stack, NULL, regs->gpr[1]); + + set_thread_flag(TIF_RESTOREALL); + return 0; + +badframe: +#if DEBUG_SIG + printk("badframe in sys_rt_sigreturn, regs=%p uc=%p &uc->uc_mcontext=%p\n", + regs, uc, &uc->uc_mcontext); +#endif + if (show_unhandled_signals) + printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, + current->comm, current->pid, "rt_sigreturn", + (long)uc, regs->nip, regs->link); + + force_sig(SIGSEGV, current); + return 0; +} + +int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + /* Handler is *really* a pointer to the function descriptor for + * the signal routine. The first entry in the function + * descriptor is the entry address of signal and the second + * entry is the TOC value we need to use. + */ + func_descr_t __user *funct_desc_ptr; + struct rt_sigframe __user *frame; + unsigned long newsp = 0; + long err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame), 0); + if (unlikely(frame == NULL)) + goto badframe; + + err |= __put_user(&frame->info, &frame->pinfo); + err |= __put_user(&frame->uc, &frame->puc); + err |= copy_siginfo_to_user(&frame->info, info); + if (err) + goto badframe; + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->gpr[1]), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, signr, NULL, + (unsigned long)ka->sa.sa_handler, 1); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto badframe; + + /* Make sure signal handler doesn't get spurious FP exceptions */ + current->thread.fpscr.val = 0; + + /* Set up to return from userspace. */ + if (vdso64_rt_sigtramp && current->mm->context.vdso_base) { + regs->link = current->mm->context.vdso_base + vdso64_rt_sigtramp; + } else { + err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); + if (err) + goto badframe; + regs->link = (unsigned long) &frame->tramp[0]; + } + funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler; + + /* Allocate a dummy caller frame for the signal handler. */ + newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; + err |= put_user(regs->gpr[1], (unsigned long __user *)newsp); + + /* Set up "regs" so we "return" to the signal handler. 
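+	 * Per the 64-bit ELF ABI the handler is a function descriptor:
+	 * NIP comes from ->entry and r2 from ->toc. The GPRs then follow
+	 * the handler prototype, e.g. for SA_SIGINFO:
+	 *
+	 *	void handler(int sig, siginfo_t *info, void *uc);
+	 *
+	 * which maps to r3, r4 and r5 below.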
*/ + err |= get_user(regs->nip, &funct_desc_ptr->entry); + /* enter the signal handler in big-endian mode */ + regs->msr &= ~MSR_LE; + regs->gpr[1] = newsp; + err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); + regs->gpr[3] = signr; + regs->result = 0; + if (ka->sa.sa_flags & SA_SIGINFO) { + err |= get_user(regs->gpr[4], (unsigned long __user *)&frame->pinfo); + err |= get_user(regs->gpr[5], (unsigned long __user *)&frame->puc); + regs->gpr[6] = (unsigned long) frame; + } else { + regs->gpr[4] = (unsigned long)&frame->uc.uc_mcontext; + } + if (err) + goto badframe; + + return 1; + +badframe: +#if DEBUG_SIG + printk("badframe in setup_rt_frame, regs=%p frame=%p newsp=%lx\n", + regs, frame, newsp); +#endif + if (show_unhandled_signals) + printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, + current->comm, current->pid, "setup_rt_frame", + (long)frame, regs->nip, regs->link); + + force_sigsegv(signr, current); + return 0; +} diff --git a/arch/powerpc/kernel/smp-tbsync.c b/arch/powerpc/kernel/smp-tbsync.c new file mode 100644 index 00000000..640de836 --- /dev/null +++ b/arch/powerpc/kernel/smp-tbsync.c @@ -0,0 +1,171 @@ +/* + * Smp timebase synchronization for ppc. + * + * Copyright (C) 2003 Samuel Rydh (samuel@ibrium.se) + * + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/unistd.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/atomic.h> +#include <asm/smp.h> +#include <asm/time.h> + +#define NUM_ITER 300 + +enum { + kExit=0, kSetAndTest, kTest +}; + +static struct { + volatile u64 tb; + volatile u64 mark; + volatile int cmd; + volatile int handshake; + int filler[2]; + + volatile int ack; + int filler2[7]; + + volatile int race_result; +} *tbsync; + +static volatile int running; + +static void __devinit enter_contest(u64 mark, long add) +{ + while (get_tb() < mark) + tbsync->race_result = add; +} + +void __devinit smp_generic_take_timebase(void) +{ + int cmd; + u64 tb; + unsigned long flags; + + local_irq_save(flags); + while (!running) + barrier(); + rmb(); + + for (;;) { + tbsync->ack = 1; + while (!tbsync->handshake) + barrier(); + rmb(); + + cmd = tbsync->cmd; + tb = tbsync->tb; + mb(); + tbsync->ack = 0; + if (cmd == kExit) + break; + + while (tbsync->handshake) + barrier(); + if (cmd == kSetAndTest) + set_tb(tb >> 32, tb & 0xfffffffful); + enter_contest(tbsync->mark, -1); + } + local_irq_restore(flags); +} + +static int __devinit start_contest(int cmd, long offset, int num) +{ + int i, score=0; + u64 tb; + u64 mark; + + tbsync->cmd = cmd; + + local_irq_disable(); + for (i = -3; i < num; ) { + tb = get_tb() + 400; + tbsync->tb = tb + offset; + tbsync->mark = mark = tb + 400; + + wmb(); + + tbsync->handshake = 1; + while (tbsync->ack) + barrier(); + + while (get_tb() <= tb) + barrier(); + tbsync->handshake = 0; + enter_contest(mark, 1); + + while (!tbsync->ack) + barrier(); + + if (i++ > 0) + score += tbsync->race_result; + } + local_irq_enable(); + return score; +} + +void __devinit smp_generic_give_timebase(void) +{ + int i, score, score2, old, min=0, max=5000, offset=1000; + + pr_debug("Software timebase sync\n"); + + /* if this fails then this kernel won't work anyway... 
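+	 * (a small GFP_KERNEL allocation this early in SMP bringup only
+	 * fails if memory is already hosed). The sync itself binary
+	 * searches for the offset to apply to the slave's timebase: in
+	 * start_contest() both CPUs race to be the last store into
+	 * race_result before a shared deadline, so roughly speaking the
+	 * sign of the score tells us which clock is running behind.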
*/ + tbsync = kzalloc( sizeof(*tbsync), GFP_KERNEL ); + mb(); + running = 1; + + while (!tbsync->ack) + barrier(); + + pr_debug("Got ack\n"); + + /* binary search */ + for (old = -1; old != offset ; offset = (min+max) / 2) { + score = start_contest(kSetAndTest, offset, NUM_ITER); + + pr_debug("score %d, offset %d\n", score, offset ); + + if( score > 0 ) + max = offset; + else + min = offset; + old = offset; + } + score = start_contest(kSetAndTest, min, NUM_ITER); + score2 = start_contest(kSetAndTest, max, NUM_ITER); + + pr_debug("Min %d (score %d), Max %d (score %d)\n", + min, score, max, score2); + score = abs(score); + score2 = abs(score2); + offset = (score < score2) ? min : max; + + /* guard against inaccurate mttb */ + for (i = 0; i < 10; i++) { + start_contest(kSetAndTest, offset, NUM_ITER/10); + + if ((score2 = start_contest(kTest, offset, NUM_ITER)) < 0) + score2 = -score2; + if (score2 <= score || score2 < 20) + break; + } + pr_debug("Final offset: %d (%d/%d)\n", offset, score2, NUM_ITER ); + + /* exiting */ + tbsync->cmd = kExit; + wmb(); + tbsync->handshake = 1; + while (tbsync->ack) + barrier(); + tbsync->handshake = 0; + kfree(tbsync); + tbsync = NULL; + running = 0; +} diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c new file mode 100644 index 00000000..d9f94410 --- /dev/null +++ b/arch/powerpc/kernel/smp.c @@ -0,0 +1,785 @@ +/* + * SMP support for ppc. + * + * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great + * deal of code from the sparc and intel versions. + * + * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu> + * + * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/cache.h> +#include <linux/err.h> +#include <linux/device.h> +#include <linux/cpu.h> +#include <linux/notifier.h> +#include <linux/topology.h> + +#include <asm/ptrace.h> +#include <linux/atomic.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/smp.h> +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/cputhreads.h> +#include <asm/cputable.h> +#include <asm/mpic.h> +#include <asm/vdso_datapage.h> +#ifdef CONFIG_PPC64 +#include <asm/paca.h> +#endif +#include <asm/debug.h> + +#ifdef DEBUG +#include <asm/udbg.h> +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + + +/* Store all idle threads, this can be reused instead of creating +* a new thread. Also avoids complicated thread destroy functionality +* for idle threads. +*/ +#ifdef CONFIG_HOTPLUG_CPU +/* + * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is + * removed after init for !CONFIG_HOTPLUG_CPU. 
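+ * With hotplug enabled the idle thread of an offlined CPU is kept in
+ * this array and merely re-initialized by create_idle() when the CPU
+ * is plugged back in, instead of being forked again.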
+ */ +static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); +#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) +#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) + +/* State of each CPU during hotplug phases */ +static DEFINE_PER_CPU(int, cpu_state) = { 0 }; + +#else +static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; +#define get_idle_for_cpu(x) (idle_thread_array[(x)]) +#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) +#endif + +struct thread_info *secondary_ti; + +DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); +DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); + +EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); +EXPORT_PER_CPU_SYMBOL(cpu_core_map); + +/* SMP operations for this machine */ +struct smp_ops_t *smp_ops; + +/* Can't be static due to PowerMac hackery */ +volatile unsigned int cpu_callin_map[NR_CPUS]; + +int smt_enabled_at_boot = 1; + +static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL; + +#ifdef CONFIG_PPC64 +int __devinit smp_generic_kick_cpu(int nr) +{ + BUG_ON(nr < 0 || nr >= NR_CPUS); + + /* + * The processor is currently spinning, waiting for the + * cpu_start field to become non-zero After we set cpu_start, + * the processor will continue on to secondary_start + */ + if (!paca[nr].cpu_start) { + paca[nr].cpu_start = 1; + smp_mb(); + return 0; + } + +#ifdef CONFIG_HOTPLUG_CPU + /* + * Ok it's not there, so it might be soft-unplugged, let's + * try to bring it back + */ + per_cpu(cpu_state, nr) = CPU_UP_PREPARE; + smp_wmb(); + smp_send_reschedule(nr); +#endif /* CONFIG_HOTPLUG_CPU */ + + return 0; +} +#endif /* CONFIG_PPC64 */ + +static irqreturn_t call_function_action(int irq, void *data) +{ + generic_smp_call_function_interrupt(); + return IRQ_HANDLED; +} + +static irqreturn_t reschedule_action(int irq, void *data) +{ + scheduler_ipi(); + return IRQ_HANDLED; +} + +static irqreturn_t call_function_single_action(int irq, void *data) +{ + generic_smp_call_function_single_interrupt(); + return IRQ_HANDLED; +} + +static irqreturn_t debug_ipi_action(int irq, void *data) +{ + if (crash_ipi_function_ptr) { + crash_ipi_function_ptr(get_irq_regs()); + return IRQ_HANDLED; + } + +#ifdef CONFIG_DEBUGGER + debugger_ipi(get_irq_regs()); +#endif /* CONFIG_DEBUGGER */ + + return IRQ_HANDLED; +} + +static irq_handler_t smp_ipi_action[] = { + [PPC_MSG_CALL_FUNCTION] = call_function_action, + [PPC_MSG_RESCHEDULE] = reschedule_action, + [PPC_MSG_CALL_FUNC_SINGLE] = call_function_single_action, + [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action, +}; + +const char *smp_ipi_name[] = { + [PPC_MSG_CALL_FUNCTION] = "ipi call function", + [PPC_MSG_RESCHEDULE] = "ipi reschedule", + [PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single", + [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger", +}; + +/* optional function to request ipi, for controllers with >= 4 ipis */ +int smp_request_message_ipi(int virq, int msg) +{ + int err; + + if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) { + return -EINVAL; + } +#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC) + if (msg == PPC_MSG_DEBUGGER_BREAK) { + return 1; + } +#endif + err = request_irq(virq, smp_ipi_action[msg], + IRQF_PERCPU | IRQF_NO_THREAD, + smp_ipi_name[msg], 0); + WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n", + virq, smp_ipi_name[msg], err); + + return err; +} + +#ifdef CONFIG_PPC_SMP_MUXED_IPI +struct cpu_messages { + int messages; /* current messages */ + unsigned long data; /* data for cause ipi */ +}; +static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, 
ipi_message); + +void smp_muxed_ipi_set_data(int cpu, unsigned long data) +{ + struct cpu_messages *info = &per_cpu(ipi_message, cpu); + + info->data = data; +} + +void smp_muxed_ipi_message_pass(int cpu, int msg) +{ + struct cpu_messages *info = &per_cpu(ipi_message, cpu); + char *message = (char *)&info->messages; + + message[msg] = 1; + mb(); + smp_ops->cause_ipi(cpu, info->data); +} + +irqreturn_t smp_ipi_demux(void) +{ + struct cpu_messages *info = &__get_cpu_var(ipi_message); + unsigned int all; + + mb(); /* order any irq clear */ + + do { + all = xchg_local(&info->messages, 0); + +#ifdef __BIG_ENDIAN + if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION))) + generic_smp_call_function_interrupt(); + if (all & (1 << (24 - 8 * PPC_MSG_RESCHEDULE))) + scheduler_ipi(); + if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNC_SINGLE))) + generic_smp_call_function_single_interrupt(); + if (all & (1 << (24 - 8 * PPC_MSG_DEBUGGER_BREAK))) + debug_ipi_action(0, NULL); +#else +#error Unsupported ENDIAN +#endif + } while (info->messages); + + return IRQ_HANDLED; +} +#endif /* CONFIG_PPC_SMP_MUXED_IPI */ + +static inline void do_message_pass(int cpu, int msg) +{ + if (smp_ops->message_pass) + smp_ops->message_pass(cpu, msg); +#ifdef CONFIG_PPC_SMP_MUXED_IPI + else + smp_muxed_ipi_message_pass(cpu, msg); +#endif +} + +void smp_send_reschedule(int cpu) +{ + if (likely(smp_ops)) + do_message_pass(cpu, PPC_MSG_RESCHEDULE); +} +EXPORT_SYMBOL_GPL(smp_send_reschedule); + +void arch_send_call_function_single_ipi(int cpu) +{ + do_message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE); +} + +void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + unsigned int cpu; + + for_each_cpu(cpu, mask) + do_message_pass(cpu, PPC_MSG_CALL_FUNCTION); +} + +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) +void smp_send_debugger_break(void) +{ + int cpu; + int me = raw_smp_processor_id(); + + if (unlikely(!smp_ops)) + return; + + for_each_online_cpu(cpu) + if (cpu != me) + do_message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); +} +#endif + +#ifdef CONFIG_KEXEC +void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) +{ + crash_ipi_function_ptr = crash_ipi_callback; + if (crash_ipi_callback) { + mb(); + smp_send_debugger_break(); + } +} +#endif + +static void stop_this_cpu(void *dummy) +{ + /* Remove this CPU */ + set_cpu_online(smp_processor_id(), false); + + local_irq_disable(); + while (1) + ; +} + +void smp_send_stop(void) +{ + smp_call_function(stop_this_cpu, NULL, 0); +} + +struct thread_info *current_set[NR_CPUS]; + +static void __devinit smp_store_cpu_info(int id) +{ + per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR); +#ifdef CONFIG_PPC_FSL_BOOK3E + per_cpu(next_tlbcam_idx, id) + = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1; +#endif +} + +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + unsigned int cpu; + + DBG("smp_prepare_cpus\n"); + + /* + * setup_cpu may need to be called on the boot cpu. We havent + * spun any cpus up but lets be paranoid. 
+ */ + BUG_ON(boot_cpuid != smp_processor_id()); + + /* Fixup boot cpu */ + smp_store_cpu_info(boot_cpuid); + cpu_callin_map[boot_cpuid] = 1; + + for_each_possible_cpu(cpu) { + zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu), + GFP_KERNEL, cpu_to_node(cpu)); + zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu), + GFP_KERNEL, cpu_to_node(cpu)); + } + + cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); + cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); + + if (smp_ops) + if (smp_ops->probe) + max_cpus = smp_ops->probe(); + else + max_cpus = NR_CPUS; + else + max_cpus = 1; +} + +void __devinit smp_prepare_boot_cpu(void) +{ + BUG_ON(smp_processor_id() != boot_cpuid); +#ifdef CONFIG_PPC64 + paca[boot_cpuid].__current = current; +#endif + current_set[boot_cpuid] = task_thread_info(current); +} + +#ifdef CONFIG_HOTPLUG_CPU + +int generic_cpu_disable(void) +{ + unsigned int cpu = smp_processor_id(); + + if (cpu == boot_cpuid) + return -EBUSY; + + set_cpu_online(cpu, false); +#ifdef CONFIG_PPC64 + vdso_data->processorCount--; +#endif + migrate_irqs(); + return 0; +} + +void generic_cpu_die(unsigned int cpu) +{ + int i; + + for (i = 0; i < 100; i++) { + smp_rmb(); + if (per_cpu(cpu_state, cpu) == CPU_DEAD) + return; + msleep(100); + } + printk(KERN_ERR "CPU%d didn't die...\n", cpu); +} + +void generic_mach_cpu_die(void) +{ + unsigned int cpu; + + local_irq_disable(); + idle_task_exit(); + cpu = smp_processor_id(); + printk(KERN_DEBUG "CPU%d offline\n", cpu); + __get_cpu_var(cpu_state) = CPU_DEAD; + smp_wmb(); + while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) + cpu_relax(); +} + +void generic_set_cpu_dead(unsigned int cpu) +{ + per_cpu(cpu_state, cpu) = CPU_DEAD; +} + +int generic_check_cpu_restart(unsigned int cpu) +{ + return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE; +} +#endif + +struct create_idle { + struct work_struct work; + struct task_struct *idle; + struct completion done; + int cpu; +}; + +static void __cpuinit do_fork_idle(struct work_struct *work) +{ + struct create_idle *c_idle = + container_of(work, struct create_idle, work); + + c_idle->idle = fork_idle(c_idle->cpu); + complete(&c_idle->done); +} + +static int __cpuinit create_idle(unsigned int cpu) +{ + struct thread_info *ti; + struct create_idle c_idle = { + .cpu = cpu, + .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), + }; + INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle); + + c_idle.idle = get_idle_for_cpu(cpu); + + /* We can't use kernel_thread since we must avoid to + * reschedule the child. We use a workqueue because + * we want to fork from a kernel thread, not whatever + * userspace process happens to be trying to online us. 
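+	 * Illustrative flow when no cached idle thread exists: the
+	 * schedule_work()/wait_for_completion() pair below hands off to
+	 * do_fork_idle(), which runs fork_idle(cpu) in keventd context
+	 * and then completes c_idle.done.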
+ */ + if (!c_idle.idle) { + schedule_work(&c_idle.work); + wait_for_completion(&c_idle.done); + } else + init_idle(c_idle.idle, cpu); + if (IS_ERR(c_idle.idle)) { + pr_err("Failed fork for CPU %u: %li", cpu, PTR_ERR(c_idle.idle)); + return PTR_ERR(c_idle.idle); + } + ti = task_thread_info(c_idle.idle); + +#ifdef CONFIG_PPC64 + paca[cpu].__current = c_idle.idle; + paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD; +#endif + ti->cpu = cpu; + current_set[cpu] = ti; + + return 0; +} + +int __cpuinit __cpu_up(unsigned int cpu) +{ + int rc, c; + + if (smp_ops == NULL || + (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu))) + return -EINVAL; + + /* Make sure we have an idle thread */ + rc = create_idle(cpu); + if (rc) + return rc; + + secondary_ti = current_set[cpu]; + + /* Make sure callin-map entry is 0 (can be leftover a CPU + * hotplug + */ + cpu_callin_map[cpu] = 0; + + /* The information for processor bringup must + * be written out to main store before we release + * the processor. + */ + smp_mb(); + + /* wake up cpus */ + DBG("smp: kicking cpu %d\n", cpu); + rc = smp_ops->kick_cpu(cpu); + if (rc) { + pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc); + return rc; + } + + /* + * wait to see if the cpu made a callin (is actually up). + * use this value that I found through experimentation. + * -- Cort + */ + if (system_state < SYSTEM_RUNNING) + for (c = 50000; c && !cpu_callin_map[cpu]; c--) + udelay(100); +#ifdef CONFIG_HOTPLUG_CPU + else + /* + * CPUs can take much longer to come up in the + * hotplug case. Wait five seconds. + */ + for (c = 5000; c && !cpu_callin_map[cpu]; c--) + msleep(1); +#endif + + if (!cpu_callin_map[cpu]) { + printk(KERN_ERR "Processor %u is stuck.\n", cpu); + return -ENOENT; + } + + DBG("Processor %u found.\n", cpu); + + if (smp_ops->give_timebase) + smp_ops->give_timebase(); + + /* Wait until cpu puts itself in the online map */ + while (!cpu_online(cpu)) + cpu_relax(); + + return 0; +} + +/* Return the value of the reg property corresponding to the given + * logical cpu. + */ +int cpu_to_core_id(int cpu) +{ + struct device_node *np; + const int *reg; + int id = -1; + + np = of_get_cpu_node(cpu, NULL); + if (!np) + goto out; + + reg = of_get_property(np, "reg", NULL); + if (!reg) + goto out; + + id = *reg; +out: + of_node_put(np); + return id; +} + +/* Helper routines for cpu to core mapping */ +int cpu_core_index_of_thread(int cpu) +{ + return cpu >> threads_shift; +} +EXPORT_SYMBOL_GPL(cpu_core_index_of_thread); + +int cpu_first_thread_of_core(int core) +{ + return core << threads_shift; +} +EXPORT_SYMBOL_GPL(cpu_first_thread_of_core); + +/* Must be called when no change can occur to cpu_present_mask, + * i.e. during cpu online or offline. + */ +static struct device_node *cpu_to_l2cache(int cpu) +{ + struct device_node *np; + struct device_node *cache; + + if (!cpu_present(cpu)) + return NULL; + + np = of_get_cpu_node(cpu, NULL); + if (np == NULL) + return NULL; + + cache = of_find_next_cache_node(np); + + of_node_put(np); + + return cache; +} + +/* Activate a secondary processor. 
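+ *
+ * Rough sequence, matching the code below: adopt init_mm, record CPU
+ * info, program the decrementer, check in via cpu_callin_map, sync the
+ * timebase if needed, mark ourselves online, fill in the sibling/core
+ * masks, then enter cpu_idle().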
*/ +void __devinit start_secondary(void *unused) +{ + unsigned int cpu = smp_processor_id(); + struct device_node *l2_cache; + int i, base; + + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + + smp_store_cpu_info(cpu); + set_dec(tb_ticks_per_jiffy); + preempt_disable(); + cpu_callin_map[cpu] = 1; + + if (smp_ops->setup_cpu) + smp_ops->setup_cpu(cpu); + if (smp_ops->take_timebase) + smp_ops->take_timebase(); + + secondary_cpu_time_init(); + +#ifdef CONFIG_PPC64 + if (system_state == SYSTEM_RUNNING) + vdso_data->processorCount++; +#endif + ipi_call_lock(); + notify_cpu_starting(cpu); + set_cpu_online(cpu, true); + /* Update sibling maps */ + base = cpu_first_thread_sibling(cpu); + for (i = 0; i < threads_per_core; i++) { + if (cpu_is_offline(base + i)) + continue; + cpumask_set_cpu(cpu, cpu_sibling_mask(base + i)); + cpumask_set_cpu(base + i, cpu_sibling_mask(cpu)); + + /* cpu_core_map should be a superset of + * cpu_sibling_map even if we don't have cache + * information, so update the former here, too. + */ + cpumask_set_cpu(cpu, cpu_core_mask(base + i)); + cpumask_set_cpu(base + i, cpu_core_mask(cpu)); + } + l2_cache = cpu_to_l2cache(cpu); + for_each_online_cpu(i) { + struct device_node *np = cpu_to_l2cache(i); + if (!np) + continue; + if (np == l2_cache) { + cpumask_set_cpu(cpu, cpu_core_mask(i)); + cpumask_set_cpu(i, cpu_core_mask(cpu)); + } + of_node_put(np); + } + of_node_put(l2_cache); + ipi_call_unlock(); + + local_irq_enable(); + + cpu_idle(); + + BUG(); +} + +int setup_profiling_timer(unsigned int multiplier) +{ + return 0; +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ + cpumask_var_t old_mask; + + /* We want the setup_cpu() here to be called from CPU 0, but our + * init thread may have been "borrowed" by another CPU in the meantime + * se we pin us down to CPU 0 for a short while + */ + alloc_cpumask_var(&old_mask, GFP_NOWAIT); + cpumask_copy(old_mask, tsk_cpus_allowed(current)); + set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid)); + + if (smp_ops && smp_ops->setup_cpu) + smp_ops->setup_cpu(boot_cpuid); + + set_cpus_allowed_ptr(current, old_mask); + + free_cpumask_var(old_mask); + + if (smp_ops && smp_ops->bringup_done) + smp_ops->bringup_done(); + + dump_numa_cpu_topology(); + +} + +int arch_sd_sibling_asym_packing(void) +{ + if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { + printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); + return SD_ASYM_PACKING; + } + return 0; +} + +#ifdef CONFIG_HOTPLUG_CPU +int __cpu_disable(void) +{ + struct device_node *l2_cache; + int cpu = smp_processor_id(); + int base, i; + int err; + + if (!smp_ops->cpu_disable) + return -ENOSYS; + + err = smp_ops->cpu_disable(); + if (err) + return err; + + /* Update sibling maps */ + base = cpu_first_thread_sibling(cpu); + for (i = 0; i < threads_per_core; i++) { + cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i)); + cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu)); + cpumask_clear_cpu(cpu, cpu_core_mask(base + i)); + cpumask_clear_cpu(base + i, cpu_core_mask(cpu)); + } + + l2_cache = cpu_to_l2cache(cpu); + for_each_present_cpu(i) { + struct device_node *np = cpu_to_l2cache(i); + if (!np) + continue; + if (np == l2_cache) { + cpumask_clear_cpu(cpu, cpu_core_mask(i)); + cpumask_clear_cpu(i, cpu_core_mask(cpu)); + } + of_node_put(np); + } + of_node_put(l2_cache); + + + return 0; +} + +void __cpu_die(unsigned int cpu) +{ + if (smp_ops->cpu_die) + smp_ops->cpu_die(cpu); +} + +static DEFINE_MUTEX(powerpc_cpu_hotplug_driver_mutex); + +void cpu_hotplug_driver_lock() +{ + 
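+	/* Serialize arch-level CPU hotplug driver operations. */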
mutex_lock(&powerpc_cpu_hotplug_driver_mutex); +} + +void cpu_hotplug_driver_unlock() +{ + mutex_unlock(&powerpc_cpu_hotplug_driver_mutex); +} + +void cpu_die(void) +{ + if (ppc_md.cpu_die) + ppc_md.cpu_die(); + + /* If we return, we re-enter start_secondary */ + start_secondary_resume(); +} + +#endif diff --git a/arch/powerpc/kernel/softemu8xx.c b/arch/powerpc/kernel/softemu8xx.c new file mode 100644 index 00000000..29b2f81d --- /dev/null +++ b/arch/powerpc/kernel/softemu8xx.c @@ -0,0 +1,199 @@ +/* + * Software emulation of some PPC instructions for the 8xx core. + * + * Copyright (C) 1998 Dan Malek (dmalek@jlc.net) + * + * Software floating emuation for the MPC8xx processor. I did this mostly + * because it was easier than trying to get the libraries compiled for + * software floating point. The goal is still to get the libraries done, + * but I lost patience and needed some hacks to at least get init and + * shells running. The first problem is the setjmp/longjmp that save + * and restore the floating point registers. + * + * For this emulation, our working registers are found on the register + * save area. + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/interrupt.h> + +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/io.h> + +/* Eventually we may need a look-up table, but this works for now. +*/ +#define LFS 48 +#define LFD 50 +#define LFDU 51 +#define STFD 54 +#define STFDU 55 +#define FMR 63 + +void print_8xx_pte(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + + printk(" pte @ 0x%8lx: ", addr); + pgd = pgd_offset(mm, addr & PAGE_MASK); + if (pgd) { + pmd = pmd_offset(pud_offset(pgd, addr & PAGE_MASK), + addr & PAGE_MASK); + if (pmd && pmd_present(*pmd)) { + pte = pte_offset_kernel(pmd, addr & PAGE_MASK); + if (pte) { + printk(" (0x%08lx)->(0x%08lx)->0x%08lx\n", + (long)pgd, (long)pte, (long)pte_val(*pte)); +#define pp ((long)pte_val(*pte)) + printk(" RPN: %05lx PP: %lx SPS: %lx SH: %lx " + "CI: %lx v: %lx\n", + pp>>12, /* rpn */ + (pp>>10)&3, /* pp */ + (pp>>3)&1, /* small */ + (pp>>2)&1, /* shared */ + (pp>>1)&1, /* cache inhibit */ + pp&1 /* valid */ + ); +#undef pp + } + else { + printk("no pte\n"); + } + } + else { + printk("no pmd\n"); + } + } + else { + printk("no pgd\n"); + } +} + +int get_8xx_pte(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int retval = 0; + + pgd = pgd_offset(mm, addr & PAGE_MASK); + if (pgd) { + pmd = pmd_offset(pud_offset(pgd, addr & PAGE_MASK), + addr & PAGE_MASK); + if (pmd && pmd_present(*pmd)) { + pte = pte_offset_kernel(pmd, addr & PAGE_MASK); + if (pte) { + retval = (int)pte_val(*pte); + } + } + } + return retval; +} + +/* + * We return 0 on success, 1 on unimplemented instruction, and EFAULT + * if a load/store faulted. 
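+ *
+ * D-form decode used below:
+ *
+ *	opcode = instword >> 26;          primary opcode (LFD, STFD, ...)
+ *	frt    = (instword >> 21) & 0x1f; FP register
+ *	ra     = (instword >> 16) & 0x1f; base GPR
+ *	d      = instword & 0xffff;       displacement, sign-extended
+ *	                                  for the D-form loads/stores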
+ */
+int Soft_emulate_8xx(struct pt_regs *regs)
+{
+	u32 inst, instword;
+	u32 flreg, idxreg, disp;
+	int retval;
+	s16 sdisp;
+	u32 *ea, *ip;
+
+	retval = 0;
+
+	instword = *((u32 *)regs->nip);
+	inst = instword >> 26;
+
+	flreg = (instword >> 21) & 0x1f;
+	idxreg = (instword >> 16) & 0x1f;
+	disp = instword & 0xffff;
+
+	ea = (u32 *)(regs->gpr[idxreg] + disp);
+	ip = (u32 *)&current->thread.TS_FPR(flreg);
+
+	switch ( inst )
+	{
+	case LFD:
+		/* this is a 16 bit quantity that is sign extended
+		 * so use a signed short here -- Cort
+		 */
+		sdisp = (instword & 0xffff);
+		ea = (u32 *)(regs->gpr[idxreg] + sdisp);
+		if (copy_from_user(ip, ea, sizeof(double)))
+			retval = -EFAULT;
+		break;
+
+	case LFDU:
+		if (copy_from_user(ip, ea, sizeof(double)))
+			retval = -EFAULT;
+		else
+			regs->gpr[idxreg] = (u32)ea;
+		break;
+	case LFS:
+		sdisp = (instword & 0xffff);
+		ea = (u32 *)(regs->gpr[idxreg] + sdisp);
+		if (copy_from_user(ip, ea, sizeof(float)))
+			retval = -EFAULT;
+		break;
+	case STFD:
+		/* this is a 16 bit quantity that is sign extended
+		 * so use a signed short here -- Cort
+		 */
+		sdisp = (instword & 0xffff);
+		ea = (u32 *)(regs->gpr[idxreg] + sdisp);
+		if (copy_to_user(ea, ip, sizeof(double)))
+			retval = -EFAULT;
+		break;
+
+	case STFDU:
+		if (copy_to_user(ea, ip, sizeof(double)))
+			retval = -EFAULT;
+		else
+			regs->gpr[idxreg] = (u32)ea;
+		break;
+	case FMR:
+		/* assume this is a fp move -- Cort */
+		memcpy(ip, &current->thread.TS_FPR((instword>>11)&0x1f),
+		       sizeof(double));
+		break;
+	default:
+		retval = 1;
+		printk("Bad emulation %s/%d\n"
+		       " NIP: %08lx instruction: %08x opcode: %x "
+		       "A: %x B: %x C: %x code: %x rc: %x\n",
+		       current->comm, current->pid,
+		       regs->nip,
+		       instword, inst,
+		       (instword>>16)&0x1f,
+		       (instword>>11)&0x1f,
+		       (instword>>6)&0x1f,
+		       (instword>>1)&0x3ff,
+		       instword&1);
+		{
+			unsigned long pa;
+			print_8xx_pte(current->mm, regs->nip);
+			pa = get_8xx_pte(current->mm, regs->nip) & PAGE_MASK;
+			pa |= (regs->nip & ~PAGE_MASK);
+			pa = (unsigned long)__va(pa);
+			printk("Kernel VA for NIP %lx ", pa);
+			print_8xx_pte(current->mm, pa);
+		}
+	}
+
+	if (retval == 0)
+		regs->nip += 4;
+
+	return retval;
+}
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
new file mode 100644
index 00000000..3d30ef10
--- /dev/null
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -0,0 +1,63 @@
+/*
+ * Stack trace utility
+ *
+ * Copyright 2008 Christoph Hellwig, IBM Corp.
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+#include <asm/ptrace.h>
+#include <asm/processor.h>
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer.
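+ *
+ * Every powerpc stack frame starts with a back-chain word pointing at
+ * the caller's frame, with the return address saved at a fixed offset,
+ * so the walk below is essentially:
+ *
+ *	newsp = stack[0];                   (back-chain word)
+ *	ip = stack[STACK_FRAME_LR_SAVE];    (saved LR)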
+ */ +static void save_context_stack(struct stack_trace *trace, unsigned long sp, + struct task_struct *tsk, int savesched) +{ + for (;;) { + unsigned long *stack = (unsigned long *) sp; + unsigned long newsp, ip; + + if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD)) + return; + + newsp = stack[0]; + ip = stack[STACK_FRAME_LR_SAVE]; + + if (savesched || !in_sched_functions(ip)) { + if (!trace->skip) + trace->entries[trace->nr_entries++] = ip; + else + trace->skip--; + } + + if (trace->nr_entries >= trace->max_entries) + return; + + sp = newsp; + } +} + +void save_stack_trace(struct stack_trace *trace) +{ + unsigned long sp; + + asm("mr %0,1" : "=r" (sp)); + + save_context_stack(trace, sp, current, 1); +} +EXPORT_SYMBOL_GPL(save_stack_trace); + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + save_context_stack(trace, tsk->thread.ksp, tsk, 0); +} +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c new file mode 100644 index 00000000..0167d53d --- /dev/null +++ b/arch/powerpc/kernel/suspend.c @@ -0,0 +1,25 @@ +/* + * Suspend support specific for power. + * + * Distribute under GPLv2 + * + * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz> + * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> + */ + +#include <linux/mm.h> +#include <asm/page.h> + +/* References to section boundaries */ +extern const void __nosave_begin, __nosave_end; + +/* + * pfn_is_nosave - check if given pfn is in the 'nosave' section + */ + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; + unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT; + return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); +} diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c new file mode 100644 index 00000000..eae33e10 --- /dev/null +++ b/arch/powerpc/kernel/swsusp.c @@ -0,0 +1,38 @@ +/* + * Common powerpc suspend code for 32 and 64 bits + * + * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/sched.h> +#include <asm/current.h> +#include <asm/mmu_context.h> +#include <asm/switch_to.h> + +void save_processor_state(void) +{ + /* + * flush out all the special registers so we don't need + * to save them in the snapshot + */ + flush_fp_to_thread(current); + flush_altivec_to_thread(current); + flush_spe_to_thread(current); + +#ifdef CONFIG_PPC64 + hard_irq_disable(); +#endif + +} + +void restore_processor_state(void) +{ +#ifdef CONFIG_PPC32 + switch_mmu_context(current->active_mm, current->active_mm); +#endif +} diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S new file mode 100644 index 00000000..ba4dee3d --- /dev/null +++ b/arch/powerpc/kernel/swsusp_32.S @@ -0,0 +1,350 @@ +#include <linux/threads.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/mmu.h> + +/* + * Structure for storing CPU registers on the save area. 
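+ *
+ * Offsets are bytes from swsusp_save_area. SL_SIZE is SL_R12 + 80 so
+ * that stmw/lmw can spill and reload r12..r31 (20 x 4 bytes).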
+ */ +#define SL_SP 0 +#define SL_PC 4 +#define SL_MSR 8 +#define SL_SDR1 0xc +#define SL_SPRG0 0x10 /* 4 sprg's */ +#define SL_DBAT0 0x20 +#define SL_IBAT0 0x28 +#define SL_DBAT1 0x30 +#define SL_IBAT1 0x38 +#define SL_DBAT2 0x40 +#define SL_IBAT2 0x48 +#define SL_DBAT3 0x50 +#define SL_IBAT3 0x58 +#define SL_TB 0x60 +#define SL_R2 0x68 +#define SL_CR 0x6c +#define SL_LR 0x70 +#define SL_R12 0x74 /* r12 to r31 */ +#define SL_SIZE (SL_R12 + 80) + + .section .data + .align 5 + +_GLOBAL(swsusp_save_area) + .space SL_SIZE + + + .section .text + .align 5 + +_GLOBAL(swsusp_arch_suspend) + + lis r11,swsusp_save_area@h + ori r11,r11,swsusp_save_area@l + + mflr r0 + stw r0,SL_LR(r11) + mfcr r0 + stw r0,SL_CR(r11) + stw r1,SL_SP(r11) + stw r2,SL_R2(r11) + stmw r12,SL_R12(r11) + + /* Save MSR & SDR1 */ + mfmsr r4 + stw r4,SL_MSR(r11) + mfsdr1 r4 + stw r4,SL_SDR1(r11) + + /* Get a stable timebase and save it */ +1: mftbu r4 + stw r4,SL_TB(r11) + mftb r5 + stw r5,SL_TB+4(r11) + mftbu r3 + cmpw r3,r4 + bne 1b + + /* Save SPRGs */ + mfsprg r4,0 + stw r4,SL_SPRG0(r11) + mfsprg r4,1 + stw r4,SL_SPRG0+4(r11) + mfsprg r4,2 + stw r4,SL_SPRG0+8(r11) + mfsprg r4,3 + stw r4,SL_SPRG0+12(r11) + + /* Save BATs */ + mfdbatu r4,0 + stw r4,SL_DBAT0(r11) + mfdbatl r4,0 + stw r4,SL_DBAT0+4(r11) + mfdbatu r4,1 + stw r4,SL_DBAT1(r11) + mfdbatl r4,1 + stw r4,SL_DBAT1+4(r11) + mfdbatu r4,2 + stw r4,SL_DBAT2(r11) + mfdbatl r4,2 + stw r4,SL_DBAT2+4(r11) + mfdbatu r4,3 + stw r4,SL_DBAT3(r11) + mfdbatl r4,3 + stw r4,SL_DBAT3+4(r11) + mfibatu r4,0 + stw r4,SL_IBAT0(r11) + mfibatl r4,0 + stw r4,SL_IBAT0+4(r11) + mfibatu r4,1 + stw r4,SL_IBAT1(r11) + mfibatl r4,1 + stw r4,SL_IBAT1+4(r11) + mfibatu r4,2 + stw r4,SL_IBAT2(r11) + mfibatl r4,2 + stw r4,SL_IBAT2+4(r11) + mfibatu r4,3 + stw r4,SL_IBAT3(r11) + mfibatl r4,3 + stw r4,SL_IBAT3+4(r11) + +#if 0 + /* Backup various CPU config stuffs */ + bl __save_cpu_setup +#endif + /* Call the low level suspend stuff (we should probably have made + * a stackframe... + */ + bl swsusp_save + + /* Restore LR from the save area */ + lis r11,swsusp_save_area@h + ori r11,r11,swsusp_save_area@l + lwz r0,SL_LR(r11) + mtlr r0 + + blr + + +/* Resume code */ +_GLOBAL(swsusp_arch_resume) + +#ifdef CONFIG_ALTIVEC + /* Stop pending alitvec streams and memory accesses */ +BEGIN_FTR_SECTION + DSSALL +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif + sync + + /* Disable MSR:DR to make sure we don't take a TLB or + * hash miss during the copy, as our hash table will + * for a while be unusable. For .text, we assume we are + * covered by a BAT. This works only for non-G5 at this + * point. G5 will need a better approach, possibly using + * a small temporary hash table filled with large mappings, + * disabling the MMU completely isn't a good option for + * performance reasons. + * (Note that 750's may have the same performance issue as + * the G5 in this case, we should investigate using moving + * BATs for these CPUs) + */ + mfmsr r0 + sync + rlwinm r0,r0,0,28,26 /* clear MSR_DR */ + mtmsr r0 + sync + isync + + /* Load ptr the list of pages to copy in r3 */ + lis r11,(restore_pblist - KERNELBASE)@h + ori r11,r11,restore_pblist@l + lwz r10,0(r11) + + /* Copy the pages. 
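+	 * Each pbe entry moves one 4k page (256 iterations of 16 bytes),
+	 * all through physical addresses (tophys) since MSR_DR is off.
+	 *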
This is a very basic implementation, to + * be replaced by something more cache efficient */ +1: + tophys(r3,r10) + li r0,256 + mtctr r0 + lwz r11,pbe_address(r3) /* source */ + tophys(r5,r11) + lwz r10,pbe_orig_address(r3) /* destination */ + tophys(r6,r10) +2: + lwz r8,0(r5) + lwz r9,4(r5) + lwz r10,8(r5) + lwz r11,12(r5) + addi r5,r5,16 + stw r8,0(r6) + stw r9,4(r6) + stw r10,8(r6) + stw r11,12(r6) + addi r6,r6,16 + bdnz 2b + lwz r10,pbe_next(r3) + cmpwi 0,r10,0 + bne 1b + + /* Do a very simple cache flush/inval of the L1 to ensure + * coherency of the icache + */ + lis r3,0x0002 + mtctr r3 + li r3, 0 +1: + lwz r0,0(r3) + addi r3,r3,0x0020 + bdnz 1b + isync + sync + + /* Now flush those cache lines */ + lis r3,0x0002 + mtctr r3 + li r3, 0 +1: + dcbf 0,r3 + addi r3,r3,0x0020 + bdnz 1b + sync + + /* Ok, we are now running with the kernel data of the old + * kernel fully restored. We can get to the save area + * easily now. As for the rest of the code, it assumes the + * loader kernel and the booted one are exactly identical + */ + lis r11,swsusp_save_area@h + ori r11,r11,swsusp_save_area@l + tophys(r11,r11) + +#if 0 + /* Restore various CPU config stuffs */ + bl __restore_cpu_setup +#endif + /* Restore the BATs, and SDR1. Then we can turn on the MMU. + * This is a bit hairy as we are running out of those BATs, + * but first, our code is probably in the icache, and we are + * writing the same value to the BAT, so that should be fine, + * though a better solution will have to be found long-term + */ + lwz r4,SL_SDR1(r11) + mtsdr1 r4 + lwz r4,SL_SPRG0(r11) + mtsprg 0,r4 + lwz r4,SL_SPRG0+4(r11) + mtsprg 1,r4 + lwz r4,SL_SPRG0+8(r11) + mtsprg 2,r4 + lwz r4,SL_SPRG0+12(r11) + mtsprg 3,r4 + +#if 0 + lwz r4,SL_DBAT0(r11) + mtdbatu 0,r4 + lwz r4,SL_DBAT0+4(r11) + mtdbatl 0,r4 + lwz r4,SL_DBAT1(r11) + mtdbatu 1,r4 + lwz r4,SL_DBAT1+4(r11) + mtdbatl 1,r4 + lwz r4,SL_DBAT2(r11) + mtdbatu 2,r4 + lwz r4,SL_DBAT2+4(r11) + mtdbatl 2,r4 + lwz r4,SL_DBAT3(r11) + mtdbatu 3,r4 + lwz r4,SL_DBAT3+4(r11) + mtdbatl 3,r4 + lwz r4,SL_IBAT0(r11) + mtibatu 0,r4 + lwz r4,SL_IBAT0+4(r11) + mtibatl 0,r4 + lwz r4,SL_IBAT1(r11) + mtibatu 1,r4 + lwz r4,SL_IBAT1+4(r11) + mtibatl 1,r4 + lwz r4,SL_IBAT2(r11) + mtibatu 2,r4 + lwz r4,SL_IBAT2+4(r11) + mtibatl 2,r4 + lwz r4,SL_IBAT3(r11) + mtibatu 3,r4 + lwz r4,SL_IBAT3+4(r11) + mtibatl 3,r4 +#endif + +BEGIN_MMU_FTR_SECTION + li r4,0 + mtspr SPRN_DBAT4U,r4 + mtspr SPRN_DBAT4L,r4 + mtspr SPRN_DBAT5U,r4 + mtspr SPRN_DBAT5L,r4 + mtspr SPRN_DBAT6U,r4 + mtspr SPRN_DBAT6L,r4 + mtspr SPRN_DBAT7U,r4 + mtspr SPRN_DBAT7L,r4 + mtspr SPRN_IBAT4U,r4 + mtspr SPRN_IBAT4L,r4 + mtspr SPRN_IBAT5U,r4 + mtspr SPRN_IBAT5L,r4 + mtspr SPRN_IBAT6U,r4 + mtspr SPRN_IBAT6L,r4 + mtspr SPRN_IBAT7U,r4 + mtspr SPRN_IBAT7L,r4 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + + /* Flush all TLBs */ + lis r4,0x1000 +1: addic. r4,r4,-0x1000 + tlbie r4 + bgt 1b + sync + + /* restore the MSR and turn on the MMU */ + lwz r3,SL_MSR(r11) + bl turn_on_mmu + tovirt(r11,r11) + + /* Restore TB */ + li r3,0 + mttbl r3 + lwz r3,SL_TB(r11) + lwz r4,SL_TB+4(r11) + mttbu r3 + mttbl r4 + + /* Kick decrementer */ + li r0,1 + mtdec r0 + + /* Restore the callee-saved registers and return */ + lwz r0,SL_CR(r11) + mtcr r0 + lwz r2,SL_R2(r11) + lmw r12,SL_R12(r11) + lwz r1,SL_SP(r11) + lwz r0,SL_LR(r11) + mtlr r0 + + // XXX Note: we don't really need to call swsusp_resume + + li r3,0 + blr + +/* FIXME:This construct is actually not useful since we don't shut + * down the instruction MMU, we could just flip back MSR-DR on. 
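+ * turn_on_mmu below returns via rfi: SRR0 takes the return address
+ * (the saved LR) and SRR1 the target MSR, so the MMU comes back on
+ * at the instruction after the call.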
+ */ +turn_on_mmu: + mflr r4 + mtsrr0 r4 + mtsrr1 r3 + sync + isync + rfi + diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c new file mode 100644 index 00000000..0e899e47 --- /dev/null +++ b/arch/powerpc/kernel/swsusp_64.c @@ -0,0 +1,24 @@ +/* + * PowerPC 64-bit swsusp implementation + * + * Copyright 2006 Johannes Berg <johannes@sipsolutions.net> + * + * GPLv2 + */ + +#include <asm/iommu.h> +#include <linux/irq.h> +#include <linux/sched.h> +#include <linux/interrupt.h> + +void do_after_copyback(void) +{ + iommu_restore(); + touch_softlockup_watchdog(); + mb(); +} + +void _iommu_save(void) +{ + iommu_save(); +} diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S new file mode 100644 index 00000000..86ac1d90 --- /dev/null +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -0,0 +1,228 @@ +/* + * PowerPC 64-bit swsusp implementation + * + * Copyright 2006 Johannes Berg <johannes@sipsolutions.net> + * + * GPLv2 + */ + +#include <linux/threads.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +/* + * Structure for storing CPU registers on the save area. + */ +#define SL_r1 0x00 /* stack pointer */ +#define SL_PC 0x08 +#define SL_MSR 0x10 +#define SL_SDR1 0x18 +#define SL_XER 0x20 +#define SL_TB 0x40 +#define SL_r2 0x48 +#define SL_CR 0x50 +#define SL_LR 0x58 +#define SL_r12 0x60 +#define SL_r13 0x68 +#define SL_r14 0x70 +#define SL_r15 0x78 +#define SL_r16 0x80 +#define SL_r17 0x88 +#define SL_r18 0x90 +#define SL_r19 0x98 +#define SL_r20 0xa0 +#define SL_r21 0xa8 +#define SL_r22 0xb0 +#define SL_r23 0xb8 +#define SL_r24 0xc0 +#define SL_r25 0xc8 +#define SL_r26 0xd0 +#define SL_r27 0xd8 +#define SL_r28 0xe0 +#define SL_r29 0xe8 +#define SL_r30 0xf0 +#define SL_r31 0xf8 +#define SL_SIZE SL_r31+8 + +/* these macros rely on the save area being + * pointed to by r11 */ +#define SAVE_SPECIAL(special) \ + mf##special r0 ;\ + std r0, SL_##special(r11) +#define RESTORE_SPECIAL(special) \ + ld r0, SL_##special(r11) ;\ + mt##special r0 +#define SAVE_REGISTER(reg) \ + std reg, SL_##reg(r11) +#define RESTORE_REGISTER(reg) \ + ld reg, SL_##reg(r11) + +/* space for storing cpu state */ + .section .data + .align 5 +swsusp_save_area: + .space SL_SIZE + + .section ".toc","aw" +swsusp_save_area_ptr: + .tc swsusp_save_area[TC],swsusp_save_area +restore_pblist_ptr: + .tc restore_pblist[TC],restore_pblist + + .section .text + .align 5 +_GLOBAL(swsusp_arch_suspend) + ld r11,swsusp_save_area_ptr@toc(r2) + SAVE_SPECIAL(LR) + SAVE_REGISTER(r1) + SAVE_SPECIAL(CR) + SAVE_SPECIAL(TB) + SAVE_REGISTER(r2) + SAVE_REGISTER(r12) + SAVE_REGISTER(r13) + SAVE_REGISTER(r14) + SAVE_REGISTER(r15) + SAVE_REGISTER(r16) + SAVE_REGISTER(r17) + SAVE_REGISTER(r18) + SAVE_REGISTER(r19) + SAVE_REGISTER(r20) + SAVE_REGISTER(r21) + SAVE_REGISTER(r22) + SAVE_REGISTER(r23) + SAVE_REGISTER(r24) + SAVE_REGISTER(r25) + SAVE_REGISTER(r26) + SAVE_REGISTER(r27) + SAVE_REGISTER(r28) + SAVE_REGISTER(r29) + SAVE_REGISTER(r30) + SAVE_REGISTER(r31) + SAVE_SPECIAL(MSR) + SAVE_SPECIAL(SDR1) + SAVE_SPECIAL(XER) + + /* we push the stack up 128 bytes but don't store the + * stack pointer on the stack like a real stackframe */ + addi r1,r1,-128 + + bl _iommu_save + bl swsusp_save + + /* restore LR */ + ld r11,swsusp_save_area_ptr@toc(r2) + RESTORE_SPECIAL(LR) + addi r1,r1,128 + + blr + +/* Resume code */ +_GLOBAL(swsusp_arch_resume) + /* Stop pending alitvec streams and memory accesses */ 
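+	/* DSSALL exists only on Altivec-capable CPUs, hence the
+	 * feature-section guard below. */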
+BEGIN_FTR_SECTION + DSSALL +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + sync + + ld r12,restore_pblist_ptr@toc(r2) + ld r12,0(r12) + + cmpdi r12,0 + beq- nothing_to_copy + li r15,PAGE_SIZE>>3 +copyloop: + ld r13,pbe_address(r12) + ld r14,pbe_orig_address(r12) + + mtctr r15 + li r10,0 +copy_page_loop: + ldx r0,r10,r13 + stdx r0,r10,r14 + addi r10,r10,8 + bdnz copy_page_loop + + ld r12,pbe_next(r12) + cmpdi r12,0 + bne+ copyloop +nothing_to_copy: + + /* flush caches */ + lis r3, 0x10 + mtctr r3 + li r3, 0 + ori r3, r3, CONFIG_KERNEL_START>>48 + li r0, 48 + sld r3, r3, r0 + li r0, 0 +1: + dcbf r0,r3 + addi r3,r3,0x20 + bdnz 1b + + sync + + tlbia + + ld r11,swsusp_save_area_ptr@toc(r2) + + RESTORE_SPECIAL(CR) + + /* restore timebase */ + /* load saved tb */ + ld r1, SL_TB(r11) + /* get upper 32 bits of it */ + srdi r2, r1, 32 + /* clear tb lower to avoid wrap */ + li r0, 0 + mttbl r0 + /* set tb upper */ + mttbu r2 + /* set tb lower */ + mttbl r1 + + /* restore registers */ + RESTORE_REGISTER(r1) + RESTORE_REGISTER(r2) + RESTORE_REGISTER(r12) + RESTORE_REGISTER(r13) + RESTORE_REGISTER(r14) + RESTORE_REGISTER(r15) + RESTORE_REGISTER(r16) + RESTORE_REGISTER(r17) + RESTORE_REGISTER(r18) + RESTORE_REGISTER(r19) + RESTORE_REGISTER(r20) + RESTORE_REGISTER(r21) + RESTORE_REGISTER(r22) + RESTORE_REGISTER(r23) + RESTORE_REGISTER(r24) + RESTORE_REGISTER(r25) + RESTORE_REGISTER(r26) + RESTORE_REGISTER(r27) + RESTORE_REGISTER(r28) + RESTORE_REGISTER(r29) + RESTORE_REGISTER(r30) + RESTORE_REGISTER(r31) + /* can't use RESTORE_SPECIAL(MSR) */ + ld r0, SL_MSR(r11) + mtmsrd r0, 0 + RESTORE_SPECIAL(SDR1) + RESTORE_SPECIAL(XER) + + sync + + addi r1,r1,-128 + bl slb_flush_and_rebolt + bl do_after_copyback + addi r1,r1,128 + + ld r11,swsusp_save_area_ptr@toc(r2) + RESTORE_SPECIAL(LR) + + li r3, 0 + blr diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_booke.S new file mode 100644 index 00000000..11a39307 --- /dev/null +++ b/arch/powerpc/kernel/swsusp_booke.S @@ -0,0 +1,193 @@ +/* + * Based on swsusp_32.S, modified for FSL BookE by + * Anton Vorontsov <avorontsov@ru.mvista.com> + * Copyright (c) 2009-2010 MontaVista Software, LLC. + */ + +#include <linux/threads.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/mmu.h> + +/* + * Structure for storing CPU registers on the save area. 
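+ *
+ * All slots here are 32 bits wide (32-bit BookE): r12-r31 are saved as
+ * one block by the stmw below, so SL_R12 marks the start of a 20-word
+ * range and SL_SIZE adds 80 bytes for it.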
+ */ +#define SL_SP 0 +#define SL_PC 4 +#define SL_MSR 8 +#define SL_TCR 0xc +#define SL_SPRG0 0x10 +#define SL_SPRG1 0x14 +#define SL_SPRG2 0x18 +#define SL_SPRG3 0x1c +#define SL_SPRG4 0x20 +#define SL_SPRG5 0x24 +#define SL_SPRG6 0x28 +#define SL_SPRG7 0x2c +#define SL_TBU 0x30 +#define SL_TBL 0x34 +#define SL_R2 0x38 +#define SL_CR 0x3c +#define SL_LR 0x40 +#define SL_R12 0x44 /* r12 to r31 */ +#define SL_SIZE (SL_R12 + 80) + + .section .data + .align 5 + +_GLOBAL(swsusp_save_area) + .space SL_SIZE + + + .section .text + .align 5 + +_GLOBAL(swsusp_arch_suspend) + lis r11,swsusp_save_area@h + ori r11,r11,swsusp_save_area@l + + mflr r0 + stw r0,SL_LR(r11) + mfcr r0 + stw r0,SL_CR(r11) + stw r1,SL_SP(r11) + stw r2,SL_R2(r11) + stmw r12,SL_R12(r11) + + /* Save MSR & TCR */ + mfmsr r4 + stw r4,SL_MSR(r11) + mfspr r4,SPRN_TCR + stw r4,SL_TCR(r11) + + /* Get a stable timebase and save it */ +1: mfspr r4,SPRN_TBRU + stw r4,SL_TBU(r11) + mfspr r5,SPRN_TBRL + stw r5,SL_TBL(r11) + mfspr r3,SPRN_TBRU + cmpw r3,r4 + bne 1b + + /* Save SPRGs */ + mfsprg r4,0 + stw r4,SL_SPRG0(r11) + mfsprg r4,1 + stw r4,SL_SPRG1(r11) + mfsprg r4,2 + stw r4,SL_SPRG2(r11) + mfsprg r4,3 + stw r4,SL_SPRG3(r11) + mfsprg r4,4 + stw r4,SL_SPRG4(r11) + mfsprg r4,5 + stw r4,SL_SPRG5(r11) + mfsprg r4,6 + stw r4,SL_SPRG6(r11) + mfsprg r4,7 + stw r4,SL_SPRG7(r11) + + /* Call the low level suspend stuff (we should probably have made + * a stackframe... + */ + bl swsusp_save + + /* Restore LR from the save area */ + lis r11,swsusp_save_area@h + ori r11,r11,swsusp_save_area@l + lwz r0,SL_LR(r11) + mtlr r0 + + blr + +_GLOBAL(swsusp_arch_resume) + sync + + /* Load ptr the list of pages to copy in r3 */ + lis r11,(restore_pblist)@h + ori r11,r11,restore_pblist@l + lwz r3,0(r11) + + /* Copy the pages. This is a very basic implementation, to + * be replaced by something more cache efficient */ +1: + li r0,256 + mtctr r0 + lwz r5,pbe_address(r3) /* source */ + lwz r6,pbe_orig_address(r3) /* destination */ +2: + lwz r8,0(r5) + lwz r9,4(r5) + lwz r10,8(r5) + lwz r11,12(r5) + addi r5,r5,16 + stw r8,0(r6) + stw r9,4(r6) + stw r10,8(r6) + stw r11,12(r6) + addi r6,r6,16 + bdnz 2b + lwz r3,pbe_next(r3) + cmpwi 0,r3,0 + bne 1b + + bl flush_dcache_L1 + bl flush_instruction_cache + + lis r11,swsusp_save_area@h + ori r11,r11,swsusp_save_area@l + + lwz r4,SL_SPRG0(r11) + mtsprg 0,r4 + lwz r4,SL_SPRG1(r11) + mtsprg 1,r4 + lwz r4,SL_SPRG2(r11) + mtsprg 2,r4 + lwz r4,SL_SPRG3(r11) + mtsprg 3,r4 + lwz r4,SL_SPRG4(r11) + mtsprg 4,r4 + lwz r4,SL_SPRG5(r11) + mtsprg 5,r4 + lwz r4,SL_SPRG6(r11) + mtsprg 6,r4 + lwz r4,SL_SPRG7(r11) + mtsprg 7,r4 + + /* restore the MSR */ + lwz r3,SL_MSR(r11) + mtmsr r3 + + /* Restore TB */ + li r3,0 + mtspr SPRN_TBWL,r3 + lwz r3,SL_TBU(r11) + lwz r4,SL_TBL(r11) + mtspr SPRN_TBWU,r3 + mtspr SPRN_TBWL,r4 + + /* Restore TCR and clear any pending bits in TSR. */ + lwz r4,SL_TCR(r11) + mtspr SPRN_TCR,r4 + lis r4, (TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS)@h + mtspr SPRN_TSR,r4 + + /* Kick decrementer */ + li r0,1 + mtdec r0 + + /* Restore the callee-saved registers and return */ + lwz r0,SL_CR(r11) + mtcr r0 + lwz r2,SL_R2(r11) + lmw r12,SL_R12(r11) + lwz r1,SL_SP(r11) + lwz r0,SL_LR(r11) + mtlr r0 + + li r3,0 + blr diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c new file mode 100644 index 00000000..81c57063 --- /dev/null +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -0,0 +1,626 @@ +/* + * sys_ppc32.c: Conversion between 32bit and 64bit native syscalls. 
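+ *
+ * (A 32-bit task's int arguments arrive in 64-bit GPRs with the upper
+ * 32 bits unspecified; most wrappers here therefore take a u32 and cast
+ * it back to int to force the sign extension the native syscall expects.)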
+ * + * Copyright (C) 2001 IBM + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * + * These routines maintain argument size conversion between 32bit and 64bit + * environment. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/file.h> +#include <linux/signal.h> +#include <linux/resource.h> +#include <linux/times.h> +#include <linux/smp.h> +#include <linux/sem.h> +#include <linux/msg.h> +#include <linux/shm.h> +#include <linux/poll.h> +#include <linux/personality.h> +#include <linux/stat.h> +#include <linux/mman.h> +#include <linux/in.h> +#include <linux/syscalls.h> +#include <linux/unistd.h> +#include <linux/sysctl.h> +#include <linux/binfmts.h> +#include <linux/security.h> +#include <linux/compat.h> +#include <linux/ptrace.h> +#include <linux/elf.h> +#include <linux/ipc.h> +#include <linux/slab.h> + +#include <asm/ptrace.h> +#include <asm/types.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> +#include <asm/time.h> +#include <asm/mmu_context.h> +#include <asm/ppc-pci.h> +#include <asm/syscalls.h> +#include <asm/switch_to.h> + + +asmlinkage long ppc32_select(u32 n, compat_ulong_t __user *inp, + compat_ulong_t __user *outp, compat_ulong_t __user *exp, + compat_uptr_t tvp_x) +{ + /* sign extend n */ + return compat_sys_select((int)n, inp, outp, exp, compat_ptr(tvp_x)); +} + +/* Note: it is necessary to treat option as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. 
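+ * For example, option = 0xffffffff from a 32-bit caller must reach
+ * sys_sysfs() as (int)-1, not as the 64-bit value 4294967295.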
+ */ +asmlinkage long compat_sys_sysfs(u32 option, u32 arg1, u32 arg2) +{ + return sys_sysfs((int)option, arg1, arg2); +} + +#ifdef CONFIG_SYSVIPC +long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t ptr, + u32 fifth) +{ + int version; + + version = call >> 16; /* hack for backward compatibility */ + call &= 0xffff; + + switch (call) { + + case SEMTIMEDOP: + if (fifth) + /* sign extend semid */ + return compat_sys_semtimedop((int)first, + compat_ptr(ptr), second, + compat_ptr(fifth)); + /* else fall through for normal semop() */ + case SEMOP: + /* struct sembuf is the same on 32 and 64bit :)) */ + /* sign extend semid */ + return sys_semtimedop((int)first, compat_ptr(ptr), second, + NULL); + case SEMGET: + /* sign extend key, nsems */ + return sys_semget((int)first, (int)second, third); + case SEMCTL: + /* sign extend semid, semnum */ + return compat_sys_semctl((int)first, (int)second, third, + compat_ptr(ptr)); + + case MSGSND: + /* sign extend msqid */ + return compat_sys_msgsnd((int)first, (int)second, third, + compat_ptr(ptr)); + case MSGRCV: + /* sign extend msqid, msgtyp */ + return compat_sys_msgrcv((int)first, second, (int)fifth, + third, version, compat_ptr(ptr)); + case MSGGET: + /* sign extend key */ + return sys_msgget((int)first, second); + case MSGCTL: + /* sign extend msqid */ + return compat_sys_msgctl((int)first, second, compat_ptr(ptr)); + + case SHMAT: + /* sign extend shmid */ + return compat_sys_shmat((int)first, second, third, version, + compat_ptr(ptr)); + case SHMDT: + return sys_shmdt(compat_ptr(ptr)); + case SHMGET: + /* sign extend key_t */ + return sys_shmget((int)first, second, third); + case SHMCTL: + /* sign extend shmid */ + return compat_sys_shmctl((int)first, second, compat_ptr(ptr)); + + default: + return -ENOSYS; + } + + return -ENOSYS; +} +#endif + +/* Note: it is necessary to treat out_fd and in_fd as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_sendfile(u32 out_fd, u32 in_fd, compat_off_t __user * offset, u32 count) +{ + mm_segment_t old_fs = get_fs(); + int ret; + off_t of; + off_t __user *up; + + if (offset && get_user(of, offset)) + return -EFAULT; + + /* The __user pointer cast is valid because of the set_fs() */ + set_fs(KERNEL_DS); + up = offset ? (off_t __user *) &of : NULL; + ret = sys_sendfile((int)out_fd, (int)in_fd, up, count); + set_fs(old_fs); + + if (offset && put_user(of, offset)) + return -EFAULT; + + return ret; +} + +asmlinkage int compat_sys_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, s32 count) +{ + mm_segment_t old_fs = get_fs(); + int ret; + loff_t lof; + loff_t __user *up; + + if (offset && get_user(lof, offset)) + return -EFAULT; + + /* The __user pointer cast is valid because of the set_fs() */ + set_fs(KERNEL_DS); + up = offset ? 
(loff_t __user *) &lof : NULL; + ret = sys_sendfile64(out_fd, in_fd, up, count); + set_fs(old_fs); + + if (offset && put_user(lof, offset)) + return -EFAULT; + + return ret; +} + +long compat_sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, + unsigned long a3, unsigned long a4, unsigned long a5, + struct pt_regs *regs) +{ + int error; + char * filename; + + filename = getname((char __user *) a0); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + flush_fp_to_thread(current); + flush_altivec_to_thread(current); + + error = compat_do_execve(filename, compat_ptr(a1), compat_ptr(a2), regs); + + putname(filename); + +out: + return error; +} + +/* Note: it is necessary to treat option as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_prctl(u32 option, u32 arg2, u32 arg3, u32 arg4, u32 arg5) +{ + return sys_prctl((int)option, + (unsigned long) arg2, + (unsigned long) arg3, + (unsigned long) arg4, + (unsigned long) arg5); +} + +/* Note: it is necessary to treat pid as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_sched_rr_get_interval(u32 pid, struct compat_timespec __user *interval) +{ + struct timespec t; + int ret; + mm_segment_t old_fs = get_fs (); + + /* The __user pointer cast is valid because of the set_fs() */ + set_fs (KERNEL_DS); + ret = sys_sched_rr_get_interval((int)pid, (struct timespec __user *) &t); + set_fs (old_fs); + if (put_compat_timespec(&t, interval)) + return -EFAULT; + return ret; +} + +/* Note: it is necessary to treat mode as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_access(const char __user * filename, u32 mode) +{ + return sys_access(filename, (int)mode); +} + + +/* Note: it is necessary to treat mode as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_creat(const char __user * pathname, u32 mode) +{ + return sys_creat(pathname, (int)mode); +} + + +/* Note: it is necessary to treat pid and options as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. 
+ */ +asmlinkage long compat_sys_waitpid(u32 pid, unsigned int __user * stat_addr, u32 options) +{ + return sys_waitpid((int)pid, stat_addr, (int)options); +} + + +/* Note: it is necessary to treat gidsetsize as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_getgroups(u32 gidsetsize, gid_t __user *grouplist) +{ + return sys_getgroups((int)gidsetsize, grouplist); +} + + +/* Note: it is necessary to treat pid as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_getpgid(u32 pid) +{ + return sys_getpgid((int)pid); +} + + + +/* Note: it is necessary to treat pid as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_getsid(u32 pid) +{ + return sys_getsid((int)pid); +} + + +/* Note: it is necessary to treat pid and sig as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_kill(u32 pid, u32 sig) +{ + return sys_kill((int)pid, (int)sig); +} + + +/* Note: it is necessary to treat mode as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_mkdir(const char __user * pathname, u32 mode) +{ + return sys_mkdir(pathname, (int)mode); +} + +long compat_sys_nice(u32 increment) +{ + /* sign extend increment */ + return sys_nice((int)increment); +} + +off_t ppc32_lseek(unsigned int fd, u32 offset, unsigned int origin) +{ + /* sign extend n */ + return sys_lseek(fd, (int)offset, origin); +} + +long compat_sys_truncate(const char __user * path, u32 length) +{ + /* sign extend length */ + return sys_truncate(path, (int)length); +} + +long compat_sys_ftruncate(int fd, u32 length) +{ + /* sign extend length */ + return sys_ftruncate(fd, (int)length); +} + +/* Note: it is necessary to treat bufsiz as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. 
+ */ +asmlinkage long compat_sys_readlink(const char __user * path, char __user * buf, u32 bufsiz) +{ + return sys_readlink(path, buf, (int)bufsiz); +} + +/* Note: it is necessary to treat option as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_sched_get_priority_max(u32 policy) +{ + return sys_sched_get_priority_max((int)policy); +} + + +/* Note: it is necessary to treat policy as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_sched_get_priority_min(u32 policy) +{ + return sys_sched_get_priority_min((int)policy); +} + + +/* Note: it is necessary to treat pid as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_sched_getparam(u32 pid, struct sched_param __user *param) +{ + return sys_sched_getparam((int)pid, param); +} + + +/* Note: it is necessary to treat pid as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_sched_getscheduler(u32 pid) +{ + return sys_sched_getscheduler((int)pid); +} + + +/* Note: it is necessary to treat pid as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_sched_setparam(u32 pid, struct sched_param __user *param) +{ + return sys_sched_setparam((int)pid, param); +} + + +/* Note: it is necessary to treat pid and policy as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_sched_setscheduler(u32 pid, u32 policy, struct sched_param __user *param) +{ + return sys_sched_setscheduler((int)pid, (int)policy, param); +} + + +/* Note: it is necessary to treat len as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. 
+ */ +asmlinkage long compat_sys_setdomainname(char __user *name, u32 len) +{ + return sys_setdomainname(name, (int)len); +} + + +/* Note: it is necessary to treat gidsetsize as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_setgroups(u32 gidsetsize, gid_t __user *grouplist) +{ + return sys_setgroups((int)gidsetsize, grouplist); +} + + +asmlinkage long compat_sys_sethostname(char __user *name, u32 len) +{ + /* sign extend len */ + return sys_sethostname(name, (int)len); +} + + +/* Note: it is necessary to treat pid and pgid as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_setpgid(u32 pid, u32 pgid) +{ + return sys_setpgid((int)pid, (int)pgid); +} + +long compat_sys_getpriority(u32 which, u32 who) +{ + /* sign extend which and who */ + return sys_getpriority((int)which, (int)who); +} + +long compat_sys_setpriority(u32 which, u32 who, u32 niceval) +{ + /* sign extend which, who and niceval */ + return sys_setpriority((int)which, (int)who, (int)niceval); +} + +long compat_sys_ioprio_get(u32 which, u32 who) +{ + /* sign extend which and who */ + return sys_ioprio_get((int)which, (int)who); +} + +long compat_sys_ioprio_set(u32 which, u32 who, u32 ioprio) +{ + /* sign extend which, who and ioprio */ + return sys_ioprio_set((int)which, (int)who, (int)ioprio); +} + +/* Note: it is necessary to treat newmask as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_ssetmask(u32 newmask) +{ + return sys_ssetmask((int) newmask); +} + +asmlinkage long compat_sys_syslog(u32 type, char __user * buf, u32 len) +{ + /* sign extend len */ + return sys_syslog(type, buf, (int)len); +} + + +/* Note: it is necessary to treat mask as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long compat_sys_umask(u32 mask) +{ + return sys_umask((int)mask); +} + +unsigned long compat_sys_mmap2(unsigned long addr, size_t len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) +{ + /* This should remain 12 even if PAGE_SIZE changes */ + return sys_mmap(addr, len, prot, flags, fd, pgoff << 12); +} + +long compat_sys_tgkill(u32 tgid, u32 pid, int sig) +{ + /* sign extend tgid, pid */ + return sys_tgkill((int)tgid, (int)pid, sig); +} + +/* + * long long munging: + * The 32 bit ABI passes long longs in an odd even register pair. 
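+ * The otherwise unused pad parameters (reg4, reg6, r4) in the wrappers
+ * below keep the two halves in such a pair, and each wrapper reassembles
+ * the 64-bit value as ((loff_t)hi << 32) | lo.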
+ */ + +compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, + u32 reg6, u32 poshi, u32 poslo) +{ + return sys_pread64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo); +} + +compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, + u32 reg6, u32 poshi, u32 poslo) +{ + return sys_pwrite64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo); +} + +compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offhi, u32 offlo, u32 count) +{ + return sys_readahead(fd, ((loff_t)offhi << 32) | offlo, count); +} + +asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4, + unsigned long high, unsigned long low) +{ + return sys_truncate(path, (high << 32) | low); +} + +asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo, + u32 lenhi, u32 lenlo) +{ + return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo, + ((loff_t)lenhi << 32) | lenlo); +} + +asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high, + unsigned long low) +{ + return sys_ftruncate(fd, (high << 32) | low); +} + +long ppc32_lookup_dcookie(u32 cookie_high, u32 cookie_low, char __user *buf, + size_t len) +{ + return sys_lookup_dcookie((u64)cookie_high << 32 | cookie_low, + buf, len); +} + +long ppc32_fadvise64(int fd, u32 unused, u32 offset_high, u32 offset_low, + size_t len, int advice) +{ + return sys_fadvise64(fd, (u64)offset_high << 32 | offset_low, len, + advice); +} + +asmlinkage long compat_sys_add_key(const char __user *_type, + const char __user *_description, + const void __user *_payload, + u32 plen, + u32 ringid) +{ + return sys_add_key(_type, _description, _payload, plen, ringid); +} + +asmlinkage long compat_sys_request_key(const char __user *_type, + const char __user *_description, + const char __user *_callout_info, + u32 destringid) +{ + return sys_request_key(_type, _description, _callout_info, destringid); +} + +asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags, + unsigned offset_hi, unsigned offset_lo, + unsigned nbytes_hi, unsigned nbytes_lo) +{ + loff_t offset = ((loff_t)offset_hi << 32) | offset_lo; + loff_t nbytes = ((loff_t)nbytes_hi << 32) | nbytes_lo; + + return sys_sync_file_range(fd, offset, nbytes, flags); +} + +asmlinkage long compat_sys_fanotify_mark(int fanotify_fd, unsigned int flags, + unsigned mask_hi, unsigned mask_lo, + int dfd, const char __user *pathname) +{ + u64 mask = ((u64)mask_hi << 32) | mask_lo; + return sys_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname); +} diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c new file mode 100644 index 00000000..f2496f2f --- /dev/null +++ b/arch/powerpc/kernel/syscalls.c @@ -0,0 +1,138 @@ +/* + * Implementation of various system calls for Linux/PowerPC + * + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/i386/kernel/sys_i386.c" + * Adapted from the i386 version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras (paulus@cs.anu.edu.au). + * + * This file contains various random system calls that + * have a non-standard calling sequence on the Linux/PPC + * platform. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/syscalls.h> +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/smp.h> +#include <linux/sem.h> +#include <linux/msg.h> +#include <linux/shm.h> +#include <linux/stat.h> +#include <linux/mman.h> +#include <linux/sys.h> +#include <linux/ipc.h> +#include <linux/utsname.h> +#include <linux/file.h> +#include <linux/init.h> +#include <linux/personality.h> + +#include <asm/uaccess.h> +#include <asm/syscalls.h> +#include <asm/time.h> +#include <asm/unistd.h> + +static inline unsigned long do_mmap2(unsigned long addr, size_t len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long off, int shift) +{ + unsigned long ret = -EINVAL; + + if (!arch_validate_prot(prot)) + goto out; + + if (shift) { + if (off & ((1 << shift) - 1)) + goto out; + off >>= shift; + } + + ret = sys_mmap_pgoff(addr, len, prot, flags, fd, off); +out: + return ret; +} + +unsigned long sys_mmap2(unsigned long addr, size_t len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) +{ + return do_mmap2(addr, len, prot, flags, fd, pgoff, PAGE_SHIFT-12); +} + +unsigned long sys_mmap(unsigned long addr, size_t len, + unsigned long prot, unsigned long flags, + unsigned long fd, off_t offset) +{ + return do_mmap2(addr, len, prot, flags, fd, offset, PAGE_SHIFT); +} + +#ifdef CONFIG_PPC32 +/* + * Due to some executables calling the wrong select we sometimes + * get wrong args. This determines how the args are being passed + * (a single ptr to them all args passed) then calls + * sys_select() with the appropriate args. -- Cort + */ +int +ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp) +{ + if ( (unsigned long)n >= 4096 ) + { + unsigned long __user *buffer = (unsigned long __user *)n; + if (!access_ok(VERIFY_READ, buffer, 5*sizeof(unsigned long)) + || __get_user(n, buffer) + || __get_user(inp, ((fd_set __user * __user *)(buffer+1))) + || __get_user(outp, ((fd_set __user * __user *)(buffer+2))) + || __get_user(exp, ((fd_set __user * __user *)(buffer+3))) + || __get_user(tvp, ((struct timeval __user * __user *)(buffer+4)))) + return -EFAULT; + } + return sys_select(n, inp, outp, exp, tvp); +} +#endif + +#ifdef CONFIG_PPC64 +long ppc64_personality(unsigned long personality) +{ + long ret; + + if (personality(current->personality) == PER_LINUX32 + && personality == PER_LINUX) + personality = PER_LINUX32; + ret = sys_personality(personality); + if (ret == PER_LINUX32) + ret = PER_LINUX; + return ret; +} +#endif + +long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, + u32 len_high, u32 len_low) +{ + return sys_fadvise64(fd, (u64)offset_high << 32 | offset_low, + (u64)len_high << 32 | len_low, advice); +} + +void do_show_syscall(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7, unsigned long r8, + struct pt_regs *regs) +{ + printk("syscall %ld(%lx, %lx, %lx, %lx, %lx, %lx) regs=%p current=%p" + " cpu=%d\n", regs->gpr[0], r3, r4, r5, r6, r7, r8, regs, + current, smp_processor_id()); +} + +void do_show_syscall_exit(unsigned long r3) +{ + printk(" -> %lx, current=%p cpu=%d\n", r3, current, smp_processor_id()); +} diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c new file mode 100644 index 00000000..3529446c --- /dev/null +++ b/arch/powerpc/kernel/sysfs.c @@ -0,0 +1,666 @@ +#include <linux/device.h> +#include <linux/cpu.h> +#include <linux/smp.h> +#include 
<linux/percpu.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/export.h> +#include <linux/nodemask.h> +#include <linux/cpumask.h> +#include <linux/notifier.h> + +#include <asm/current.h> +#include <asm/processor.h> +#include <asm/cputable.h> +#include <asm/hvcall.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/smp.h> +#include <asm/pmc.h> + +#include "cacheinfo.h" + +#ifdef CONFIG_PPC64 +#include <asm/paca.h> +#include <asm/lppaca.h> +#endif + +static DEFINE_PER_CPU(struct cpu, cpu_devices); + +/* + * SMT snooze delay stuff, 64-bit only for now + */ + +#ifdef CONFIG_PPC64 + +/* Time in microseconds we delay before sleeping in the idle loop */ +DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; + +static ssize_t store_smt_snooze_delay(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct cpu *cpu = container_of(dev, struct cpu, dev); + ssize_t ret; + long snooze; + + ret = sscanf(buf, "%ld", &snooze); + if (ret != 1) + return -EINVAL; + + per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; + update_smt_snooze_delay(snooze); + + return count; +} + +static ssize_t show_smt_snooze_delay(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct cpu *cpu = container_of(dev, struct cpu, dev); + + return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id)); +} + +static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, + store_smt_snooze_delay); + +static int __init setup_smt_snooze_delay(char *str) +{ + unsigned int cpu; + long snooze; + + if (!cpu_has_feature(CPU_FTR_SMT)) + return 1; + + snooze = simple_strtol(str, NULL, 10); + for_each_possible_cpu(cpu) + per_cpu(smt_snooze_delay, cpu) = snooze; + + return 1; +} +__setup("smt-snooze-delay=", setup_smt_snooze_delay); + +#endif /* CONFIG_PPC64 */ + +/* + * Enabling PMCs will slow partition context switch times so we only do + * it the first time we write to the PMCs. 
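+ * (ppc_set_pmu_inuse() is still called on every write: it only marks the
+ * PMU as in use so the platform preserves PMC state across partition
+ * context switches once we have touched it.)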
+ */ + +static DEFINE_PER_CPU(char, pmcs_enabled); + +void ppc_enable_pmcs(void) +{ + ppc_set_pmu_inuse(1); + + /* Only need to enable them once */ + if (__get_cpu_var(pmcs_enabled)) + return; + + __get_cpu_var(pmcs_enabled) = 1; + + if (ppc_md.enable_pmcs) + ppc_md.enable_pmcs(); +} +EXPORT_SYMBOL(ppc_enable_pmcs); + +#define SYSFS_PMCSETUP(NAME, ADDRESS) \ +static void read_##NAME(void *val) \ +{ \ + *(unsigned long *)val = mfspr(ADDRESS); \ +} \ +static void write_##NAME(void *val) \ +{ \ + ppc_enable_pmcs(); \ + mtspr(ADDRESS, *(unsigned long *)val); \ +} \ +static ssize_t show_##NAME(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct cpu *cpu = container_of(dev, struct cpu, dev); \ + unsigned long val; \ + smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \ + return sprintf(buf, "%lx\n", val); \ +} \ +static ssize_t __used \ + store_##NAME(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t count) \ +{ \ + struct cpu *cpu = container_of(dev, struct cpu, dev); \ + unsigned long val; \ + int ret = sscanf(buf, "%lx", &val); \ + if (ret != 1) \ + return -EINVAL; \ + smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \ + return count; \ +} + + +/* Let's define all possible registers, we'll only hook up the ones + * that are implemented on the current processor + */ + +#if defined(CONFIG_PPC64) +#define HAS_PPC_PMC_CLASSIC 1 +#define HAS_PPC_PMC_IBM 1 +#define HAS_PPC_PMC_PA6T 1 +#elif defined(CONFIG_6xx) +#define HAS_PPC_PMC_CLASSIC 1 +#define HAS_PPC_PMC_IBM 1 +#define HAS_PPC_PMC_G4 1 +#endif + + +#ifdef HAS_PPC_PMC_CLASSIC +SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0); +SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1); +SYSFS_PMCSETUP(pmc1, SPRN_PMC1); +SYSFS_PMCSETUP(pmc2, SPRN_PMC2); +SYSFS_PMCSETUP(pmc3, SPRN_PMC3); +SYSFS_PMCSETUP(pmc4, SPRN_PMC4); +SYSFS_PMCSETUP(pmc5, SPRN_PMC5); +SYSFS_PMCSETUP(pmc6, SPRN_PMC6); + +#ifdef HAS_PPC_PMC_G4 +SYSFS_PMCSETUP(mmcr2, SPRN_MMCR2); +#endif + +#ifdef CONFIG_PPC64 +SYSFS_PMCSETUP(pmc7, SPRN_PMC7); +SYSFS_PMCSETUP(pmc8, SPRN_PMC8); + +SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); +SYSFS_PMCSETUP(purr, SPRN_PURR); +SYSFS_PMCSETUP(spurr, SPRN_SPURR); +SYSFS_PMCSETUP(dscr, SPRN_DSCR); +SYSFS_PMCSETUP(pir, SPRN_PIR); + +static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); +static DEVICE_ATTR(spurr, 0600, show_spurr, NULL); +static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr); +static DEVICE_ATTR(purr, 0600, show_purr, store_purr); +static DEVICE_ATTR(pir, 0400, show_pir, NULL); + +unsigned long dscr_default = 0; +EXPORT_SYMBOL(dscr_default); + +static ssize_t show_dscr_default(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%lx\n", dscr_default); +} + +static ssize_t __used store_dscr_default(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + unsigned long val; + int ret = 0; + + ret = sscanf(buf, "%lx", &val); + if (ret != 1) + return -EINVAL; + dscr_default = val; + + return count; +} + +static DEVICE_ATTR(dscr_default, 0600, + show_dscr_default, store_dscr_default); + +static void sysfs_create_dscr_default(void) +{ + int err = 0; + if (cpu_has_feature(CPU_FTR_DSCR)) + err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); +} +#endif /* CONFIG_PPC64 */ + +#ifdef HAS_PPC_PMC_PA6T +SYSFS_PMCSETUP(pa6t_pmc0, SPRN_PA6T_PMC0); +SYSFS_PMCSETUP(pa6t_pmc1, SPRN_PA6T_PMC1); +SYSFS_PMCSETUP(pa6t_pmc2, SPRN_PA6T_PMC2); +SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3); +SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4); 
+SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5);
+#ifdef CONFIG_DEBUG_KERNEL
+SYSFS_PMCSETUP(hid0, SPRN_HID0);
+SYSFS_PMCSETUP(hid1, SPRN_HID1);
+SYSFS_PMCSETUP(hid4, SPRN_HID4);
+SYSFS_PMCSETUP(hid5, SPRN_HID5);
+SYSFS_PMCSETUP(ima0, SPRN_PA6T_IMA0);
+SYSFS_PMCSETUP(ima1, SPRN_PA6T_IMA1);
+SYSFS_PMCSETUP(ima2, SPRN_PA6T_IMA2);
+SYSFS_PMCSETUP(ima3, SPRN_PA6T_IMA3);
+SYSFS_PMCSETUP(ima4, SPRN_PA6T_IMA4);
+SYSFS_PMCSETUP(ima5, SPRN_PA6T_IMA5);
+SYSFS_PMCSETUP(ima6, SPRN_PA6T_IMA6);
+SYSFS_PMCSETUP(ima7, SPRN_PA6T_IMA7);
+SYSFS_PMCSETUP(ima8, SPRN_PA6T_IMA8);
+SYSFS_PMCSETUP(ima9, SPRN_PA6T_IMA9);
+SYSFS_PMCSETUP(imaat, SPRN_PA6T_IMAAT);
+SYSFS_PMCSETUP(btcr, SPRN_PA6T_BTCR);
+SYSFS_PMCSETUP(pccr, SPRN_PA6T_PCCR);
+SYSFS_PMCSETUP(rpccr, SPRN_PA6T_RPCCR);
+SYSFS_PMCSETUP(der, SPRN_PA6T_DER);
+SYSFS_PMCSETUP(mer, SPRN_PA6T_MER);
+SYSFS_PMCSETUP(ber, SPRN_PA6T_BER);
+SYSFS_PMCSETUP(ier, SPRN_PA6T_IER);
+SYSFS_PMCSETUP(sier, SPRN_PA6T_SIER);
+SYSFS_PMCSETUP(siar, SPRN_PA6T_SIAR);
+SYSFS_PMCSETUP(tsr0, SPRN_PA6T_TSR0);
+SYSFS_PMCSETUP(tsr1, SPRN_PA6T_TSR1);
+SYSFS_PMCSETUP(tsr2, SPRN_PA6T_TSR2);
+SYSFS_PMCSETUP(tsr3, SPRN_PA6T_TSR3);
+#endif /* CONFIG_DEBUG_KERNEL */
+#endif /* HAS_PPC_PMC_PA6T */
+
+#ifdef HAS_PPC_PMC_IBM
+static struct device_attribute ibm_common_attrs[] = {
+	__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
+	__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
+};
+#endif /* HAS_PPC_PMC_IBM */
+
+#ifdef HAS_PPC_PMC_G4
+static struct device_attribute g4_common_attrs[] = {
+	__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
+	__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
+	__ATTR(mmcr2, 0600, show_mmcr2, store_mmcr2),
+};
+#endif /* HAS_PPC_PMC_G4 */
+
+static struct device_attribute classic_pmc_attrs[] = {
+	__ATTR(pmc1, 0600, show_pmc1, store_pmc1),
+	__ATTR(pmc2, 0600, show_pmc2, store_pmc2),
+	__ATTR(pmc3, 0600, show_pmc3, store_pmc3),
+	__ATTR(pmc4, 0600, show_pmc4, store_pmc4),
+	__ATTR(pmc5, 0600, show_pmc5, store_pmc5),
+	__ATTR(pmc6, 0600, show_pmc6, store_pmc6),
+#ifdef CONFIG_PPC64
+	__ATTR(pmc7, 0600, show_pmc7, store_pmc7),
+	__ATTR(pmc8, 0600, show_pmc8, store_pmc8),
+#endif
+};
+
+#ifdef HAS_PPC_PMC_PA6T
+static struct device_attribute pa6t_attrs[] = {
+	__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
+	__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
+	__ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0),
+	__ATTR(pmc1, 0600, show_pa6t_pmc1, store_pa6t_pmc1),
+	__ATTR(pmc2, 0600, show_pa6t_pmc2, store_pa6t_pmc2),
+	__ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3),
+	__ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4),
+	__ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5),
+#ifdef CONFIG_DEBUG_KERNEL
+	__ATTR(hid0, 0600, show_hid0, store_hid0),
+	__ATTR(hid1, 0600, show_hid1, store_hid1),
+	__ATTR(hid4, 0600, show_hid4, store_hid4),
+	__ATTR(hid5, 0600, show_hid5, store_hid5),
+	__ATTR(ima0, 0600, show_ima0, store_ima0),
+	__ATTR(ima1, 0600, show_ima1, store_ima1),
+	__ATTR(ima2, 0600, show_ima2, store_ima2),
+	__ATTR(ima3, 0600, show_ima3, store_ima3),
+	__ATTR(ima4, 0600, show_ima4, store_ima4),
+	__ATTR(ima5, 0600, show_ima5, store_ima5),
+	__ATTR(ima6, 0600, show_ima6, store_ima6),
+	__ATTR(ima7, 0600, show_ima7, store_ima7),
+	__ATTR(ima8, 0600, show_ima8, store_ima8),
+	__ATTR(ima9, 0600, show_ima9, store_ima9),
+	__ATTR(imaat, 0600, show_imaat, store_imaat),
+	__ATTR(btcr, 0600, show_btcr, store_btcr),
+	__ATTR(pccr, 0600, show_pccr, store_pccr),
+	__ATTR(rpccr, 0600, show_rpccr, store_rpccr),
+	__ATTR(der, 0600, show_der, store_der),
+	__ATTR(mer, 0600, show_mer, store_mer),
+
__ATTR(ber, 0600, show_ber, store_ber), + __ATTR(ier, 0600, show_ier, store_ier), + __ATTR(sier, 0600, show_sier, store_sier), + __ATTR(siar, 0600, show_siar, store_siar), + __ATTR(tsr0, 0600, show_tsr0, store_tsr0), + __ATTR(tsr1, 0600, show_tsr1, store_tsr1), + __ATTR(tsr2, 0600, show_tsr2, store_tsr2), + __ATTR(tsr3, 0600, show_tsr3, store_tsr3), +#endif /* CONFIG_DEBUG_KERNEL */ +}; +#endif /* HAS_PPC_PMC_PA6T */ +#endif /* HAS_PPC_PMC_CLASSIC */ + +static void __cpuinit register_cpu_online(unsigned int cpu) +{ + struct cpu *c = &per_cpu(cpu_devices, cpu); + struct device *s = &c->dev; + struct device_attribute *attrs, *pmc_attrs; + int i, nattrs; + +#ifdef CONFIG_PPC64 + if (cpu_has_feature(CPU_FTR_SMT)) + device_create_file(s, &dev_attr_smt_snooze_delay); +#endif + + /* PMC stuff */ + switch (cur_cpu_spec->pmc_type) { +#ifdef HAS_PPC_PMC_IBM + case PPC_PMC_IBM: + attrs = ibm_common_attrs; + nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute); + pmc_attrs = classic_pmc_attrs; + break; +#endif /* HAS_PPC_PMC_IBM */ +#ifdef HAS_PPC_PMC_G4 + case PPC_PMC_G4: + attrs = g4_common_attrs; + nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute); + pmc_attrs = classic_pmc_attrs; + break; +#endif /* HAS_PPC_PMC_G4 */ +#ifdef HAS_PPC_PMC_PA6T + case PPC_PMC_PA6T: + /* PA Semi starts counting at PMC0 */ + attrs = pa6t_attrs; + nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); + pmc_attrs = NULL; + break; +#endif /* HAS_PPC_PMC_PA6T */ + default: + attrs = NULL; + nattrs = 0; + pmc_attrs = NULL; + } + + for (i = 0; i < nattrs; i++) + device_create_file(s, &attrs[i]); + + if (pmc_attrs) + for (i = 0; i < cur_cpu_spec->num_pmcs; i++) + device_create_file(s, &pmc_attrs[i]); + +#ifdef CONFIG_PPC64 + if (cpu_has_feature(CPU_FTR_MMCRA)) + device_create_file(s, &dev_attr_mmcra); + + if (cpu_has_feature(CPU_FTR_PURR)) + device_create_file(s, &dev_attr_purr); + + if (cpu_has_feature(CPU_FTR_SPURR)) + device_create_file(s, &dev_attr_spurr); + + if (cpu_has_feature(CPU_FTR_DSCR)) + device_create_file(s, &dev_attr_dscr); + + if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) + device_create_file(s, &dev_attr_pir); +#endif /* CONFIG_PPC64 */ + + cacheinfo_cpu_online(cpu); +} + +#ifdef CONFIG_HOTPLUG_CPU +static void unregister_cpu_online(unsigned int cpu) +{ + struct cpu *c = &per_cpu(cpu_devices, cpu); + struct device *s = &c->dev; + struct device_attribute *attrs, *pmc_attrs; + int i, nattrs; + + BUG_ON(!c->hotpluggable); + +#ifdef CONFIG_PPC64 + if (cpu_has_feature(CPU_FTR_SMT)) + device_remove_file(s, &dev_attr_smt_snooze_delay); +#endif + + /* PMC stuff */ + switch (cur_cpu_spec->pmc_type) { +#ifdef HAS_PPC_PMC_IBM + case PPC_PMC_IBM: + attrs = ibm_common_attrs; + nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute); + pmc_attrs = classic_pmc_attrs; + break; +#endif /* HAS_PPC_PMC_IBM */ +#ifdef HAS_PPC_PMC_G4 + case PPC_PMC_G4: + attrs = g4_common_attrs; + nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute); + pmc_attrs = classic_pmc_attrs; + break; +#endif /* HAS_PPC_PMC_G4 */ +#ifdef HAS_PPC_PMC_PA6T + case PPC_PMC_PA6T: + /* PA Semi starts counting at PMC0 */ + attrs = pa6t_attrs; + nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); + pmc_attrs = NULL; + break; +#endif /* HAS_PPC_PMC_PA6T */ + default: + attrs = NULL; + nattrs = 0; + pmc_attrs = NULL; + } + + for (i = 0; i < nattrs; i++) + device_remove_file(s, &attrs[i]); + + if (pmc_attrs) + for (i = 0; i < cur_cpu_spec->num_pmcs; i++) + device_remove_file(s, 
&pmc_attrs[i]); + +#ifdef CONFIG_PPC64 + if (cpu_has_feature(CPU_FTR_MMCRA)) + device_remove_file(s, &dev_attr_mmcra); + + if (cpu_has_feature(CPU_FTR_PURR)) + device_remove_file(s, &dev_attr_purr); + + if (cpu_has_feature(CPU_FTR_SPURR)) + device_remove_file(s, &dev_attr_spurr); + + if (cpu_has_feature(CPU_FTR_DSCR)) + device_remove_file(s, &dev_attr_dscr); + + if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) + device_remove_file(s, &dev_attr_pir); +#endif /* CONFIG_PPC64 */ + + cacheinfo_cpu_offline(cpu); +} + +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE +ssize_t arch_cpu_probe(const char *buf, size_t count) +{ + if (ppc_md.cpu_probe) + return ppc_md.cpu_probe(buf, count); + + return -EINVAL; +} + +ssize_t arch_cpu_release(const char *buf, size_t count) +{ + if (ppc_md.cpu_release) + return ppc_md.cpu_release(buf, count); + + return -EINVAL; +} +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ + +#endif /* CONFIG_HOTPLUG_CPU */ + +static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned int)(long)hcpu; + + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + register_cpu_online(cpu); + break; +#ifdef CONFIG_HOTPLUG_CPU + case CPU_DEAD: + case CPU_DEAD_FROZEN: + unregister_cpu_online(cpu); + break; +#endif + } + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata sysfs_cpu_nb = { + .notifier_call = sysfs_cpu_notify, +}; + +static DEFINE_MUTEX(cpu_mutex); + +int cpu_add_dev_attr(struct device_attribute *attr) +{ + int cpu; + + mutex_lock(&cpu_mutex); + + for_each_possible_cpu(cpu) { + device_create_file(get_cpu_device(cpu), attr); + } + + mutex_unlock(&cpu_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(cpu_add_dev_attr); + +int cpu_add_dev_attr_group(struct attribute_group *attrs) +{ + int cpu; + struct device *dev; + int ret; + + mutex_lock(&cpu_mutex); + + for_each_possible_cpu(cpu) { + dev = get_cpu_device(cpu); + ret = sysfs_create_group(&dev->kobj, attrs); + WARN_ON(ret != 0); + } + + mutex_unlock(&cpu_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(cpu_add_dev_attr_group); + + +void cpu_remove_dev_attr(struct device_attribute *attr) +{ + int cpu; + + mutex_lock(&cpu_mutex); + + for_each_possible_cpu(cpu) { + device_remove_file(get_cpu_device(cpu), attr); + } + + mutex_unlock(&cpu_mutex); +} +EXPORT_SYMBOL_GPL(cpu_remove_dev_attr); + +void cpu_remove_dev_attr_group(struct attribute_group *attrs) +{ + int cpu; + struct device *dev; + + mutex_lock(&cpu_mutex); + + for_each_possible_cpu(cpu) { + dev = get_cpu_device(cpu); + sysfs_remove_group(&dev->kobj, attrs); + } + + mutex_unlock(&cpu_mutex); +} +EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group); + + +/* NUMA stuff */ + +#ifdef CONFIG_NUMA +static void register_nodes(void) +{ + int i; + + for (i = 0; i < MAX_NUMNODES; i++) + register_one_node(i); +} + +int sysfs_add_device_to_node(struct device *dev, int nid) +{ + struct node *node = &node_devices[nid]; + return sysfs_create_link(&node->dev.kobj, &dev->kobj, + kobject_name(&dev->kobj)); +} +EXPORT_SYMBOL_GPL(sysfs_add_device_to_node); + +void sysfs_remove_device_from_node(struct device *dev, int nid) +{ + struct node *node = &node_devices[nid]; + sysfs_remove_link(&node->dev.kobj, kobject_name(&dev->kobj)); +} +EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node); + +#else +static void register_nodes(void) +{ + return; +} + +#endif + +/* Only valid if CPU is present. 
*/ +static ssize_t show_physical_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cpu *cpu = container_of(dev, struct cpu, dev); + + return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->dev.id)); +} +static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL); + +static int __init topology_init(void) +{ + int cpu; + + register_nodes(); + register_cpu_notifier(&sysfs_cpu_nb); + + for_each_possible_cpu(cpu) { + struct cpu *c = &per_cpu(cpu_devices, cpu); + + /* + * For now, we just see if the system supports making + * the RTAS calls for CPU hotplug. But, there may be a + * more comprehensive way to do this for an individual + * CPU. For instance, the boot cpu might never be valid + * for hotplugging. + */ + if (ppc_md.cpu_die) + c->hotpluggable = 1; + + if (cpu_online(cpu) || c->hotpluggable) { + register_cpu(c, cpu); + + device_create_file(&c->dev, &dev_attr_physical_id); + } + + if (cpu_online(cpu)) + register_cpu_online(cpu); + } +#ifdef CONFIG_PPC64 + sysfs_create_dscr_default(); +#endif /* CONFIG_PPC64 */ + + return 0; +} +subsys_initcall(topology_init); diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S new file mode 100644 index 00000000..93219c34 --- /dev/null +++ b/arch/powerpc/kernel/systbl.S @@ -0,0 +1,47 @@ +/* + * This file contains the table of syscall-handling functions. + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras. + * + * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com) + * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/ppc_asm.h> + +#ifdef CONFIG_PPC64 +#define SYSCALL(func) .llong .sys_##func,.sys_##func +#define COMPAT_SYS(func) .llong .sys_##func,.compat_sys_##func +#define PPC_SYS(func) .llong .ppc_##func,.ppc_##func +#define OLDSYS(func) .llong .sys_ni_syscall,.sys_ni_syscall +#define SYS32ONLY(func) .llong .sys_ni_syscall,.compat_sys_##func +#define SYSX(f, f3264, f32) .llong .f,.f3264 +#else +#define SYSCALL(func) .long sys_##func +#define COMPAT_SYS(func) .long sys_##func +#define PPC_SYS(func) .long ppc_##func +#define OLDSYS(func) .long sys_##func +#define SYS32ONLY(func) .long sys_##func +#define SYSX(f, f3264, f32) .long f32 +#endif +#define SYSCALL_SPU(func) SYSCALL(func) +#define COMPAT_SYS_SPU(func) COMPAT_SYS(func) +#define PPC_SYS_SPU(func) PPC_SYS(func) +#define SYSX_SPU(f, f3264, f32) SYSX(f, f3264, f32) + +#ifdef CONFIG_PPC64 +#define sys_sigpending sys_ni_syscall +#define sys_old_getrlimit sys_ni_syscall + + .p2align 3 +#endif + +_GLOBAL(sys_call_table) +#include <asm/systbl.h> diff --git a/arch/powerpc/kernel/systbl_chk.c b/arch/powerpc/kernel/systbl_chk.c new file mode 100644 index 00000000..238aa63c --- /dev/null +++ b/arch/powerpc/kernel/systbl_chk.c @@ -0,0 +1,58 @@ +/* + * This file, when run through CPP produces a list of syscall numbers + * in the order of systbl.h. That way we can check for gaps and syscalls + * that are out of order. + * + * Unfortunately, we cannot check for the correct ordering of entries + * using SYSX(). 
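+ * (After preprocessing, each table entry is reduced to its __NR_ number,
+ * or to -1 where no check is possible, and systbl_chk.sh verifies that
+ * the numbers increase by exactly one.)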
+ * + * Copyright © IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/unistd.h> + +#define SYSCALL(func) __NR_##func +#define COMPAT_SYS(func) __NR_##func +#define PPC_SYS(func) __NR_##func +#ifdef CONFIG_PPC64 +#define OLDSYS(func) -1 +#define SYS32ONLY(func) -1 +#else +#define OLDSYS(func) __NR_old##func +#define SYS32ONLY(func) __NR_##func +#endif +#define SYSX(f, f3264, f32) -1 + +#define SYSCALL_SPU(func) SYSCALL(func) +#define COMPAT_SYS_SPU(func) COMPAT_SYS(func) +#define PPC_SYS_SPU(func) PPC_SYS(func) +#define SYSX_SPU(f, f3264, f32) SYSX(f, f3264, f32) + +/* Just insert a marker for ni_syscalls */ +#define __NR_ni_syscall -1 + +/* + * These are the known exceptions. + * Hopefully, there will be no more. + */ +#define __NR_llseek __NR__llseek +#undef __NR_umount +#define __NR_umount __NR_umount2 +#define __NR_old_getrlimit __NR_getrlimit +#define __NR_newstat __NR_stat +#define __NR_newlstat __NR_lstat +#define __NR_newfstat __NR_fstat +#define __NR_newuname __NR_uname +#define __NR_sysctl __NR__sysctl +#define __NR_olddebug_setcontext __NR_sys_debug_setcontext + +/* We call sys_ugetrlimit for syscall number __NR_getrlimit */ +#define getrlimit ugetrlimit + +START_TABLE +#include <asm/systbl.h> +END_TABLE __NR_syscalls diff --git a/arch/powerpc/kernel/systbl_chk.sh b/arch/powerpc/kernel/systbl_chk.sh new file mode 100644 index 00000000..19415e76 --- /dev/null +++ b/arch/powerpc/kernel/systbl_chk.sh @@ -0,0 +1,33 @@ +#!/bin/sh +# +# Just process the CPP output from systbl_chk.c and complain +# if anything is out of order. +# +# Copyright © 2008 IBM Corporation +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version +# 2 of the License, or (at your option) any later version. + +awk 'BEGIN { num = -1; } # Ignore the beginning of the file + /^#/ { next; } + /^[ \t]*$/ { next; } + /^START_TABLE/ { num = 0; next; } + /^END_TABLE/ { + if (num != $2) { + printf "__NR_syscalls (%s) is not one more than the last syscall (%s)\n", + $2, num - 1; + exit(1); + } + num = -1; # Ignore the rest of the file + } + { + if (num == -1) next; + if (($1 != -1) && ($1 != num)) { + printf "Syscall %s out of order (expected %s)\n", + $1, num; + exit(1); + }; + num++; + }' "$1" diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c new file mode 100644 index 00000000..a753b72e --- /dev/null +++ b/arch/powerpc/kernel/tau_6xx.c @@ -0,0 +1,270 @@ +/* + * temp.c Thermal management for cpu's with Thermal Assist Units + * + * Written by Troy Benjegerdes <hozer@drgw.net> + * + * TODO: + * dynamic power management to limit peak CPU temp (using ICTC) + * calibration??? 
+ * + * Silly, crazy ideas: use cpu load (from scheduler) and ICTC to extend battery + * life in portables, and add a 'performance/watt' metric somewhere in /proc + */ + +#include <linux/errno.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/init.h> + +#include <asm/io.h> +#include <asm/reg.h> +#include <asm/nvram.h> +#include <asm/cache.h> +#include <asm/8xx_immap.h> +#include <asm/machdep.h> + +static struct tau_temp +{ + int interrupts; + unsigned char low; + unsigned char high; + unsigned char grew; +} tau[NR_CPUS]; + +struct timer_list tau_timer; + +#undef DEBUG + +/* TODO: put these in a /proc interface, with some sanity checks, and maybe + * dynamic adjustment to minimize # of interrupts */ +/* configurable values for step size and how much to expand the window when + * we get an interrupt. These are based on the limit that was out of range */ +#define step_size 2 /* step size when temp goes out of range */ +#define window_expand 1 /* expand the window by this much */ +/* configurable values for shrinking the window */ +#define shrink_timer 2*HZ /* period between shrinking the window */ +#define min_window 2 /* minimum window size, degrees C */ + +void set_thresholds(unsigned long cpu) +{ +#ifdef CONFIG_TAU_INT + /* + * setup THRM1, + * threshold, valid bit, enable interrupts, interrupt when below threshold + */ + mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID); + + /* setup THRM2, + * threshold, valid bit, enable interrupts, interrupt when above threshold + */ + mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE); +#else + /* same thing but don't enable interrupts */ + mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TID); + mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V); +#endif +} + +void TAUupdate(int cpu) +{ + unsigned thrm; + +#ifdef DEBUG + printk("TAUupdate "); +#endif + + /* if both thresholds are crossed, the step_sizes cancel out + * and the window winds up getting expanded twice. */ + if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */ + if(thrm & THRM1_TIN){ /* crossed low threshold */ + if (tau[cpu].low >= step_size){ + tau[cpu].low -= step_size; + tau[cpu].high -= (step_size - window_expand); + } + tau[cpu].grew = 1; +#ifdef DEBUG + printk("low threshold crossed "); +#endif + } + } + if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? 
 */
+		if(thrm & THRM1_TIN){ /* crossed high threshold */
+			if (tau[cpu].high <= 127-step_size){
+				tau[cpu].low += (step_size - window_expand);
+				tau[cpu].high += step_size;
+			}
+			tau[cpu].grew = 1;
+#ifdef DEBUG
+			printk("high threshold crossed ");
+#endif
+		}
+	}
+
+#ifdef DEBUG
+	printk("grew = %d\n", tau[cpu].grew);
+#endif
+
+#ifndef CONFIG_TAU_INT /* tau_timeout will do this if not using interrupts */
+	set_thresholds(cpu);
+#endif
+
+}
+
+#ifdef CONFIG_TAU_INT
+/*
+ * TAU interrupts - called when we have a thermal assist unit interrupt
+ * with interrupts disabled
+ */
+
+void TAUException(struct pt_regs * regs)
+{
+	int cpu = smp_processor_id();
+
+	irq_enter();
+	tau[cpu].interrupts++;
+
+	TAUupdate(cpu);
+
+	irq_exit();
+}
+#endif /* CONFIG_TAU_INT */
+
+static void tau_timeout(void * info)
+{
+	int cpu;
+	unsigned long flags;
+	int size;
+	int shrink;
+
+	/* disabling interrupts *should* be okay */
+	local_irq_save(flags);
+	cpu = smp_processor_id();
+
+#ifndef CONFIG_TAU_INT
+	TAUupdate(cpu);
+#endif
+
+	size = tau[cpu].high - tau[cpu].low;
+	if (size > min_window && ! tau[cpu].grew) {
+		/* do an exponential shrink of half the amount currently over size */
+		shrink = (2 + size - min_window) / 4;
+		if (shrink) {
+			tau[cpu].low += shrink;
+			tau[cpu].high -= shrink;
+		} else { /* size must have been min_window + 1 */
+			tau[cpu].low += 1;
+#if 1 /* debug */
+			if ((tau[cpu].high - tau[cpu].low) != min_window){
+				printk(KERN_ERR "temp.c: line %d, logic error\n", __LINE__);
+			}
+#endif
+		}
+	}
+
+	tau[cpu].grew = 0;
+
+	set_thresholds(cpu);
+
+	/*
+	 * Do the enable every time, since otherwise a bunch of (relatively)
+	 * complex sleep code needs to be added. One mtspr every time
+	 * tau_timeout is called is probably not a big deal.
+	 *
+	 * Enable the thermal sensor and set up the sample interval timer.
+	 * We need 20 us to do the compare; until a nice 'cpu_speed' function
+	 * call is implemented, just assume a 500 MHz clock. It doesn't really
+	 * matter if we take too long for a compare since it's all interrupt
+	 * driven anyway.
+	 *
+	 * Use an extra long time (60 us @ 500 MHz) to be safe.
+	 */
+	mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E);
+
+	local_irq_restore(flags);
+}
+
+static void tau_timeout_smp(unsigned long unused)
+{
+
+	/* schedule ourselves to be run again */
+	mod_timer(&tau_timer, jiffies + shrink_timer);
+	on_each_cpu(tau_timeout, NULL, 0);
+}
+
+/*
+ * set up the TAU
+ *
+ * Set things up to use THRM1 as a temperature lower bound, and THRM2 as
+ * an upper bound.
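+ * The low/high thresholds form a moving window around the measured
+ * temperature: TAUupdate() shifts it by step_size toward whichever edge
+ * was crossed (growing it by window_expand), and tau_timeout() shrinks
+ * it back toward min_window degrees while readings stay inside it.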
+ * Start off at zero + */ + +int tau_initialized = 0; + +void __init TAU_init_smp(void * info) +{ + unsigned long cpu = smp_processor_id(); + + /* set these to a reasonable value and let the timer shrink the + * window */ + tau[cpu].low = 5; + tau[cpu].high = 120; + + set_thresholds(cpu); +} + +int __init TAU_init(void) +{ + /* We assume in SMP that if one CPU has TAU support, they + * all have it --BenH + */ + if (!cpu_has_feature(CPU_FTR_TAU)) { + printk("Thermal assist unit not available\n"); + tau_initialized = 0; + return 1; + } + + + /* first, set up the window shrinking timer */ + init_timer(&tau_timer); + tau_timer.function = tau_timeout_smp; + tau_timer.expires = jiffies + shrink_timer; + add_timer(&tau_timer); + + on_each_cpu(TAU_init_smp, NULL, 0); + + printk("Thermal assist unit "); +#ifdef CONFIG_TAU_INT + printk("using interrupts, "); +#else + printk("using timers, "); +#endif + printk("shrink_timer: %d jiffies\n", shrink_timer); + tau_initialized = 1; + + return 0; +} + +__initcall(TAU_init); + +/* + * return current temp + */ + +u32 cpu_temp_both(unsigned long cpu) +{ + return ((tau[cpu].high << 16) | tau[cpu].low); +} + +int cpu_temp(unsigned long cpu) +{ + return ((tau[cpu].high + tau[cpu].low) / 2); +} + +int tau_interrupts(unsigned long cpu) +{ + return (tau[cpu].interrupts); +} diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c new file mode 100644 index 00000000..730e69cb --- /dev/null +++ b/arch/powerpc/kernel/time.c @@ -0,0 +1,1036 @@ +/* + * Common time routines among all ppc machines. + * + * Written by Cort Dougan (cort@cs.nmt.edu) to merge + * Paul Mackerras' version and mine for PReP and Pmac. + * MPC8xx/MBX changes by Dan Malek (dmalek@jlc.net). + * Converted for 64-bit by Mike Corrigan (mikejc@us.ibm.com) + * + * First round of bugfixes by Gabriel Paubert (paubert@iram.es) + * to make clock more stable (2.4.0-test5). The only thing + * that this code assumes is that the timebases have been synchronized + * by firmware on SMP and are never stopped (never do sleep + * on SMP then, nap and doze are OK). + * + * Speeded up do_gettimeofday by getting rid of references to + * xtime (which required locks for consistency). (mikejc@us.ibm.com) + * + * TODO (not necessarily in this file): + * - improve precision and reproducibility of timebase frequency + * measurement at boot time. + * - for astronomical applications: add a new function to get + * non ambiguous timestamps even around leap seconds. This needs + * a new timestamp format and a good name. + * + * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 + * "A Kernel Model for Precision Timekeeping" by Dave Mills + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
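
The packed value returned by cpu_temp_both() above carries the upper threshold in bits 16-31 and the lower threshold in bits 0-15; a hypothetical consumer would unpack it like this (user-space sketch only):

#include <stdio.h>
#include <stdint.h>

static void unpack_temp(uint32_t both, unsigned *high, unsigned *low)
{
        *high = both >> 16;     /* upper bound of the tracking window */
        *low  = both & 0xffff;  /* lower bound */
}

int main(void)
{
        unsigned high, low;

        unpack_temp((54u << 16) | 48u, &high, &low);
        printf("window %u..%u C, midpoint %u C\n", low, high, (high + low) / 2);
        return 0;
}
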
+ */ + +#include <linux/errno.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/timex.h> +#include <linux/kernel_stat.h> +#include <linux/time.h> +#include <linux/init.h> +#include <linux/profile.h> +#include <linux/cpu.h> +#include <linux/security.h> +#include <linux/percpu.h> +#include <linux/rtc.h> +#include <linux/jiffies.h> +#include <linux/posix-timers.h> +#include <linux/irq.h> +#include <linux/delay.h> +#include <linux/irq_work.h> +#include <asm/trace.h> + +#include <asm/io.h> +#include <asm/processor.h> +#include <asm/nvram.h> +#include <asm/cache.h> +#include <asm/machdep.h> +#include <asm/uaccess.h> +#include <asm/time.h> +#include <asm/prom.h> +#include <asm/irq.h> +#include <asm/div64.h> +#include <asm/smp.h> +#include <asm/vdso_datapage.h> +#include <asm/firmware.h> +#include <asm/cputime.h> + +/* powerpc clocksource/clockevent code */ + +#include <linux/clockchips.h> +#include <linux/clocksource.h> + +static cycle_t rtc_read(struct clocksource *); +static struct clocksource clocksource_rtc = { + .name = "rtc", + .rating = 400, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .mask = CLOCKSOURCE_MASK(64), + .read = rtc_read, +}; + +static cycle_t timebase_read(struct clocksource *); +static struct clocksource clocksource_timebase = { + .name = "timebase", + .rating = 400, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .mask = CLOCKSOURCE_MASK(64), + .read = timebase_read, +}; + +#define DECREMENTER_MAX 0x7fffffff + +static int decrementer_set_next_event(unsigned long evt, + struct clock_event_device *dev); +static void decrementer_set_mode(enum clock_event_mode mode, + struct clock_event_device *dev); + +static struct clock_event_device decrementer_clockevent = { + .name = "decrementer", + .rating = 200, + .irq = 0, + .set_next_event = decrementer_set_next_event, + .set_mode = decrementer_set_mode, + .features = CLOCK_EVT_FEAT_ONESHOT, +}; + +DEFINE_PER_CPU(u64, decrementers_next_tb); +static DEFINE_PER_CPU(struct clock_event_device, decrementers); + +#define XSEC_PER_SEC (1024*1024) + +#ifdef CONFIG_PPC64 +#define SCALE_XSEC(xsec, max) (((xsec) * max) / XSEC_PER_SEC) +#else +/* compute ((xsec << 12) * max) >> 32 */ +#define SCALE_XSEC(xsec, max) mulhwu((xsec) << 12, max) +#endif + +unsigned long tb_ticks_per_jiffy; +unsigned long tb_ticks_per_usec = 100; /* sane default */ +EXPORT_SYMBOL(tb_ticks_per_usec); +unsigned long tb_ticks_per_sec; +EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime_t conversions */ + +DEFINE_SPINLOCK(rtc_lock); +EXPORT_SYMBOL_GPL(rtc_lock); + +static u64 tb_to_ns_scale __read_mostly; +static unsigned tb_to_ns_shift __read_mostly; +static u64 boot_tb __read_mostly; + +extern struct timezone sys_tz; +static long timezone_offset; + +unsigned long ppc_proc_freq; +EXPORT_SYMBOL_GPL(ppc_proc_freq); +unsigned long ppc_tb_freq; +EXPORT_SYMBOL_GPL(ppc_tb_freq); + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +/* + * Factors for converting from cputime_t (timebase ticks) to + * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds). + * These are all stored as 0.64 fixed-point binary fractions. 
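
An aside on the SCALE_XSEC macros above: an "xsec" is 1/2^20 second (XSEC_PER_SEC is 1024*1024), so both variants compute xsec * max / 2^20. The 32-bit form pre-shifts by 12 and keeps the high half of a 32x32 multiply, which nets the same 20-bit downshift. A standalone check (sketch; values chosen so the 32-bit path does not overflow):

#include <stdio.h>
#include <stdint.h>

#define XSEC_PER_SEC (1024 * 1024)      /* 2^20 xsec per second */

int main(void)
{
        uint64_t xsec = XSEC_PER_SEC / 4;       /* 0.25 s in xsec units */
        uint32_t max  = 1000;

        /* 64-bit form: (xsec * max) / 2^20 */
        uint64_t a = (xsec * max) / XSEC_PER_SEC;

        /* 32-bit form: high word of (xsec << 12) * max, a >> 32 overall,
         * i.e. the same net shift of 20 bits */
        uint64_t b = ((uint64_t)(uint32_t)(xsec << 12) * max) >> 32;

        printf("%llu %llu\n", (unsigned long long)a,
               (unsigned long long)b);          /* both print 250 */
        return 0;
}
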
+ */ +u64 __cputime_jiffies_factor; +EXPORT_SYMBOL(__cputime_jiffies_factor); +u64 __cputime_usec_factor; +EXPORT_SYMBOL(__cputime_usec_factor); +u64 __cputime_sec_factor; +EXPORT_SYMBOL(__cputime_sec_factor); +u64 __cputime_clockt_factor; +EXPORT_SYMBOL(__cputime_clockt_factor); +DEFINE_PER_CPU(unsigned long, cputime_last_delta); +DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta); + +cputime_t cputime_one_jiffy; + +void (*dtl_consumer)(struct dtl_entry *, u64); + +static void calc_cputime_factors(void) +{ + struct div_result res; + + div128_by_32(HZ, 0, tb_ticks_per_sec, &res); + __cputime_jiffies_factor = res.result_low; + div128_by_32(1000000, 0, tb_ticks_per_sec, &res); + __cputime_usec_factor = res.result_low; + div128_by_32(1, 0, tb_ticks_per_sec, &res); + __cputime_sec_factor = res.result_low; + div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res); + __cputime_clockt_factor = res.result_low; +} + +/* + * Read the SPURR on systems that have it, otherwise the PURR, + * or if that doesn't exist return the timebase value passed in. + */ +static u64 read_spurr(u64 tb) +{ + if (cpu_has_feature(CPU_FTR_SPURR)) + return mfspr(SPRN_SPURR); + if (cpu_has_feature(CPU_FTR_PURR)) + return mfspr(SPRN_PURR); + return tb; +} + +#ifdef CONFIG_PPC_SPLPAR + +/* + * Scan the dispatch trace log and count up the stolen time. + * Should be called with interrupts disabled. + */ +static u64 scan_dispatch_log(u64 stop_tb) +{ + u64 i = local_paca->dtl_ridx; + struct dtl_entry *dtl = local_paca->dtl_curr; + struct dtl_entry *dtl_end = local_paca->dispatch_log_end; + struct lppaca *vpa = local_paca->lppaca_ptr; + u64 tb_delta; + u64 stolen = 0; + u64 dtb; + + if (!dtl) + return 0; + + if (i == vpa->dtl_idx) + return 0; + while (i < vpa->dtl_idx) { + if (dtl_consumer) + dtl_consumer(dtl, i); + dtb = dtl->timebase; + tb_delta = dtl->enqueue_to_dispatch_time + + dtl->ready_to_enqueue_time; + barrier(); + if (i + N_DISPATCH_LOG < vpa->dtl_idx) { + /* buffer has overflowed */ + i = vpa->dtl_idx - N_DISPATCH_LOG; + dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG); + continue; + } + if (dtb > stop_tb) + break; + stolen += tb_delta; + ++i; + ++dtl; + if (dtl == dtl_end) + dtl = local_paca->dispatch_log; + } + local_paca->dtl_ridx = i; + local_paca->dtl_curr = dtl; + return stolen; +} + +/* + * Accumulate stolen time by scanning the dispatch trace log. + * Called on entry from user mode. + */ +void accumulate_stolen_time(void) +{ + u64 sst, ust; + + u8 save_soft_enabled = local_paca->soft_enabled; + + /* We are called early in the exception entry, before + * soft/hard_enabled are sync'ed to the expected state + * for the exception. 
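
To make the 0.64 fixed-point factors above concrete: calc_cputime_factors() effectively stores factor = HZ * 2^64 / tb_ticks_per_sec, and a tick count is then converted by taking the high 64 bits of a 64x64 product. A user-space sketch with GCC's __int128 (the frequency is an assumed example value):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        const uint64_t hz = 250, tb_ticks_per_sec = 512000000;

        /* factor = HZ * 2^64 / tb_ticks_per_sec: a 0.64 binary fraction,
         * the value div128_by_32(HZ, 0, tb_ticks_per_sec, ...) produces */
        uint64_t factor = (uint64_t)(((unsigned __int128)hz << 64) /
                                     tb_ticks_per_sec);

        /* converting ticks is then a single multiply-high */
        uint64_t ticks   = tb_ticks_per_sec;    /* one second of ticks */
        uint64_t jiffies = (uint64_t)(((unsigned __int128)ticks * factor) >> 64);

        printf("%llu jiffies\n", (unsigned long long)jiffies);  /* ~250 */
        return 0;
}
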
We are hard disabled but the PACA + * needs to reflect that so various debug stuff doesn't + * complain + */ + local_paca->soft_enabled = 0; + + sst = scan_dispatch_log(local_paca->starttime_user); + ust = scan_dispatch_log(local_paca->starttime); + local_paca->system_time -= sst; + local_paca->user_time -= ust; + local_paca->stolen_time += ust + sst; + + local_paca->soft_enabled = save_soft_enabled; +} + +static inline u64 calculate_stolen_time(u64 stop_tb) +{ + u64 stolen = 0; + + if (get_paca()->dtl_ridx != get_paca()->lppaca_ptr->dtl_idx) { + stolen = scan_dispatch_log(stop_tb); + get_paca()->system_time -= stolen; + } + + stolen += get_paca()->stolen_time; + get_paca()->stolen_time = 0; + return stolen; +} + +#else /* CONFIG_PPC_SPLPAR */ +static inline u64 calculate_stolen_time(u64 stop_tb) +{ + return 0; +} + +#endif /* CONFIG_PPC_SPLPAR */ + +/* + * Account time for a transition between system, hard irq + * or soft irq state. + */ +void account_system_vtime(struct task_struct *tsk) +{ + u64 now, nowscaled, delta, deltascaled; + unsigned long flags; + u64 stolen, udelta, sys_scaled, user_scaled; + + local_irq_save(flags); + now = mftb(); + nowscaled = read_spurr(now); + get_paca()->system_time += now - get_paca()->starttime; + get_paca()->starttime = now; + deltascaled = nowscaled - get_paca()->startspurr; + get_paca()->startspurr = nowscaled; + + stolen = calculate_stolen_time(now); + + delta = get_paca()->system_time; + get_paca()->system_time = 0; + udelta = get_paca()->user_time - get_paca()->utime_sspurr; + get_paca()->utime_sspurr = get_paca()->user_time; + + /* + * Because we don't read the SPURR on every kernel entry/exit, + * deltascaled includes both user and system SPURR ticks. + * Apportion these ticks to system SPURR ticks and user + * SPURR ticks in the same ratio as the system time (delta) + * and user time (udelta) values obtained from the timebase + * over the same interval. The system ticks get accounted here; + * the user ticks get saved up in paca->user_time_scaled to be + * used by account_process_tick. + */ + sys_scaled = delta; + user_scaled = udelta; + if (deltascaled != delta + udelta) { + if (udelta) { + sys_scaled = deltascaled * delta / (delta + udelta); + user_scaled = deltascaled - sys_scaled; + } else { + sys_scaled = deltascaled; + } + } + get_paca()->user_time_scaled += user_scaled; + + if (in_interrupt() || idle_task(smp_processor_id()) != tsk) { + account_system_time(tsk, 0, delta, sys_scaled); + if (stolen) + account_steal_time(stolen); + } else { + account_idle_time(delta + stolen); + } + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(account_system_vtime); + +/* + * Transfer the user and system times accumulated in the paca + * by the exception entry and exit code to the generic process + * user and system time records. + * Must be called with interrupts disabled. + * Assumes that account_system_vtime() has been called recently + * (i.e. since the last entry from usermode) so that + * get_paca()->user_time_scaled is up to date. + */ +void account_process_tick(struct task_struct *tsk, int user_tick) +{ + cputime_t utime, utimescaled; + + utime = get_paca()->user_time; + utimescaled = get_paca()->user_time_scaled; + get_paca()->user_time = 0; + get_paca()->user_time_scaled = 0; + get_paca()->utime_sspurr = 0; + account_user_time(tsk, utime, utimescaled); +} + +#else /* ! 
CONFIG_VIRT_CPU_ACCOUNTING */ +#define calc_cputime_factors() +#endif + +void __delay(unsigned long loops) +{ + unsigned long start; + int diff; + + if (__USE_RTC()) { + start = get_rtcl(); + do { + /* the RTCL register wraps at 1000000000 */ + diff = get_rtcl() - start; + if (diff < 0) + diff += 1000000000; + } while (diff < loops); + } else { + start = get_tbl(); + while (get_tbl() - start < loops) + HMT_low(); + HMT_medium(); + } +} +EXPORT_SYMBOL(__delay); + +void udelay(unsigned long usecs) +{ + __delay(tb_ticks_per_usec * usecs); +} +EXPORT_SYMBOL(udelay); + +#ifdef CONFIG_SMP +unsigned long profile_pc(struct pt_regs *regs) +{ + unsigned long pc = instruction_pointer(regs); + + if (in_lock_functions(pc)) + return regs->link; + + return pc; +} +EXPORT_SYMBOL(profile_pc); +#endif + +#ifdef CONFIG_IRQ_WORK + +/* + * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... + */ +#ifdef CONFIG_PPC64 +static inline unsigned long test_irq_work_pending(void) +{ + unsigned long x; + + asm volatile("lbz %0,%1(13)" + : "=r" (x) + : "i" (offsetof(struct paca_struct, irq_work_pending))); + return x; +} + +static inline void set_irq_work_pending_flag(void) +{ + asm volatile("stb %0,%1(13)" : : + "r" (1), + "i" (offsetof(struct paca_struct, irq_work_pending))); +} + +static inline void clear_irq_work_pending(void) +{ + asm volatile("stb %0,%1(13)" : : + "r" (0), + "i" (offsetof(struct paca_struct, irq_work_pending))); +} + +#else /* 32-bit */ + +DEFINE_PER_CPU(u8, irq_work_pending); + +#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 +#define test_irq_work_pending() __get_cpu_var(irq_work_pending) +#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 + +#endif /* 32 vs 64 bit */ + +void arch_irq_work_raise(void) +{ + preempt_disable(); + set_irq_work_pending_flag(); + set_dec(1); + preempt_enable(); +} + +#else /* CONFIG_IRQ_WORK */ + +#define test_irq_work_pending() 0 +#define clear_irq_work_pending() + +#endif /* CONFIG_IRQ_WORK */ + +/* + * timer_interrupt - gets called when the decrementer overflows, + * with interrupts disabled. + */ +void timer_interrupt(struct pt_regs * regs) +{ + struct pt_regs *old_regs; + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + struct clock_event_device *evt = &__get_cpu_var(decrementers); + u64 now; + + /* Ensure a positive value is written to the decrementer, or else + * some CPUs will continue to take decrementer exceptions. 
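
On the 64-bit side above, r13 permanently holds the per-CPU PACA pointer, so the lbz/stb with an immediate offsetof() operand is a single-instruction per-CPU byte access; arch_irq_work_raise() then sets the decrementer to 1 so a timer interrupt fires almost immediately and consumes the flag. A portable sketch of the flag half (thread-local storage standing in for the PACA; illustrative only):

#include <stdio.h>

static _Thread_local unsigned char irq_work_pending;    /* the PACA byte */

static void set_irq_work_pending_flag(void) { irq_work_pending = 1; }
static int  test_irq_work_pending(void)     { return irq_work_pending; }
static void clear_irq_work_pending(void)    { irq_work_pending = 0; }

int main(void)
{
        set_irq_work_pending_flag();    /* arch_irq_work_raise() path */
        if (test_irq_work_pending()) {  /* timer_interrupt() path */
                clear_irq_work_pending();
                printf("irq_work consumed\n");
        }
        return 0;
}
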
+ */ + set_dec(DECREMENTER_MAX); + + /* Some implementations of hotplug will get timer interrupts while + * offline, just ignore these + */ + if (!cpu_online(smp_processor_id())) + return; + + /* Conditionally hard-enable interrupts now that the DEC has been + * bumped to its maximum value + */ + may_hard_irq_enable(); + + trace_timer_interrupt_entry(regs); + + __get_cpu_var(irq_stat).timer_irqs++; + +#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC) + if (atomic_read(&ppc_n_lost_interrupts) != 0) + do_IRQ(regs); +#endif + + old_regs = set_irq_regs(regs); + irq_enter(); + + if (test_irq_work_pending()) { + clear_irq_work_pending(); + irq_work_run(); + } + + now = get_tb_or_rtc(); + if (now >= *next_tb) { + *next_tb = ~(u64)0; + if (evt->event_handler) + evt->event_handler(evt); + } else { + now = *next_tb - now; + if (now <= DECREMENTER_MAX) + set_dec((int)now); + } + +#ifdef CONFIG_PPC64 + /* collect purr register values often, for accurate calculations */ + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); + cu->current_tb = mfspr(SPRN_PURR); + } +#endif + + irq_exit(); + set_irq_regs(old_regs); + + trace_timer_interrupt_exit(regs); +} + +#ifdef CONFIG_SUSPEND +static void generic_suspend_disable_irqs(void) +{ + /* Disable the decrementer, so that it doesn't interfere + * with suspending. + */ + + set_dec(DECREMENTER_MAX); + local_irq_disable(); + set_dec(DECREMENTER_MAX); +} + +static void generic_suspend_enable_irqs(void) +{ + local_irq_enable(); +} + +/* Overrides the weak version in kernel/power/main.c */ +void arch_suspend_disable_irqs(void) +{ + if (ppc_md.suspend_disable_irqs) + ppc_md.suspend_disable_irqs(); + generic_suspend_disable_irqs(); +} + +/* Overrides the weak version in kernel/power/main.c */ +void arch_suspend_enable_irqs(void) +{ + generic_suspend_enable_irqs(); + if (ppc_md.suspend_enable_irqs) + ppc_md.suspend_enable_irqs(); +} +#endif + +/* + * Scheduler clock - returns current time in nanosec units. + * + * Note: mulhdu(a, b) (multiply high double unsigned) returns + * the high 64 bits of a * b, i.e. (a * b) >> 64, where a and b + * are 64-bit unsigned numbers. 
+ */ +unsigned long long sched_clock(void) +{ + if (__USE_RTC()) + return get_rtc(); + return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift; +} + +static int __init get_freq(char *name, int cells, unsigned long *val) +{ + struct device_node *cpu; + const unsigned int *fp; + int found = 0; + + /* The cpu node should have timebase and clock frequency properties */ + cpu = of_find_node_by_type(NULL, "cpu"); + + if (cpu) { + fp = of_get_property(cpu, name, NULL); + if (fp) { + found = 1; + *val = of_read_ulong(fp, cells); + } + + of_node_put(cpu); + } + + return found; +} + +/* should become __cpuinit when secondary_cpu_time_init also is */ +void start_cpu_decrementer(void) +{ +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) + /* Clear any pending timer interrupts */ + mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS); + + /* Enable decrementer interrupt */ + mtspr(SPRN_TCR, TCR_DIE); +#endif /* defined(CONFIG_BOOKE) || defined(CONFIG_40x) */ +} + +void __init generic_calibrate_decr(void) +{ + ppc_tb_freq = DEFAULT_TB_FREQ; /* hardcoded default */ + + if (!get_freq("ibm,extended-timebase-frequency", 2, &ppc_tb_freq) && + !get_freq("timebase-frequency", 1, &ppc_tb_freq)) { + + printk(KERN_ERR "WARNING: Estimating decrementer frequency " + "(not found)\n"); + } + + ppc_proc_freq = DEFAULT_PROC_FREQ; /* hardcoded default */ + + if (!get_freq("ibm,extended-clock-frequency", 2, &ppc_proc_freq) && + !get_freq("clock-frequency", 1, &ppc_proc_freq)) { + + printk(KERN_ERR "WARNING: Estimating processor frequency " + "(not found)\n"); + } +} + +int update_persistent_clock(struct timespec now) +{ + struct rtc_time tm; + + if (!ppc_md.set_rtc_time) + return 0; + + to_tm(now.tv_sec + 1 + timezone_offset, &tm); + tm.tm_year -= 1900; + tm.tm_mon -= 1; + + return ppc_md.set_rtc_time(&tm); +} + +static void __read_persistent_clock(struct timespec *ts) +{ + struct rtc_time tm; + static int first = 1; + + ts->tv_nsec = 0; + /* XXX this is a litle fragile but will work okay in the short term */ + if (first) { + first = 0; + if (ppc_md.time_init) + timezone_offset = ppc_md.time_init(); + + /* get_boot_time() isn't guaranteed to be safe to call late */ + if (ppc_md.get_boot_time) { + ts->tv_sec = ppc_md.get_boot_time() - timezone_offset; + return; + } + } + if (!ppc_md.get_rtc_time) { + ts->tv_sec = 0; + return; + } + ppc_md.get_rtc_time(&tm); + + ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec); +} + +void read_persistent_clock(struct timespec *ts) +{ + __read_persistent_clock(ts); + + /* Sanitize it in case real time clock is set below EPOCH */ + if (ts->tv_sec < 0) { + ts->tv_sec = 0; + ts->tv_nsec = 0; + } + +} + +/* clocksource code */ +static cycle_t rtc_read(struct clocksource *cs) +{ + return (cycle_t)get_rtc(); +} + +static cycle_t timebase_read(struct clocksource *cs) +{ + return (cycle_t)get_tb(); +} + +void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, + struct clocksource *clock, u32 mult) +{ + u64 new_tb_to_xs, new_stamp_xsec; + u32 frac_sec; + + if (clock != &clocksource_timebase) + return; + + /* Make userspace gettimeofday spin until we're done. 
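
A user-space model of the sched_clock() scaling above (mulhdu emulated with __int128; the scale/shift pair is derived the same way time_init() does further down, and the timebase frequency is an assumed example):

#include <stdio.h>
#include <stdint.h>

static uint64_t mulhdu(uint64_t a, uint64_t b)
{
        return (uint64_t)(((unsigned __int128)a * b) >> 64);
}

int main(void)
{
        uint64_t tb_freq = 512000000;   /* example: 512 MHz timebase */

        /* 1e9 * 2^64 / tb_freq, shifted right until it fits in 0.64
         * fixed point, exactly like the normalisation loop in time_init() */
        unsigned __int128 ratio = ((unsigned __int128)1000000000 << 64) / tb_freq;
        unsigned shift = 0;

        while (ratio >> 64) {
                ratio >>= 1;
                shift++;
        }

        uint64_t scale = (uint64_t)ratio;
        uint64_t ticks = tb_freq;       /* one second of timebase */

        printf("%llu ns\n", (unsigned long long)(mulhdu(ticks, scale) << shift));
        return 0;
}
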
*/ + ++vdso_data->tb_update_count; + smp_mb(); + + /* 19342813113834067 ~= 2^(20+64) / 1e9 */ + new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift); + new_stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC; + do_div(new_stamp_xsec, 1000000000); + new_stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC; + + BUG_ON(wall_time->tv_nsec >= NSEC_PER_SEC); + /* this is tv_nsec / 1e9 as a 0.32 fraction */ + frac_sec = ((u64) wall_time->tv_nsec * 18446744073ULL) >> 32; + + /* + * tb_update_count is used to allow the userspace gettimeofday code + * to assure itself that it sees a consistent view of the tb_to_xs and + * stamp_xsec variables. It reads the tb_update_count, then reads + * tb_to_xs and stamp_xsec and then reads tb_update_count again. If + * the two values of tb_update_count match and are even then the + * tb_to_xs and stamp_xsec values are consistent. If not, then it + * loops back and reads them again until this criteria is met. + * We expect the caller to have done the first increment of + * vdso_data->tb_update_count already. + */ + vdso_data->tb_orig_stamp = clock->cycle_last; + vdso_data->stamp_xsec = new_stamp_xsec; + vdso_data->tb_to_xs = new_tb_to_xs; + vdso_data->wtom_clock_sec = wtm->tv_sec; + vdso_data->wtom_clock_nsec = wtm->tv_nsec; + vdso_data->stamp_xtime = *wall_time; + vdso_data->stamp_sec_fraction = frac_sec; + smp_wmb(); + ++(vdso_data->tb_update_count); +} + +void update_vsyscall_tz(void) +{ + /* Make userspace gettimeofday spin until we're done. */ + ++vdso_data->tb_update_count; + smp_mb(); + vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; + vdso_data->tz_dsttime = sys_tz.tz_dsttime; + smp_mb(); + ++vdso_data->tb_update_count; +} + +static void __init clocksource_init(void) +{ + struct clocksource *clock; + + if (__USE_RTC()) + clock = &clocksource_rtc; + else + clock = &clocksource_timebase; + + if (clocksource_register_hz(clock, tb_ticks_per_sec)) { + printk(KERN_ERR "clocksource: %s is already registered\n", + clock->name); + return; + } + + printk(KERN_INFO "clocksource: %s mult[%x] shift[%d] registered\n", + clock->name, clock->mult, clock->shift); +} + +static int decrementer_set_next_event(unsigned long evt, + struct clock_event_device *dev) +{ + __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt; + set_dec(evt); + return 0; +} + +static void decrementer_set_mode(enum clock_event_mode mode, + struct clock_event_device *dev) +{ + if (mode != CLOCK_EVT_MODE_ONESHOT) + decrementer_set_next_event(DECREMENTER_MAX, dev); +} + +static void register_decrementer_clockevent(int cpu) +{ + struct clock_event_device *dec = &per_cpu(decrementers, cpu); + + *dec = decrementer_clockevent; + dec->cpumask = cpumask_of(cpu); + + printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n", + dec->name, dec->mult, dec->shift, cpu); + + clockevents_register_device(dec); +} + +static void __init init_decrementer_clockevent(void) +{ + int cpu = smp_processor_id(); + + clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4); + + decrementer_clockevent.max_delta_ns = + clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent); + decrementer_clockevent.min_delta_ns = + clockevent_delta2ns(2, &decrementer_clockevent); + + register_decrementer_clockevent(cpu); +} + +void secondary_cpu_time_init(void) +{ + /* Start the decrementer on CPUs that have manual control + * such as BookE + */ + start_cpu_decrementer(); + + /* FIME: Should make unrelatred change to move snapshot_timebase + * call here ! 
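
The tb_update_count comment above describes a sequence-count protocol; the reader side it implies looks roughly like this (illustrative sketch with barriers elided, not the actual VDSO code):

#include <stdio.h>
#include <stdatomic.h>

struct vdso_snap { unsigned long long tb_orig_stamp, stamp_xsec, tb_to_xs; };

static struct vdso_snap snap = { 1, 2, 3 };     /* written by the updater */
static atomic_ulong tb_update_count;            /* even = stable, odd = in flux */

/* Retry while the count is odd (update in progress) or changed
 * between the two reads, as the comment above specifies. */
static struct vdso_snap read_consistent(void)
{
        struct vdso_snap out;
        unsigned long c1, c2;

        do {
                c1 = atomic_load(&tb_update_count);
                out = snap;
                c2 = atomic_load(&tb_update_count);
        } while (c1 != c2 || (c1 & 1));
        return out;
}

int main(void)
{
        struct vdso_snap s = read_consistent();
        printf("%llu %llu %llu\n", s.tb_orig_stamp, s.stamp_xsec, s.tb_to_xs);
        return 0;
}
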
*/ + register_decrementer_clockevent(smp_processor_id()); +} + +/* This function is only called on the boot processor */ +void __init time_init(void) +{ + struct div_result res; + u64 scale; + unsigned shift; + + if (__USE_RTC()) { + /* 601 processor: dec counts down by 128 every 128ns */ + ppc_tb_freq = 1000000000; + } else { + /* Normal PowerPC with timebase register */ + ppc_md.calibrate_decr(); + printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n", + ppc_tb_freq / 1000000, ppc_tb_freq % 1000000); + printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n", + ppc_proc_freq / 1000000, ppc_proc_freq % 1000000); + } + + tb_ticks_per_jiffy = ppc_tb_freq / HZ; + tb_ticks_per_sec = ppc_tb_freq; + tb_ticks_per_usec = ppc_tb_freq / 1000000; + calc_cputime_factors(); + setup_cputime_one_jiffy(); + + /* + * Compute scale factor for sched_clock. + * The calibrate_decr() function has set tb_ticks_per_sec, + * which is the timebase frequency. + * We compute 1e9 * 2^64 / tb_ticks_per_sec and interpret + * the 128-bit result as a 64.64 fixed-point number. + * We then shift that number right until it is less than 1.0, + * giving us the scale factor and shift count to use in + * sched_clock(). + */ + div128_by_32(1000000000, 0, tb_ticks_per_sec, &res); + scale = res.result_low; + for (shift = 0; res.result_high != 0; ++shift) { + scale = (scale >> 1) | (res.result_high << 63); + res.result_high >>= 1; + } + tb_to_ns_scale = scale; + tb_to_ns_shift = shift; + /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */ + boot_tb = get_tb_or_rtc(); + + /* If platform provided a timezone (pmac), we correct the time */ + if (timezone_offset) { + sys_tz.tz_minuteswest = -timezone_offset / 60; + sys_tz.tz_dsttime = 0; + } + + vdso_data->tb_update_count = 0; + vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; + + /* Start the decrementer on CPUs that have manual control + * such as BookE + */ + start_cpu_decrementer(); + + /* Register the clocksource */ + clocksource_init(); + + init_decrementer_clockevent(); +} + + +#define FEBRUARY 2 +#define STARTOFTIME 1970 +#define SECDAY 86400L +#define SECYR (SECDAY * 365) +#define leapyear(year) ((year) % 4 == 0 && \ + ((year) % 100 != 0 || (year) % 400 == 0)) +#define days_in_year(a) (leapyear(a) ? 366 : 365) +#define days_in_month(a) (month_days[(a) - 1]) + +static int month_days[12] = { + 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 +}; + +/* + * This only works for the Gregorian calendar - i.e. after 1752 (in the UK) + */ +void GregorianDay(struct rtc_time * tm) +{ + int leapsToDate; + int lastYear; + int day; + int MonthOffset[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 }; + + lastYear = tm->tm_year - 1; + + /* + * Number of leap corrections to apply up to end of last year + */ + leapsToDate = lastYear / 4 - lastYear / 100 + lastYear / 400; + + /* + * This year is a leap year if it is divisible by 4 except when it is + * divisible by 100 unless it is divisible by 400 + * + * e.g. 
1904 was a leap year, 1900 was not, 1996 is, and 2000 was + */ + day = tm->tm_mon > 2 && leapyear(tm->tm_year); + + day += lastYear*365 + leapsToDate + MonthOffset[tm->tm_mon-1] + + tm->tm_mday; + + tm->tm_wday = day % 7; +} + +void to_tm(int tim, struct rtc_time * tm) +{ + register int i; + register long hms, day; + + day = tim / SECDAY; + hms = tim % SECDAY; + + /* Hours, minutes, seconds are easy */ + tm->tm_hour = hms / 3600; + tm->tm_min = (hms % 3600) / 60; + tm->tm_sec = (hms % 3600) % 60; + + /* Number of years in days */ + for (i = STARTOFTIME; day >= days_in_year(i); i++) + day -= days_in_year(i); + tm->tm_year = i; + + /* Number of months in days left */ + if (leapyear(tm->tm_year)) + days_in_month(FEBRUARY) = 29; + for (i = 1; day >= days_in_month(i); i++) + day -= days_in_month(i); + days_in_month(FEBRUARY) = 28; + tm->tm_mon = i; + + /* Days are what is left over (+1) from all that. */ + tm->tm_mday = day + 1; + + /* + * Determine the day of week + */ + GregorianDay(tm); +} + +/* + * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit + * result. + */ +void div128_by_32(u64 dividend_high, u64 dividend_low, + unsigned divisor, struct div_result *dr) +{ + unsigned long a, b, c, d; + unsigned long w, x, y, z; + u64 ra, rb, rc; + + a = dividend_high >> 32; + b = dividend_high & 0xffffffff; + c = dividend_low >> 32; + d = dividend_low & 0xffffffff; + + w = a / divisor; + ra = ((u64)(a - (w * divisor)) << 32) + b; + + rb = ((u64) do_div(ra, divisor) << 32) + c; + x = ra; + + rc = ((u64) do_div(rb, divisor) << 32) + d; + y = rb; + + do_div(rc, divisor); + z = rc; + + dr->result_high = ((u64)w << 32) + x; + dr->result_low = ((u64)y << 32) + z; + +} + +/* We don't need to calibrate delay, we use the CPU timebase for that */ +void calibrate_delay(void) +{ + /* Some generic code (such as spinlock debug) use loops_per_jiffy + * as the number of __delay(1) in a jiffy, so make it so + */ + loops_per_jiffy = tb_ticks_per_jiffy; +} + +static int __init rtc_init(void) +{ + struct platform_device *pdev; + + if (!ppc_md.get_rtc_time) + return -ENODEV; + + pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + return 0; +} + +module_init(rtc_init); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c new file mode 100644 index 00000000..15897234 --- /dev/null +++ b/arch/powerpc/kernel/traps.c @@ -0,0 +1,1627 @@ +/* + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Copyright 2007-2010 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
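
The limb-by-limb long division in div128_by_32() above can be cross-checked against a compiler-provided 128-bit divide. This standalone sketch transcribes the same algorithm with do_div() unfolded (GCC __int128 assumed):

#include <stdio.h>
#include <stdint.h>

struct div_result { uint64_t result_high, result_low; };

static void div128(uint64_t hi, uint64_t lo, uint32_t divisor,
                   struct div_result *dr)
{
        uint64_t a = hi >> 32, b = hi & 0xffffffff;
        uint64_t c = lo >> 32, d = lo & 0xffffffff;
        uint64_t w, x, y, z, ra, rb, rc;

        w  = a / divisor;
        ra = ((a - w * divisor) << 32) + b;
        x  = ra / divisor;                      /* do_div(ra, divisor) */
        rb = ((ra - x * divisor) << 32) + c;
        y  = rb / divisor;
        rc = ((rb - y * divisor) << 32) + d;
        z  = rc / divisor;

        dr->result_high = (w << 32) + x;
        dr->result_low  = (y << 32) + z;
}

int main(void)
{
        struct div_result dr;
        uint32_t tb_freq = 512000000;

        /* 1e9 * 2^64 / tb_freq, the quotient time_init() above computes */
        div128(1000000000, 0, tb_freq, &dr);

        unsigned __int128 q = ((unsigned __int128)1000000000 << 64) / tb_freq;
        printf("match: %d\n", dr.result_low == (uint64_t)q &&
                              dr.result_high == (uint64_t)(q >> 64));
        return 0;
}
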
+ * + * Modified by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras (paulus@samba.org) + */ + +/* + * This file handles the architecture-dependent parts of hardware exceptions + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/prctl.h> +#include <linux/delay.h> +#include <linux/kprobes.h> +#include <linux/kexec.h> +#include <linux/backlight.h> +#include <linux/bug.h> +#include <linux/kdebug.h> +#include <linux/debugfs.h> +#include <linux/ratelimit.h> + +#include <asm/emulated_ops.h> +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/rtas.h> +#include <asm/pmc.h> +#ifdef CONFIG_PPC32 +#include <asm/reg.h> +#endif +#ifdef CONFIG_PMAC_BACKLIGHT +#include <asm/backlight.h> +#endif +#ifdef CONFIG_PPC64 +#include <asm/firmware.h> +#include <asm/processor.h> +#endif +#include <asm/kexec.h> +#include <asm/ppc-opcode.h> +#include <asm/rio.h> +#include <asm/fadump.h> +#include <asm/switch_to.h> +#include <asm/debug.h> + +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) +int (*__debugger)(struct pt_regs *regs) __read_mostly; +int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly; +int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly; +int (*__debugger_sstep)(struct pt_regs *regs) __read_mostly; +int (*__debugger_iabr_match)(struct pt_regs *regs) __read_mostly; +int (*__debugger_dabr_match)(struct pt_regs *regs) __read_mostly; +int (*__debugger_fault_handler)(struct pt_regs *regs) __read_mostly; + +EXPORT_SYMBOL(__debugger); +EXPORT_SYMBOL(__debugger_ipi); +EXPORT_SYMBOL(__debugger_bpt); +EXPORT_SYMBOL(__debugger_sstep); +EXPORT_SYMBOL(__debugger_iabr_match); +EXPORT_SYMBOL(__debugger_dabr_match); +EXPORT_SYMBOL(__debugger_fault_handler); +#endif + +/* + * Trap & Exception support + */ + +#ifdef CONFIG_PMAC_BACKLIGHT +static void pmac_backlight_unblank(void) +{ + mutex_lock(&pmac_backlight_mutex); + if (pmac_backlight) { + struct backlight_properties *props; + + props = &pmac_backlight->props; + props->brightness = props->max_brightness; + props->power = FB_BLANK_UNBLANK; + backlight_update_status(pmac_backlight); + } + mutex_unlock(&pmac_backlight_mutex); +} +#else +static inline void pmac_backlight_unblank(void) { } +#endif + +static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; +static int die_counter; + +static unsigned __kprobes long oops_begin(struct pt_regs *regs) +{ + int cpu; + unsigned long flags; + + if (debugger(regs)) + return 1; + + oops_enter(); + + /* racy, but better than risking deadlock. */ + raw_local_irq_save(flags); + cpu = smp_processor_id(); + if (!arch_spin_trylock(&die_lock)) { + if (cpu == die_owner) + /* nested oops. should stop eventually */; + else + arch_spin_lock(&die_lock); + } + die_nest_count++; + die_owner = cpu; + console_verbose(); + bust_spinlocks(1); + if (machine_is(powermac)) + pmac_backlight_unblank(); + return flags; +} + +static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, + int signr) +{ + bust_spinlocks(0); + die_owner = -1; + add_taint(TAINT_DIE); + die_nest_count--; + oops_exit(); + printk("\n"); + if (!die_nest_count) + /* Nest count reaches zero, release the lock. 
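
The trylock dance in oops_begin() above exists so that a CPU which oopses again while already holding die_lock does not deadlock against itself: ownership is recorded, and the recorded owner is allowed straight through. A condensed user-space sketch of the pattern (spinlock modelled with an atomic flag; illustrative only):

#include <stdatomic.h>

static atomic_flag die_lock = ATOMIC_FLAG_INIT;
static int die_owner = -1;
static unsigned int die_nest_count;

static void oops_lock(int cpu)
{
        if (atomic_flag_test_and_set(&die_lock)) {      /* trylock failed */
                if (cpu != die_owner)
                        while (atomic_flag_test_and_set(&die_lock))
                                ;       /* not ours: spin for it */
                /* else: nested oops on the owning CPU, proceed */
        }
        die_nest_count++;
        die_owner = cpu;
}

int main(void)
{
        oops_lock(0);
        oops_lock(0);   /* nested oops on the same CPU: no deadlock */
        return 0;
}
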
*/ + arch_spin_unlock(&die_lock); + raw_local_irq_restore(flags); + + crash_fadump(regs, "die oops"); + + /* + * A system reset (0x100) is a request to dump, so we always send + * it through the crashdump code. + */ + if (kexec_should_crash(current) || (TRAP(regs) == 0x100)) { + crash_kexec(regs); + + /* + * We aren't the primary crash CPU. We need to send it + * to a holding pattern to avoid it ending up in the panic + * code. + */ + crash_kexec_secondary(regs); + } + + if (!signr) + return; + + /* + * While our oops output is serialised by a spinlock, output + * from panic() called below can race and corrupt it. If we + * know we are going to panic, delay for 1 second so we have a + * chance to get clean backtraces from all CPUs that are oopsing. + */ + if (in_interrupt() || panic_on_oops || !current->pid || + is_global_init(current)) { + mdelay(MSEC_PER_SEC); + } + + if (in_interrupt()) + panic("Fatal exception in interrupt"); + if (panic_on_oops) + panic("Fatal exception"); + do_exit(signr); +} + +static int __kprobes __die(const char *str, struct pt_regs *regs, long err) +{ + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); +#endif +#ifdef CONFIG_SMP + printk("SMP NR_CPUS=%d ", NR_CPUS); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC "); +#endif +#ifdef CONFIG_NUMA + printk("NUMA "); +#endif + printk("%s\n", ppc_md.name ? ppc_md.name : ""); + + if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP) + return 1; + + print_modules(); + show_regs(regs); + + return 0; +} + +void die(const char *str, struct pt_regs *regs, long err) +{ + unsigned long flags = oops_begin(regs); + + if (__die(str, regs, err)) + err = 0; + oops_end(flags, regs, err); +} + +void user_single_step_siginfo(struct task_struct *tsk, + struct pt_regs *regs, siginfo_t *info) +{ + memset(info, 0, sizeof(*info)); + info->si_signo = SIGTRAP; + info->si_code = TRAP_TRACE; + info->si_addr = (void __user *)regs->nip; +} + +void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) +{ + siginfo_t info; + const char fmt32[] = KERN_INFO "%s[%d]: unhandled signal %d " \ + "at %08lx nip %08lx lr %08lx code %x\n"; + const char fmt64[] = KERN_INFO "%s[%d]: unhandled signal %d " \ + "at %016lx nip %016lx lr %016lx code %x\n"; + + if (!user_mode(regs)) { + die("Exception in kernel mode", regs, signr); + return; + } + + if (show_unhandled_signals && unhandled_signal(current, signr)) { + printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, + current->comm, current->pid, signr, + addr, regs->nip, regs->link, code); + } + + if (arch_irqs_disabled() && !arch_irq_disabled_regs(regs)) + local_irq_enable(); + + memset(&info, 0, sizeof(info)); + info.si_signo = signr; + info.si_code = code; + info.si_addr = (void __user *) addr; + force_sig_info(signr, &info, current); +} + +#ifdef CONFIG_PPC64 +void system_reset_exception(struct pt_regs *regs) +{ + /* See if any machine dependent calls */ + if (ppc_md.system_reset_exception) { + if (ppc_md.system_reset_exception(regs)) + return; + } + + die("System Reset", regs, SIGABRT); + + /* Must die if the interrupt is not recoverable */ + if (!(regs->msr & MSR_RI)) + panic("Unrecoverable System Reset"); + + /* What should we do here? We could issue a shutdown or hard reset. */ +} +#endif + +/* + * I/O accesses can cause machine checks on powermacs. + * Check if the NIP corresponds to the address of a sync + * instruction for which there is an entry in the exception + * table. 
+ * Note that the 601 only takes a machine check on TEA + * (transfer error ack) signal assertion, and does not + * set any of the top 16 bits of SRR1. + * -- paulus. + */ +static inline int check_io_access(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC32 + unsigned long msr = regs->msr; + const struct exception_table_entry *entry; + unsigned int *nip = (unsigned int *)regs->nip; + + if (((msr & 0xffff0000) == 0 || (msr & (0x80000 | 0x40000))) + && (entry = search_exception_tables(regs->nip)) != NULL) { + /* + * Check that it's a sync instruction, or somewhere + * in the twi; isync; nop sequence that inb/inw/inl uses. + * As the address is in the exception table + * we should be able to read the instr there. + * For the debug message, we look at the preceding + * load or store. + */ + if (*nip == 0x60000000) /* nop */ + nip -= 2; + else if (*nip == 0x4c00012c) /* isync */ + --nip; + if (*nip == 0x7c0004ac || (*nip >> 26) == 3) { + /* sync or twi */ + unsigned int rb; + + --nip; + rb = (*nip >> 11) & 0x1f; + printk(KERN_DEBUG "%s bad port %lx at %p\n", + (*nip & 0x100)? "OUT to": "IN from", + regs->gpr[rb] - _IO_BASE, nip); + regs->msr |= MSR_RI; + regs->nip = entry->fixup; + return 1; + } + } +#endif /* CONFIG_PPC32 */ + return 0; +} + +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +/* On 4xx, the reason for the machine check or program exception + is in the ESR. */ +#define get_reason(regs) ((regs)->dsisr) +#ifndef CONFIG_FSL_BOOKE +#define get_mc_reason(regs) ((regs)->dsisr) +#else +#define get_mc_reason(regs) (mfspr(SPRN_MCSR)) +#endif +#define REASON_FP ESR_FP +#define REASON_ILLEGAL (ESR_PIL | ESR_PUO) +#define REASON_PRIVILEGED ESR_PPR +#define REASON_TRAP ESR_PTR + +/* single-step stuff */ +#define single_stepping(regs) (current->thread.dbcr0 & DBCR0_IC) +#define clear_single_step(regs) (current->thread.dbcr0 &= ~DBCR0_IC) + +#else +/* On non-4xx, the reason for the machine check or program + exception is in the MSR. 
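
For readers decoding the magic numbers in check_io_access() above: 0x60000000 is nop (ori 0,0,0), 0x4c00012c is isync, 0x7c0004ac is sync, and a primary opcode of 3 (instword >> 26) is twi. A small standalone decoder for the fields involved (standard PowerPC encodings assumed; the sample load is illustrative):

#include <stdio.h>
#include <stdint.h>

static unsigned primary_op(uint32_t w) { return w >> 26; }
static unsigned field_rb(uint32_t w)   { return (w >> 11) & 0x1f; }

int main(void)
{
        /* primary opcodes: ori/nop = 24, isync = 19, sync = 31 */
        printf("nop=%u isync=%u sync=%u\n",
               primary_op(0x60000000), primary_op(0x4c00012c),
               primary_op(0x7c0004ac));

        /* 0x7c6918ae encodes lbzx r3,r9,r3; its RB field is 3, the
         * register the code above prints as the bad port address */
        printf("rb = %u\n", field_rb(0x7c6918ae));
        return 0;
}
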
*/ +#define get_reason(regs) ((regs)->msr) +#define get_mc_reason(regs) ((regs)->msr) +#define REASON_FP 0x100000 +#define REASON_ILLEGAL 0x80000 +#define REASON_PRIVILEGED 0x40000 +#define REASON_TRAP 0x20000 + +#define single_stepping(regs) ((regs)->msr & MSR_SE) +#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) +#endif + +#if defined(CONFIG_4xx) +int machine_check_4xx(struct pt_regs *regs) +{ + unsigned long reason = get_mc_reason(regs); + + if (reason & ESR_IMCP) { + printk("Instruction"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + } else + printk("Data"); + printk(" machine check in kernel mode.\n"); + + return 0; +} + +int machine_check_440A(struct pt_regs *regs) +{ + unsigned long reason = get_mc_reason(regs); + + printk("Machine check in kernel mode.\n"); + if (reason & ESR_IMCP){ + printk("Instruction Synchronous Machine Check exception\n"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + } + else { + u32 mcsr = mfspr(SPRN_MCSR); + if (mcsr & MCSR_IB) + printk("Instruction Read PLB Error\n"); + if (mcsr & MCSR_DRB) + printk("Data Read PLB Error\n"); + if (mcsr & MCSR_DWB) + printk("Data Write PLB Error\n"); + if (mcsr & MCSR_TLBP) + printk("TLB Parity Error\n"); + if (mcsr & MCSR_ICP){ + flush_instruction_cache(); + printk("I-Cache Parity Error\n"); + } + if (mcsr & MCSR_DCSP) + printk("D-Cache Search Parity Error\n"); + if (mcsr & MCSR_DCFP) + printk("D-Cache Flush Parity Error\n"); + if (mcsr & MCSR_IMPE) + printk("Machine Check exception is imprecise\n"); + + /* Clear MCSR */ + mtspr(SPRN_MCSR, mcsr); + } + return 0; +} + +int machine_check_47x(struct pt_regs *regs) +{ + unsigned long reason = get_mc_reason(regs); + u32 mcsr; + + printk(KERN_ERR "Machine check in kernel mode.\n"); + if (reason & ESR_IMCP) { + printk(KERN_ERR + "Instruction Synchronous Machine Check exception\n"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + return 0; + } + mcsr = mfspr(SPRN_MCSR); + if (mcsr & MCSR_IB) + printk(KERN_ERR "Instruction Read PLB Error\n"); + if (mcsr & MCSR_DRB) + printk(KERN_ERR "Data Read PLB Error\n"); + if (mcsr & MCSR_DWB) + printk(KERN_ERR "Data Write PLB Error\n"); + if (mcsr & MCSR_TLBP) + printk(KERN_ERR "TLB Parity Error\n"); + if (mcsr & MCSR_ICP) { + flush_instruction_cache(); + printk(KERN_ERR "I-Cache Parity Error\n"); + } + if (mcsr & MCSR_DCSP) + printk(KERN_ERR "D-Cache Search Parity Error\n"); + if (mcsr & PPC47x_MCSR_GPR) + printk(KERN_ERR "GPR Parity Error\n"); + if (mcsr & PPC47x_MCSR_FPR) + printk(KERN_ERR "FPR Parity Error\n"); + if (mcsr & PPC47x_MCSR_IPR) + printk(KERN_ERR "Machine Check exception is imprecise\n"); + + /* Clear MCSR */ + mtspr(SPRN_MCSR, mcsr); + + return 0; +} +#elif defined(CONFIG_E500) +int machine_check_e500mc(struct pt_regs *regs) +{ + unsigned long mcsr = mfspr(SPRN_MCSR); + unsigned long reason = mcsr; + int recoverable = 1; + + if (reason & MCSR_LD) { + recoverable = fsl_rio_mcheck_exception(regs); + if (recoverable == 1) + goto silent_out; + } + + printk("Machine check in kernel mode.\n"); + printk("Caused by (from MCSR=%lx): ", reason); + + if (reason & MCSR_MCP) + printk("Machine Check Signal\n"); + + if (reason & MCSR_ICPERR) { + printk("Instruction Cache Parity Error\n"); + + /* + * This is recoverable by invalidating the i-cache. + */ + mtspr(SPRN_L1CSR1, mfspr(SPRN_L1CSR1) | L1CSR1_ICFI); + while (mfspr(SPRN_L1CSR1) & L1CSR1_ICFI) + ; + + /* + * This will generally be accompanied by an instruction + * fetch error report -- only treat MCSR_IF as fatal + * if it wasn't due to an L1 parity error. 
+ */
+ reason &= ~MCSR_IF;
+ }
+
+ if (reason & MCSR_DCPERR_MC) {
+ printk("Data Cache Parity Error\n");
+
+ /*
+ * In write shadow mode we auto-recover from the error, but it
+ * may still get logged and cause a machine check. We should
+ * only treat the non-write shadow case as non-recoverable.
+ */
+ if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
+ recoverable = 0;
+ }
+
+ if (reason & MCSR_L2MMU_MHIT) {
+ printk("Hit on multiple TLB entries\n");
+ recoverable = 0;
+ }
+
+ if (reason & MCSR_NMI)
+ printk("Non-maskable interrupt\n");
+
+ if (reason & MCSR_IF) {
+ printk("Instruction Fetch Error Report\n");
+ recoverable = 0;
+ }
+
+ if (reason & MCSR_LD) {
+ printk("Load Error Report\n");
+ recoverable = 0;
+ }
+
+ if (reason & MCSR_ST) {
+ printk("Store Error Report\n");
+ recoverable = 0;
+ }
+
+ if (reason & MCSR_LDG) {
+ printk("Guarded Load Error Report\n");
+ recoverable = 0;
+ }
+
+ if (reason & MCSR_TLBSYNC)
+ printk("Simultaneous tlbsync operations\n");
+
+ if (reason & MCSR_BSL2_ERR) {
+ printk("Level 2 Cache Error\n");
+ recoverable = 0;
+ }
+
+ if (reason & MCSR_MAV) {
+ u64 addr;
+
+ addr = mfspr(SPRN_MCAR);
+ addr |= (u64)mfspr(SPRN_MCARU) << 32;
+
+ printk("Machine Check %s Address: %#llx\n",
+ reason & MCSR_MEA ? "Effective" : "Physical", addr);
+ }
+
+silent_out:
+ mtspr(SPRN_MCSR, mcsr);
+ return mfspr(SPRN_MCSR) == 0 && recoverable;
+}
+
+int machine_check_e500(struct pt_regs *regs)
+{
+ unsigned long reason = get_mc_reason(regs);
+
+ if (reason & MCSR_BUS_RBERR) {
+ if (fsl_rio_mcheck_exception(regs))
+ return 1;
+ }
+
+ printk("Machine check in kernel mode.\n");
+ printk("Caused by (from MCSR=%lx): ", reason);
+
+ if (reason & MCSR_MCP)
+ printk("Machine Check Signal\n");
+ if (reason & MCSR_ICPERR)
+ printk("Instruction Cache Parity Error\n");
+ if (reason & MCSR_DCP_PERR)
+ printk("Data Cache Push Parity Error\n");
+ if (reason & MCSR_DCPERR)
+ printk("Data Cache Parity Error\n");
+ if (reason & MCSR_BUS_IAERR)
+ printk("Bus - Instruction Address Error\n");
+ if (reason & MCSR_BUS_RAERR)
+ printk("Bus - Read Address Error\n");
+ if (reason & MCSR_BUS_WAERR)
+ printk("Bus - Write Address Error\n");
+ if (reason & MCSR_BUS_IBERR)
+ printk("Bus - Instruction Data Error\n");
+ if (reason & MCSR_BUS_RBERR)
+ printk("Bus - Read Data Bus Error\n");
+ if (reason & MCSR_BUS_WBERR)
+ printk("Bus - Write Data Bus Error\n");
+ if (reason & MCSR_BUS_IPERR)
+ printk("Bus - Instruction Parity Error\n");
+ if (reason & MCSR_BUS_RPERR)
+ printk("Bus - Read Parity Error\n");
+
+ return 0;
+}
+
+int machine_check_generic(struct pt_regs *regs)
+{
+ return 0;
+}
+#elif defined(CONFIG_E200)
+int machine_check_e200(struct pt_regs *regs)
+{
+ unsigned long reason = get_mc_reason(regs);
+
+ printk("Machine check in kernel mode.\n");
+ printk("Caused by (from MCSR=%lx): ", reason);
+
+ if (reason & MCSR_MCP)
+ printk("Machine Check Signal\n");
+ if (reason & MCSR_CP_PERR)
+ printk("Cache Push Parity Error\n");
+ if (reason & MCSR_CPERR)
+ printk("Cache Parity Error\n");
+ if (reason & MCSR_EXCP_ERR)
+ printk("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n");
+ if (reason & MCSR_BUS_IRERR)
+ printk("Bus - Read Bus Error on instruction fetch\n");
+ if (reason & MCSR_BUS_DRERR)
+ printk("Bus - Read Bus Error on data load\n");
+ if (reason & MCSR_BUS_WRERR)
+ printk("Bus - Write Bus Error on buffered store or cache line push\n");
+
+ return 0;
+}
+#else
+int machine_check_generic(struct pt_regs *regs)
+{
+ unsigned long reason = get_mc_reason(regs);
+
+ printk("Machine
check in kernel mode.\n"); + printk("Caused by (from SRR1=%lx): ", reason); + switch (reason & 0x601F0000) { + case 0x80000: + printk("Machine check signal\n"); + break; + case 0: /* for 601 */ + case 0x40000: + case 0x140000: /* 7450 MSS error and TEA */ + printk("Transfer error ack signal\n"); + break; + case 0x20000: + printk("Data parity error signal\n"); + break; + case 0x10000: + printk("Address parity error signal\n"); + break; + case 0x20000000: + printk("L1 Data Cache error\n"); + break; + case 0x40000000: + printk("L1 Instruction Cache error\n"); + break; + case 0x00100000: + printk("L2 data cache parity error\n"); + break; + default: + printk("Unknown values in msr\n"); + } + return 0; +} +#endif /* everything else */ + +void machine_check_exception(struct pt_regs *regs) +{ + int recover = 0; + + __get_cpu_var(irq_stat).mce_exceptions++; + + /* See if any machine dependent calls. In theory, we would want + * to call the CPU first, and call the ppc_md. one if the CPU + * one returns a positive number. However there is existing code + * that assumes the board gets a first chance, so let's keep it + * that way for now and fix things later. --BenH. + */ + if (ppc_md.machine_check_exception) + recover = ppc_md.machine_check_exception(regs); + else if (cur_cpu_spec->machine_check) + recover = cur_cpu_spec->machine_check(regs); + + if (recover > 0) + return; + +#if defined(CONFIG_8xx) && defined(CONFIG_PCI) + /* the qspan pci read routines can cause machine checks -- Cort + * + * yuck !!! that totally needs to go away ! There are better ways + * to deal with that than having a wart in the mcheck handler. + * -- BenH + */ + bad_page_fault(regs, regs->dar, SIGBUS); + return; +#endif + + if (debugger_fault_handler(regs)) + return; + + if (check_io_access(regs)) + return; + + die("Machine check", regs, SIGBUS); + + /* Must die if the interrupt is not recoverable */ + if (!(regs->msr & MSR_RI)) + panic("Unrecoverable Machine check"); +} + +void SMIException(struct pt_regs *regs) +{ + die("System Management Interrupt", regs, SIGABRT); +} + +void unknown_exception(struct pt_regs *regs) +{ + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", + regs->nip, regs->msr, regs->trap); + + _exception(SIGTRAP, regs, 0, 0); +} + +void instruction_breakpoint_exception(struct pt_regs *regs) +{ + if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_iabr_match(regs)) + return; + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); +} + +void RunModeException(struct pt_regs *regs) +{ + _exception(SIGTRAP, regs, 0, 0); +} + +void __kprobes single_step_exception(struct pt_regs *regs) +{ + clear_single_step(regs); + + if (notify_die(DIE_SSTEP, "single_step", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_sstep(regs)) + return; + + _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); +} + +/* + * After we have successfully emulated an instruction, we have to + * check if the instruction was being single-stepped, and if so, + * pretend we got a single-step exception. This was pointed out + * by Kumar Gala. 
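
The emulation helpers that follow all recognise instructions by masking off the variable register fields and comparing against a fixed template, rather than fully decoding them. A toy version of that mask/match test (the template/mask pair for "mfspr rD, PVR" is quoted from memory of ppc-opcode.h and should be treated as illustrative):

#include <stdio.h>
#include <stdint.h>

#define INST_MFSPR_PVR      0x7c1f42a6  /* mfspr r0, PVR template */
#define INST_MFSPR_PVR_MASK 0xfc1fffff  /* everything fixed but rD */

int main(void)
{
        uint32_t instword = 0x7c7f42a6; /* mfspr r3, PVR */

        if ((instword & INST_MFSPR_PVR_MASK) == INST_MFSPR_PVR)
                printf("matched, rD = %u\n", (instword >> 21) & 0x1f);
        return 0;
}
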
-- paulus + */ +static void emulate_single_step(struct pt_regs *regs) +{ + if (single_stepping(regs)) + single_step_exception(regs); +} + +static inline int __parse_fpscr(unsigned long fpscr) +{ + int ret = 0; + + /* Invalid operation */ + if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX)) + ret = FPE_FLTINV; + + /* Overflow */ + else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX)) + ret = FPE_FLTOVF; + + /* Underflow */ + else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX)) + ret = FPE_FLTUND; + + /* Divide by zero */ + else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX)) + ret = FPE_FLTDIV; + + /* Inexact result */ + else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX)) + ret = FPE_FLTRES; + + return ret; +} + +static void parse_fpe(struct pt_regs *regs) +{ + int code = 0; + + flush_fp_to_thread(current); + + code = __parse_fpscr(current->thread.fpscr.val); + + _exception(SIGFPE, regs, code, regs->nip); +} + +/* + * Illegal instruction emulation support. Originally written to + * provide the PVR to user applications using the mfspr rd, PVR. + * Return non-zero if we can't emulate, or -EFAULT if the associated + * memory access caused an access fault. Return zero on success. + * + * There are a couple of ways to do this, either "decode" the instruction + * or directly match lots of bits. In this case, matching lots of + * bits is faster and easier. + * + */ +static int emulate_string_inst(struct pt_regs *regs, u32 instword) +{ + u8 rT = (instword >> 21) & 0x1f; + u8 rA = (instword >> 16) & 0x1f; + u8 NB_RB = (instword >> 11) & 0x1f; + u32 num_bytes; + unsigned long EA; + int pos = 0; + + /* Early out if we are an invalid form of lswx */ + if ((instword & PPC_INST_STRING_MASK) == PPC_INST_LSWX) + if ((rT == rA) || (rT == NB_RB)) + return -EINVAL; + + EA = (rA == 0) ? 0 : regs->gpr[rA]; + + switch (instword & PPC_INST_STRING_MASK) { + case PPC_INST_LSWX: + case PPC_INST_STSWX: + EA += NB_RB; + num_bytes = regs->xer & 0x7f; + break; + case PPC_INST_LSWI: + case PPC_INST_STSWI: + num_bytes = (NB_RB == 0) ? 32 : NB_RB; + break; + default: + return -EINVAL; + } + + while (num_bytes != 0) + { + u8 val; + u32 shift = 8 * (3 - (pos & 0x3)); + + switch ((instword & PPC_INST_STRING_MASK)) { + case PPC_INST_LSWX: + case PPC_INST_LSWI: + if (get_user(val, (u8 __user *)EA)) + return -EFAULT; + /* first time updating this reg, + * zero it out */ + if (pos == 0) + regs->gpr[rT] = 0; + regs->gpr[rT] |= val << shift; + break; + case PPC_INST_STSWI: + case PPC_INST_STSWX: + val = regs->gpr[rT] >> shift; + if (put_user(val, (u8 __user *)EA)) + return -EFAULT; + break; + } + /* move EA to next address */ + EA += 1; + num_bytes--; + + /* manage our position within the register */ + if (++pos == 4) { + pos = 0; + if (++rT == 32) + rT = 0; + } + } + + return 0; +} + +static int emulate_popcntb_inst(struct pt_regs *regs, u32 instword) +{ + u32 ra,rs; + unsigned long tmp; + + ra = (instword >> 16) & 0x1f; + rs = (instword >> 21) & 0x1f; + + tmp = regs->gpr[rs]; + tmp = tmp - ((tmp >> 1) & 0x5555555555555555ULL); + tmp = (tmp & 0x3333333333333333ULL) + ((tmp >> 2) & 0x3333333333333333ULL); + tmp = (tmp + (tmp >> 4)) & 0x0f0f0f0f0f0f0f0fULL; + regs->gpr[ra] = tmp; + + return 0; +} + +static int emulate_isel(struct pt_regs *regs, u32 instword) +{ + u8 rT = (instword >> 21) & 0x1f; + u8 rA = (instword >> 16) & 0x1f; + u8 rB = (instword >> 11) & 0x1f; + u8 BC = (instword >> 6) & 0x1f; + u8 bit; + unsigned long tmp; + + tmp = (rA == 0) ? 0 : regs->gpr[rA]; + bit = (regs->ccr >> (31 - BC)) & 0x1; + + regs->gpr[rT] = bit ? 
tmp : regs->gpr[rB]; + + return 0; +} + +static int emulate_instruction(struct pt_regs *regs) +{ + u32 instword; + u32 rd; + + if (!user_mode(regs) || (regs->msr & MSR_LE)) + return -EINVAL; + CHECK_FULL_REGS(regs); + + if (get_user(instword, (u32 __user *)(regs->nip))) + return -EFAULT; + + /* Emulate the mfspr rD, PVR. */ + if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) { + PPC_WARN_EMULATED(mfpvr, regs); + rd = (instword >> 21) & 0x1f; + regs->gpr[rd] = mfspr(SPRN_PVR); + return 0; + } + + /* Emulating the dcba insn is just a no-op. */ + if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) { + PPC_WARN_EMULATED(dcba, regs); + return 0; + } + + /* Emulate the mcrxr insn. */ + if ((instword & PPC_INST_MCRXR_MASK) == PPC_INST_MCRXR) { + int shift = (instword >> 21) & 0x1c; + unsigned long msk = 0xf0000000UL >> shift; + + PPC_WARN_EMULATED(mcrxr, regs); + regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk); + regs->xer &= ~0xf0000000UL; + return 0; + } + + /* Emulate load/store string insn. */ + if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) { + PPC_WARN_EMULATED(string, regs); + return emulate_string_inst(regs, instword); + } + + /* Emulate the popcntb (Population Count Bytes) instruction. */ + if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) { + PPC_WARN_EMULATED(popcntb, regs); + return emulate_popcntb_inst(regs, instword); + } + + /* Emulate isel (Integer Select) instruction */ + if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) { + PPC_WARN_EMULATED(isel, regs); + return emulate_isel(regs, instword); + } + +#ifdef CONFIG_PPC64 + /* Emulate the mfspr rD, DSCR. */ + if (((instword & PPC_INST_MFSPR_DSCR_MASK) == PPC_INST_MFSPR_DSCR) && + cpu_has_feature(CPU_FTR_DSCR)) { + PPC_WARN_EMULATED(mfdscr, regs); + rd = (instword >> 21) & 0x1f; + regs->gpr[rd] = mfspr(SPRN_DSCR); + return 0; + } + /* Emulate the mtspr DSCR, rD. */ + if (((instword & PPC_INST_MTSPR_DSCR_MASK) == PPC_INST_MTSPR_DSCR) && + cpu_has_feature(CPU_FTR_DSCR)) { + PPC_WARN_EMULATED(mtdscr, regs); + rd = (instword >> 21) & 0x1f; + mtspr(SPRN_DSCR, regs->gpr[rd]); + current->thread.dscr_inherit = 1; + return 0; + } +#endif + + return -EINVAL; +} + +int is_valid_bugaddr(unsigned long addr) +{ + return is_kernel_addr(addr); +} + +void __kprobes program_check_exception(struct pt_regs *regs) +{ + unsigned int reason = get_reason(regs); + extern int do_mathemu(struct pt_regs *regs); + + /* We can now get here via a FP Unavailable exception if the core + * has no FPU, in that case the reason flags will be 0 */ + + if (reason & REASON_FP) { + /* IEEE FP exception */ + parse_fpe(regs); + return; + } + if (reason & REASON_TRAP) { + /* Debugger is first in line to stop recursive faults in + * rcu_lock, notify_die, or atomic_notifier_call_chain */ + if (debugger_bpt(regs)) + return; + + /* trap exception */ + if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP) + == NOTIFY_STOP) + return; + + if (!(regs->msr & MSR_PR) && /* not user-mode */ + report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) { + regs->nip += 4; + return; + } + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); + return; + } + + /* We restore the interrupt state now */ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); + +#ifdef CONFIG_MATH_EMULATION + /* (reason & REASON_ILLEGAL) would be the obvious thing here, + * but there seems to be a hardware bug on the 405GP (RevD) + * that means ESR is sometimes set incorrectly - either to + * ESR_DST (!?) or 0. 
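
The popcntb emulation above is the classic SWAR bit count, deliberately stopped at the byte stage: after the third step every byte of tmp holds the population count of the corresponding source byte, which is exactly popcntb's per-byte semantics. A standalone check:

#include <stdio.h>
#include <stdint.h>

static uint64_t popcntb(uint64_t tmp)
{
        tmp = tmp - ((tmp >> 1) & 0x5555555555555555ULL);
        tmp = (tmp & 0x3333333333333333ULL) +
              ((tmp >> 2) & 0x3333333333333333ULL);
        return (tmp + (tmp >> 4)) & 0x0f0f0f0f0f0f0f0fULL;  /* per-byte counts */
}

int main(void)
{
        /* bytes 0xff, 0x0f, 0x01, 0x00 have 8, 4, 1, 0 bits set */
        printf("%016llx\n",
               (unsigned long long)popcntb(0xff0f010000000000ULL));
        /* prints 0804010000000000 */
        return 0;
}
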
In the process of chasing this with the + * hardware people - not sure if it can happen on any illegal + * instruction or only on FP instructions, whether there is a + * pattern to occurrences etc. -dgibson 31/Mar/2003 */ + switch (do_mathemu(regs)) { + case 0: + emulate_single_step(regs); + return; + case 1: { + int code = 0; + code = __parse_fpscr(current->thread.fpscr.val); + _exception(SIGFPE, regs, code, regs->nip); + return; + } + case -EFAULT: + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); + return; + } + /* fall through on any other errors */ +#endif /* CONFIG_MATH_EMULATION */ + + /* Try to emulate it if we should. */ + if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) { + switch (emulate_instruction(regs)) { + case 0: + regs->nip += 4; + emulate_single_step(regs); + return; + case -EFAULT: + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); + return; + } + } + + if (reason & REASON_PRIVILEGED) + _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); + else + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); +} + +void alignment_exception(struct pt_regs *regs) +{ + int sig, code, fixed = 0; + + /* We restore the interrupt state now */ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); + + /* we don't implement logging of alignment exceptions */ + if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS)) + fixed = fix_alignment(regs); + + if (fixed == 1) { + regs->nip += 4; /* skip over emulated instruction */ + emulate_single_step(regs); + return; + } + + /* Operand address was bad */ + if (fixed == -EFAULT) { + sig = SIGSEGV; + code = SEGV_ACCERR; + } else { + sig = SIGBUS; + code = BUS_ADRALN; + } + if (user_mode(regs)) + _exception(sig, regs, code, regs->dar); + else + bad_page_fault(regs, regs->dar, sig); +} + +void StackOverflow(struct pt_regs *regs) +{ + printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n", + current, regs->gpr[1]); + debugger(regs); + show_regs(regs); + panic("kernel stack overflow"); +} + +void nonrecoverable_exception(struct pt_regs *regs) +{ + printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n", + regs->nip, regs->msr); + debugger(regs); + die("nonrecoverable exception", regs, SIGKILL); +} + +void trace_syscall(struct pt_regs *regs) +{ + printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", + current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0], + regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted()); +} + +void kernel_fp_unavailable_exception(struct pt_regs *regs) +{ + printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); + die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); +} + +void altivec_unavailable_exception(struct pt_regs *regs) +{ + if (user_mode(regs)) { + /* A user program has executed an altivec instruction, + but this kernel doesn't support altivec. */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return; + } + + printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); + die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); +} + +void vsx_unavailable_exception(struct pt_regs *regs) +{ + if (user_mode(regs)) { + /* A user program has executed an vsx instruction, + but this kernel doesn't support vsx. 
*/ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return; + } + + printk(KERN_EMERG "Unrecoverable VSX Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); + die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT); +} + +void performance_monitor_exception(struct pt_regs *regs) +{ + __get_cpu_var(irq_stat).pmu_irqs++; + + perf_irq(regs); +} + +#ifdef CONFIG_8xx +void SoftwareEmulation(struct pt_regs *regs) +{ + extern int do_mathemu(struct pt_regs *); + extern int Soft_emulate_8xx(struct pt_regs *); +#if defined(CONFIG_MATH_EMULATION) || defined(CONFIG_8XX_MINIMAL_FPEMU) + int errcode; +#endif + + CHECK_FULL_REGS(regs); + + if (!user_mode(regs)) { + debugger(regs); + die("Kernel Mode Software FPU Emulation", regs, SIGFPE); + } + +#ifdef CONFIG_MATH_EMULATION + errcode = do_mathemu(regs); + if (errcode >= 0) + PPC_WARN_EMULATED(math, regs); + + switch (errcode) { + case 0: + emulate_single_step(regs); + return; + case 1: { + int code = 0; + code = __parse_fpscr(current->thread.fpscr.val); + _exception(SIGFPE, regs, code, regs->nip); + return; + } + case -EFAULT: + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); + return; + default: + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return; + } + +#elif defined(CONFIG_8XX_MINIMAL_FPEMU) + errcode = Soft_emulate_8xx(regs); + if (errcode >= 0) + PPC_WARN_EMULATED(8xx, regs); + + switch (errcode) { + case 0: + emulate_single_step(regs); + return; + case 1: + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return; + case -EFAULT: + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); + return; + } +#else + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); +#endif +} +#endif /* CONFIG_8xx */ + +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +static void handle_debug(struct pt_regs *regs, unsigned long debug_status) +{ + int changed = 0; + /* + * Determine the cause of the debug event, clear the + * event flags and send a trap to the handler. Torez + */ + if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) { + dbcr_dac(current) &= ~(DBCR_DAC1R | DBCR_DAC1W); +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + current->thread.dbcr2 &= ~DBCR2_DAC12MODE; +#endif + do_send_trap(regs, mfspr(SPRN_DAC1), debug_status, TRAP_HWBKPT, + 5); + changed |= 0x01; + } else if (debug_status & (DBSR_DAC2R | DBSR_DAC2W)) { + dbcr_dac(current) &= ~(DBCR_DAC2R | DBCR_DAC2W); + do_send_trap(regs, mfspr(SPRN_DAC2), debug_status, TRAP_HWBKPT, + 6); + changed |= 0x01; + } else if (debug_status & DBSR_IAC1) { + current->thread.dbcr0 &= ~DBCR0_IAC1; + dbcr_iac_range(current) &= ~DBCR_IAC12MODE; + do_send_trap(regs, mfspr(SPRN_IAC1), debug_status, TRAP_HWBKPT, + 1); + changed |= 0x01; + } else if (debug_status & DBSR_IAC2) { + current->thread.dbcr0 &= ~DBCR0_IAC2; + do_send_trap(regs, mfspr(SPRN_IAC2), debug_status, TRAP_HWBKPT, + 2); + changed |= 0x01; + } else if (debug_status & DBSR_IAC3) { + current->thread.dbcr0 &= ~DBCR0_IAC3; + dbcr_iac_range(current) &= ~DBCR_IAC34MODE; + do_send_trap(regs, mfspr(SPRN_IAC3), debug_status, TRAP_HWBKPT, + 3); + changed |= 0x01; + } else if (debug_status & DBSR_IAC4) { + current->thread.dbcr0 &= ~DBCR0_IAC4; + do_send_trap(regs, mfspr(SPRN_IAC4), debug_status, TRAP_HWBKPT, + 4); + changed |= 0x01; + } + /* + * At the point this routine was called, the MSR(DE) was turned off. + * Check all other debug flags and see if that bit needs to be turned + * back on or not. 
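+	 * For example, if only a DAC1 event fired and was cleared above
+	 * but an IAC breakpoint is still armed in dbcr0/dbcr1, then
+	 * DBCR_ACTIVE_EVENTS() is true and MSR(DE) must be re-enabled;
+	 * if nothing is left armed we also clear DBCR0_IDM below so the
+	 * core stops raising debug interrupts for this task.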
+	 */
+	if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, current->thread.dbcr1))
+		regs->msr |= MSR_DE;
+	else
+		/* Make sure the IDM flag is off */
+		current->thread.dbcr0 &= ~DBCR0_IDM;
+
+	if (changed & 0x01)
+		mtspr(SPRN_DBCR0, current->thread.dbcr0);
+}
+
+void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
+{
+	current->thread.dbsr = debug_status;
+
+	/* Hack alert: On BookE, Branch Taken stops on the branch itself, while
+	 * on server, it stops on the target of the branch. In order to simulate
+	 * the server behaviour, we thus restart right away with a single step
+	 * instead of stopping here when hitting a BT
+	 */
+	if (debug_status & DBSR_BT) {
+		regs->msr &= ~MSR_DE;
+
+		/* Disable BT */
+		mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_BT);
+		/* Clear the BT event */
+		mtspr(SPRN_DBSR, DBSR_BT);
+
+		/* Do the single step trick only when coming from userspace */
+		if (user_mode(regs)) {
+			current->thread.dbcr0 &= ~DBCR0_BT;
+			current->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+			regs->msr |= MSR_DE;
+			return;
+		}
+
+		if (notify_die(DIE_SSTEP, "block_step", regs, 5,
+			       5, SIGTRAP) == NOTIFY_STOP) {
+			return;
+		}
+		if (debugger_sstep(regs))
+			return;
+	} else if (debug_status & DBSR_IC) { 	/* Instruction complete */
+		regs->msr &= ~MSR_DE;
+
+		/* Disable instruction completion */
+		mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC);
+		/* Clear the instruction completion event */
+		mtspr(SPRN_DBSR, DBSR_IC);
+
+		if (notify_die(DIE_SSTEP, "single_step", regs, 5,
+			       5, SIGTRAP) == NOTIFY_STOP) {
+			return;
+		}
+
+		if (debugger_sstep(regs))
+			return;
+
+		if (user_mode(regs)) {
+			current->thread.dbcr0 &= ~DBCR0_IC;
+			if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0,
+					       current->thread.dbcr1))
+				regs->msr |= MSR_DE;
+			else
+				/* Make sure the IDM bit is off */
+				current->thread.dbcr0 &= ~DBCR0_IDM;
+		}
+
+		_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
+	} else
+		handle_debug(regs, debug_status);
+}
+#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
+
+#if !defined(CONFIG_TAU_INT)
+void TAUException(struct pt_regs *regs)
+{
+	printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx %s\n",
+	       regs->nip, regs->msr, regs->trap, print_tainted());
+}
+#endif /* CONFIG_TAU_INT */
+
+#ifdef CONFIG_ALTIVEC
+void altivec_assist_exception(struct pt_regs *regs)
+{
+	int err;
+
+	if (!user_mode(regs)) {
+		printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode"
+		       " at %lx\n", regs->nip);
+		die("Kernel VMX/Altivec assist exception", regs, SIGILL);
+	}
+
+	flush_altivec_to_thread(current);
+
+	PPC_WARN_EMULATED(altivec, regs);
+	err = emulate_altivec(regs);
+	if (err == 0) {
+		regs->nip += 4;		/* skip emulated instruction */
+		emulate_single_step(regs);
+		return;
+	}
+
+	if (err == -EFAULT) {
+		/* got an error reading the instruction */
+		_exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
+	} else {
+		/* didn't recognize the instruction */
+		/* XXX quick hack for now: set the non-Java bit in the VSCR */
+		printk_ratelimited(KERN_ERR "Unrecognized altivec instruction "
+				   "in %s at %lx\n", current->comm, regs->nip);
+		current->thread.vscr.u[3] |= 0x10000;
+	}
+}
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+void vsx_assist_exception(struct pt_regs *regs)
+{
+	if (!user_mode(regs)) {
+		printk(KERN_EMERG "VSX assist exception in kernel mode"
+		       " at %lx\n", regs->nip);
+		die("Kernel VSX assist exception", regs, SIGILL);
+	}
+
+	flush_vsx_to_thread(current);
+	printk(KERN_INFO "VSX assist not supported at %lx\n", regs->nip);
+	_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+}
+#endif /* CONFIG_VSX */
+
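A quick way to see the emulation path above end to end is from userspace. The following is a hypothetical sketch, not part of this diff: it executes a raw mcrxr opcode (0x7c000400, i.e. PPC_INST_MCRXR with BF=0) and, assuming CONFIG_PPC_EMULATED_STATS is enabled and debugfs is mounted at /sys/kernel/debug, reads back the counter that ppc_warn_emulated_init() (later in this file) exposes for it. On cores that still implement mcrxr the instruction runs natively and the counter does not move.

#include <stdio.h>

/* Assumed paths/config: debugfs mounted at /sys/kernel/debug,
 * kernel built with CONFIG_PPC_EMULATED_STATS. */
#define MCRXR_STAT "/sys/kernel/debug/powerpc/emulated_instructions/mcrxr"

static long read_mcrxr_count(void)
{
	FILE *f = fopen(MCRXR_STAT, "r");
	long val = -1;

	if (f) {
		if (fscanf(f, "%ld", &val) != 1)
			val = -1;
		fclose(f);
	}
	return val;
}

int main(void)
{
	long before = read_mcrxr_count();

	/* mcrxr cr0: copy XER[SO,OV,CA] into CR0 and clear them in XER.
	 * Emitted as a raw word so the assembler needs no mnemonic; on
	 * CPUs that dropped mcrxr this traps to program_check_exception()
	 * and is handled by emulate_instruction() above. XER is also
	 * modified, which this toy program does not depend on. */
	__asm__ volatile(".long 0x7c000400" : : : "cc");

	printf("mcrxr emulations: %ld -> %ld\n", before, read_mcrxr_count());
	return 0;
}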
+#ifdef CONFIG_FSL_BOOKE +void CacheLockingException(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + /* We treat cache locking instructions from the user + * as priv ops, in the future we could try to do + * something smarter + */ + if (error_code & (ESR_DLK|ESR_ILK)) + _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); + return; +} +#endif /* CONFIG_FSL_BOOKE */ + +#ifdef CONFIG_SPE +void SPEFloatingPointException(struct pt_regs *regs) +{ + extern int do_spe_mathemu(struct pt_regs *regs); + unsigned long spefscr; + int fpexc_mode; + int code = 0; + int err; + + flush_spe_to_thread(current); + + spefscr = current->thread.spefscr; + fpexc_mode = current->thread.fpexc_mode; + + if ((spefscr & SPEFSCR_FOVF) && (fpexc_mode & PR_FP_EXC_OVF)) { + code = FPE_FLTOVF; + } + else if ((spefscr & SPEFSCR_FUNF) && (fpexc_mode & PR_FP_EXC_UND)) { + code = FPE_FLTUND; + } + else if ((spefscr & SPEFSCR_FDBZ) && (fpexc_mode & PR_FP_EXC_DIV)) + code = FPE_FLTDIV; + else if ((spefscr & SPEFSCR_FINV) && (fpexc_mode & PR_FP_EXC_INV)) { + code = FPE_FLTINV; + } + else if ((spefscr & (SPEFSCR_FG | SPEFSCR_FX)) && (fpexc_mode & PR_FP_EXC_RES)) + code = FPE_FLTRES; + + err = do_spe_mathemu(regs); + if (err == 0) { + regs->nip += 4; /* skip emulated instruction */ + emulate_single_step(regs); + return; + } + + if (err == -EFAULT) { + /* got an error reading the instruction */ + _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip); + } else if (err == -EINVAL) { + /* didn't recognize the instruction */ + printk(KERN_ERR "unrecognized spe instruction " + "in %s at %lx\n", current->comm, regs->nip); + } else { + _exception(SIGFPE, regs, code, regs->nip); + } + + return; +} + +void SPEFloatingPointRoundException(struct pt_regs *regs) +{ + extern int speround_handler(struct pt_regs *regs); + int err; + + preempt_disable(); + if (regs->msr & MSR_SPE) + giveup_spe(current); + preempt_enable(); + + regs->nip -= 4; + err = speround_handler(regs); + if (err == 0) { + regs->nip += 4; /* skip emulated instruction */ + emulate_single_step(regs); + return; + } + + if (err == -EFAULT) { + /* got an error reading the instruction */ + _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip); + } else if (err == -EINVAL) { + /* didn't recognize the instruction */ + printk(KERN_ERR "unrecognized spe instruction " + "in %s at %lx\n", current->comm, regs->nip); + } else { + _exception(SIGFPE, regs, 0, regs->nip); + return; + } +} +#endif + +/* + * We enter here if we get an unrecoverable exception, that is, one + * that happened at a point where the RI (recoverable interrupt) bit + * in the MSR is 0. This indicates that SRR0/1 are live, and that + * we therefore lost state by taking this exception. + */ +void unrecoverable_exception(struct pt_regs *regs) +{ + printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n", + regs->trap, regs->nip); + die("Unrecoverable exception", regs, SIGABRT); +} + +#ifdef CONFIG_BOOKE_WDT +/* + * Default handler for a Watchdog exception, + * spins until a reboot occurs + */ +void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs) +{ + /* Generic WatchdogHandler, implement your own */ + mtspr(SPRN_TCR, mfspr(SPRN_TCR)&(~TCR_WIE)); + return; +} + +void WatchdogException(struct pt_regs *regs) +{ + printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n"); + WatchdogHandler(regs); +} +#endif + +/* + * We enter here if we discover during exception entry that we are + * running in supervisor mode with a userspace value in the stack pointer. 
+ */ +void kernel_bad_stack(struct pt_regs *regs) +{ + printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n", + regs->gpr[1], regs->nip); + die("Bad kernel stack pointer", regs, SIGABRT); +} + +void __init trap_init(void) +{ +} + + +#ifdef CONFIG_PPC_EMULATED_STATS + +#define WARN_EMULATED_SETUP(type) .type = { .name = #type } + +struct ppc_emulated ppc_emulated = { +#ifdef CONFIG_ALTIVEC + WARN_EMULATED_SETUP(altivec), +#endif + WARN_EMULATED_SETUP(dcba), + WARN_EMULATED_SETUP(dcbz), + WARN_EMULATED_SETUP(fp_pair), + WARN_EMULATED_SETUP(isel), + WARN_EMULATED_SETUP(mcrxr), + WARN_EMULATED_SETUP(mfpvr), + WARN_EMULATED_SETUP(multiple), + WARN_EMULATED_SETUP(popcntb), + WARN_EMULATED_SETUP(spe), + WARN_EMULATED_SETUP(string), + WARN_EMULATED_SETUP(unaligned), +#ifdef CONFIG_MATH_EMULATION + WARN_EMULATED_SETUP(math), +#elif defined(CONFIG_8XX_MINIMAL_FPEMU) + WARN_EMULATED_SETUP(8xx), +#endif +#ifdef CONFIG_VSX + WARN_EMULATED_SETUP(vsx), +#endif +#ifdef CONFIG_PPC64 + WARN_EMULATED_SETUP(mfdscr), + WARN_EMULATED_SETUP(mtdscr), +#endif +}; + +u32 ppc_warn_emulated; + +void ppc_warn_emulated_print(const char *type) +{ + pr_warn_ratelimited("%s used emulated %s instruction\n", current->comm, + type); +} + +static int __init ppc_warn_emulated_init(void) +{ + struct dentry *dir, *d; + unsigned int i; + struct ppc_emulated_entry *entries = (void *)&ppc_emulated; + + if (!powerpc_debugfs_root) + return -ENODEV; + + dir = debugfs_create_dir("emulated_instructions", + powerpc_debugfs_root); + if (!dir) + return -ENOMEM; + + d = debugfs_create_u32("do_warn", S_IRUGO | S_IWUSR, dir, + &ppc_warn_emulated); + if (!d) + goto fail; + + for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) { + d = debugfs_create_u32(entries[i].name, S_IRUGO | S_IWUSR, dir, + (u32 *)&entries[i].val.counter); + if (!d) + goto fail; + } + + return 0; + +fail: + debugfs_remove_recursive(dir); + return -ENOMEM; +} + +device_initcall(ppc_warn_emulated_init); + +#endif /* CONFIG_PPC_EMULATED_STATS */ diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c new file mode 100644 index 00000000..c39c1ca7 --- /dev/null +++ b/arch/powerpc/kernel/udbg.c @@ -0,0 +1,206 @@ +/* + * polling mode stateless debugging stuff, originally for NS16550 Serial Ports + * + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <stdarg.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/console.h> +#include <linux/init.h> +#include <asm/processor.h> +#include <asm/udbg.h> + +void (*udbg_putc)(char c); +void (*udbg_flush)(void); +int (*udbg_getc)(void); +int (*udbg_getc_poll)(void); + +/* + * Early debugging facilities. You can enable _one_ of these via .config, + * if you do so your kernel _will not boot_ on anything else. Be careful. 
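+ *
+ * Each init routine below just points the udbg_putc/udbg_getc hooks
+ * at the chosen device; udbg_puts(), udbg_printf() and the early
+ * boot console then work unchanged on top of those hooks.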
+ */ +void __init udbg_early_init(void) +{ +#if defined(CONFIG_PPC_EARLY_DEBUG_LPAR) + /* For LPAR machines that have an HVC console on vterm 0 */ + udbg_init_debug_lpar(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_LPAR_HVSI) + /* For LPAR machines that have an HVSI console on vterm 0 */ + udbg_init_debug_lpar_hvsi(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_G5) + /* For use on Apple G5 machines */ + udbg_init_pmac_realmode(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_RTAS_PANEL) + /* RTAS panel debug */ + udbg_init_rtas_panel(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_RTAS_CONSOLE) + /* RTAS console debug */ + udbg_init_rtas_console(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_MAPLE) + /* Maple real mode debug */ + udbg_init_maple_realmode(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_BEAT) + udbg_init_debug_beat(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE) + udbg_init_pas_realmode(); +#elif defined(CONFIG_BOOTX_TEXT) + udbg_init_btext(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_44x) + /* PPC44x debug */ + udbg_init_44x_as1(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_40x) + /* PPC40x debug */ + udbg_init_40x_realmode(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_CPM) + udbg_init_cpm(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO) + udbg_init_usbgecko(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP) + udbg_init_wsp(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_EHV_BC) + udbg_init_ehv_bc(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_PS3GELIC) + udbg_init_ps3gelic(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_OPAL_RAW) + udbg_init_debug_opal_raw(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_OPAL_HVSI) + udbg_init_debug_opal_hvsi(); +#endif + +#ifdef CONFIG_PPC_EARLY_DEBUG + console_loglevel = 10; + + register_early_udbg_console(); +#endif +} + +/* udbg library, used by xmon et al */ +void udbg_puts(const char *s) +{ + if (udbg_putc) { + char c; + + if (s && *s != '\0') { + while ((c = *s++) != '\0') + udbg_putc(c); + } + + if (udbg_flush) + udbg_flush(); + } +#if 0 + else { + printk("%s", s); + } +#endif +} + +int udbg_write(const char *s, int n) +{ + int remain = n; + char c; + + if (!udbg_putc) + return 0; + + if (s && *s != '\0') { + while (((c = *s++) != '\0') && (remain-- > 0)) { + udbg_putc(c); + } + } + + if (udbg_flush) + udbg_flush(); + + return n - remain; +} + +int udbg_read(char *buf, int buflen) +{ + char *p = buf; + int i, c; + + if (!udbg_getc) + return 0; + + for (i = 0; i < buflen; ++i) { + do { + c = udbg_getc(); + if (c == -1 && i == 0) + return -1; + + } while (c == 0x11 || c == 0x13); + if (c == 0 || c == -1) + break; + *p++ = c; + } + + return i; +} + +#define UDBG_BUFSIZE 256 +void udbg_printf(const char *fmt, ...) +{ + char buf[UDBG_BUFSIZE]; + va_list args; + + va_start(args, fmt); + vsnprintf(buf, UDBG_BUFSIZE, fmt, args); + udbg_puts(buf); + va_end(args); +} + +void __init udbg_progress(char *s, unsigned short hex) +{ + udbg_puts(s); + udbg_puts("\n"); +} + +/* + * Early boot console based on udbg + */ +static void udbg_console_write(struct console *con, const char *s, + unsigned int n) +{ + udbg_write(s, n); +} + +static struct console udbg_console = { + .name = "udbg", + .write = udbg_console_write, + .flags = CON_PRINTBUFFER | CON_ENABLED | CON_BOOT | CON_ANYTIME, + .index = 0, +}; + +static int early_console_initialized; + +/* + * Called by setup_system after ppc_md->probe and ppc_md->early_init. + * Call it again after setting udbg_putc in ppc_md->setup_arch. 
+ */ +void __init register_early_udbg_console(void) +{ + if (early_console_initialized) + return; + + if (!udbg_putc) + return; + + if (strstr(boot_command_line, "udbg-immortal")) { + printk(KERN_INFO "early console immortal !\n"); + udbg_console.flags &= ~CON_BOOT; + } + early_console_initialized = 1; + register_console(&udbg_console); +} + +#if 0 /* if you want to use this as a regular output console */ +console_initcall(register_udbg_console); +#endif diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c new file mode 100644 index 00000000..6837f839 --- /dev/null +++ b/arch/powerpc/kernel/udbg_16550.c @@ -0,0 +1,351 @@ +/* + * udbg for NS16550 compatible serial ports + * + * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/types.h> +#include <asm/udbg.h> +#include <asm/io.h> +#include <asm/reg_a2.h> + +extern u8 real_readb(volatile u8 __iomem *addr); +extern void real_writeb(u8 data, volatile u8 __iomem *addr); +extern u8 real_205_readb(volatile u8 __iomem *addr); +extern void real_205_writeb(u8 data, volatile u8 __iomem *addr); + +struct NS16550 { + /* this struct must be packed */ + unsigned char rbr; /* 0 */ + unsigned char ier; /* 1 */ + unsigned char fcr; /* 2 */ + unsigned char lcr; /* 3 */ + unsigned char mcr; /* 4 */ + unsigned char lsr; /* 5 */ + unsigned char msr; /* 6 */ + unsigned char scr; /* 7 */ +}; + +#define thr rbr +#define iir fcr +#define dll rbr +#define dlm ier +#define dlab lcr + +#define LSR_DR 0x01 /* Data ready */ +#define LSR_OE 0x02 /* Overrun */ +#define LSR_PE 0x04 /* Parity error */ +#define LSR_FE 0x08 /* Framing error */ +#define LSR_BI 0x10 /* Break */ +#define LSR_THRE 0x20 /* Xmit holding register empty */ +#define LSR_TEMT 0x40 /* Xmitter empty */ +#define LSR_ERR 0x80 /* Error */ + +#define LCR_DLAB 0x80 + +static struct NS16550 __iomem *udbg_comport; + +static void udbg_550_flush(void) +{ + if (udbg_comport) { + while ((in_8(&udbg_comport->lsr) & LSR_THRE) == 0) + /* wait for idle */; + } +} + +static void udbg_550_putc(char c) +{ + if (udbg_comport) { + if (c == '\n') + udbg_550_putc('\r'); + udbg_550_flush(); + out_8(&udbg_comport->thr, c); + } +} + +static int udbg_550_getc_poll(void) +{ + if (udbg_comport) { + if ((in_8(&udbg_comport->lsr) & LSR_DR) != 0) + return in_8(&udbg_comport->rbr); + else + return -1; + } + return -1; +} + +static int udbg_550_getc(void) +{ + if (udbg_comport) { + while ((in_8(&udbg_comport->lsr) & LSR_DR) == 0) + /* wait for char */; + return in_8(&udbg_comport->rbr); + } + return -1; +} + +void udbg_init_uart(void __iomem *comport, unsigned int speed, + unsigned int clock) +{ + unsigned int dll, base_bauds; + + if (clock == 0) + clock = 1843200; + if (speed == 0) + speed = 9600; + + base_bauds = clock / 16; + dll = base_bauds / speed; + + if (comport) { + udbg_comport = (struct NS16550 __iomem *)comport; + out_8(&udbg_comport->lcr, 0x00); + out_8(&udbg_comport->ier, 0xff); + out_8(&udbg_comport->ier, 0x00); + out_8(&udbg_comport->lcr, LCR_DLAB); + out_8(&udbg_comport->dll, dll & 0xff); + out_8(&udbg_comport->dlm, dll >> 8); + /* 8 data, 1 stop, no parity */ + out_8(&udbg_comport->lcr, 0x03); + /* RTS/DTR */ + out_8(&udbg_comport->mcr, 0x03); + /* Clear & enable FIFOs */ + out_8(&udbg_comport->fcr ,0x07); + udbg_putc = 
udbg_550_putc;
+		udbg_flush = udbg_550_flush;
+		udbg_getc = udbg_550_getc;
+		udbg_getc_poll = udbg_550_getc_poll;
+	}
+}
+
+unsigned int udbg_probe_uart_speed(void __iomem *comport, unsigned int clock)
+{
+	unsigned int dll, dlm, divisor, prescaler, speed;
+	u8 old_lcr;
+	struct NS16550 __iomem *port = comport;
+
+	old_lcr = in_8(&port->lcr);
+
+	/* select divisor latch registers.  */
+	out_8(&port->lcr, LCR_DLAB);
+
+	/* now, read the divisor */
+	dll = in_8(&port->dll);
+	dlm = in_8(&port->dlm);
+	divisor = dlm << 8 | dll;
+
+	/* check prescaling */
+	if (in_8(&port->mcr) & 0x80)
+		prescaler = 4;
+	else
+		prescaler = 1;
+
+	/* restore the LCR */
+	out_8(&port->lcr, old_lcr);
+
+	/* calculate speed */
+	speed = (clock / prescaler) / (divisor * 16);
+
+	/* sanity check */
+	if (speed > (clock / 16))
+		speed = 9600;
+
+	return speed;
+}
+
+#ifdef CONFIG_PPC_MAPLE
+void udbg_maple_real_flush(void)
+{
+	if (udbg_comport) {
+		while ((real_readb(&udbg_comport->lsr) & LSR_THRE) == 0)
+			/* wait for idle */;
+	}
+}
+
+void udbg_maple_real_putc(char c)
+{
+	if (udbg_comport) {
+		if (c == '\n')
+			udbg_maple_real_putc('\r');
+		udbg_maple_real_flush();
+		real_writeb(c, &udbg_comport->thr); eieio();
+	}
+}
+
+void __init udbg_init_maple_realmode(void)
+{
+	udbg_comport = (struct NS16550 __iomem *)0xf40003f8;
+
+	udbg_putc = udbg_maple_real_putc;
+	udbg_flush = udbg_maple_real_flush;
+	udbg_getc = NULL;
+	udbg_getc_poll = NULL;
+}
+#endif /* CONFIG_PPC_MAPLE */
+
+#ifdef CONFIG_PPC_PASEMI
+void udbg_pas_real_flush(void)
+{
+	if (udbg_comport) {
+		while ((real_205_readb(&udbg_comport->lsr) & LSR_THRE) == 0)
+			/* wait for idle */;
+	}
+}
+
+void udbg_pas_real_putc(char c)
+{
+	if (udbg_comport) {
+		if (c == '\n')
+			udbg_pas_real_putc('\r');
+		udbg_pas_real_flush();
+		real_205_writeb(c, &udbg_comport->thr); eieio();
+	}
+}
+
+void udbg_init_pas_realmode(void)
+{
+	udbg_comport = (struct NS16550 __iomem *)0xfcff03f8UL;
+
+	udbg_putc = udbg_pas_real_putc;
+	udbg_flush = udbg_pas_real_flush;
+	udbg_getc = NULL;
+	udbg_getc_poll = NULL;
+}
+#endif /* CONFIG_PPC_PASEMI */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_44x
+#include <platforms/44x/44x.h>
+
+static void udbg_44x_as1_flush(void)
+{
+	if (udbg_comport) {
+		while ((as1_readb(&udbg_comport->lsr) & LSR_THRE) == 0)
+			/* wait for idle */;
+	}
+}
+
+static void udbg_44x_as1_putc(char c)
+{
+	if (udbg_comport) {
+		if (c == '\n')
+			udbg_44x_as1_putc('\r');
+		udbg_44x_as1_flush();
+		as1_writeb(c, &udbg_comport->thr); eieio();
+	}
+}
+
+static int udbg_44x_as1_getc(void)
+{
+	if (udbg_comport) {
+		while ((as1_readb(&udbg_comport->lsr) & LSR_DR) == 0)
+			; /* wait for char */
+		return as1_readb(&udbg_comport->rbr);
+	}
+	return -1;
+}
+
+void __init udbg_init_44x_as1(void)
+{
+	udbg_comport =
+		(struct NS16550 __iomem *)PPC44x_EARLY_DEBUG_VIRTADDR;
+
+	udbg_putc = udbg_44x_as1_putc;
+	udbg_flush = udbg_44x_as1_flush;
+	udbg_getc = udbg_44x_as1_getc;
+}
+#endif /* CONFIG_PPC_EARLY_DEBUG_44x */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_40x
+static void udbg_40x_real_flush(void)
+{
+	if (udbg_comport) {
+		while ((real_readb(&udbg_comport->lsr) & LSR_THRE) == 0)
+			/* wait for idle */;
+	}
+}
+
+static void udbg_40x_real_putc(char c)
+{
+	if (udbg_comport) {
+		if (c == '\n')
+			udbg_40x_real_putc('\r');
+		udbg_40x_real_flush();
+		real_writeb(c, &udbg_comport->thr); eieio();
+	}
+}
+
+static int udbg_40x_real_getc(void)
+{
+	if (udbg_comport) {
+		while ((real_readb(&udbg_comport->lsr) & LSR_DR) == 0)
+			; /* wait for char */
+		return real_readb(&udbg_comport->rbr);
+	}
+	return -1;
+}
+
+void 
__init udbg_init_40x_realmode(void) +{ + udbg_comport = (struct NS16550 __iomem *) + CONFIG_PPC_EARLY_DEBUG_40x_PHYSADDR; + + udbg_putc = udbg_40x_real_putc; + udbg_flush = udbg_40x_real_flush; + udbg_getc = udbg_40x_real_getc; + udbg_getc_poll = NULL; +} +#endif /* CONFIG_PPC_EARLY_DEBUG_40x */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_WSP +static void udbg_wsp_flush(void) +{ + if (udbg_comport) { + while ((readb(&udbg_comport->lsr) & LSR_THRE) == 0) + /* wait for idle */; + } +} + +static void udbg_wsp_putc(char c) +{ + if (udbg_comport) { + if (c == '\n') + udbg_wsp_putc('\r'); + udbg_wsp_flush(); + writeb(c, &udbg_comport->thr); eieio(); + } +} + +static int udbg_wsp_getc(void) +{ + if (udbg_comport) { + while ((readb(&udbg_comport->lsr) & LSR_DR) == 0) + ; /* wait for char */ + return readb(&udbg_comport->rbr); + } + return -1; +} + +static int udbg_wsp_getc_poll(void) +{ + if (udbg_comport) + if (readb(&udbg_comport->lsr) & LSR_DR) + return readb(&udbg_comport->rbr); + return -1; +} + +void __init udbg_init_wsp(void) +{ + udbg_comport = (struct NS16550 __iomem *)WSP_UART_VIRT; + + udbg_init_uart(udbg_comport, 57600, 50000000); + + udbg_putc = udbg_wsp_putc; + udbg_flush = udbg_wsp_flush; + udbg_getc = udbg_wsp_getc; + udbg_getc_poll = udbg_wsp_getc_poll; +} +#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c new file mode 100644 index 00000000..9eb5b9b5 --- /dev/null +++ b/arch/powerpc/kernel/vdso.c @@ -0,0 +1,829 @@ + +/* + * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/elf.h> +#include <linux/security.h> +#include <linux/bootmem.h> +#include <linux/memblock.h> + +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/cputable.h> +#include <asm/sections.h> +#include <asm/firmware.h> +#include <asm/vdso.h> +#include <asm/vdso_datapage.h> + +#include "setup.h" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) +#endif + +/* Max supported size for symbol names */ +#define MAX_SYMNAME 64 + +/* The alignment of the vDSO */ +#define VDSO_ALIGNMENT (1 << 16) + +extern char vdso32_start, vdso32_end; +static void *vdso32_kbase = &vdso32_start; +static unsigned int vdso32_pages; +static struct page **vdso32_pagelist; +unsigned long vdso32_sigtramp; +unsigned long vdso32_rt_sigtramp; + +#ifdef CONFIG_PPC64 +extern char vdso64_start, vdso64_end; +static void *vdso64_kbase = &vdso64_start; +static unsigned int vdso64_pages; +static struct page **vdso64_pagelist; +unsigned long vdso64_rt_sigtramp; +#endif /* CONFIG_PPC64 */ + +static int vdso_ready; + +/* + * The vdso data page (aka. systemcfg for old ppc64 fans) is here. 
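+ * It is a single page shared by all processes; vdso_init() below
+ * appends it to the vDSO page lists, so it is mapped (without
+ * VM_WRITE) right after the vDSO text of each process.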
+ * Once the early boot kernel code no longer needs to muck around + * with it, it will become dynamically allocated + */ +static union { + struct vdso_data data; + u8 page[PAGE_SIZE]; +} vdso_data_store __page_aligned_data; +struct vdso_data *vdso_data = &vdso_data_store.data; + +/* Format of the patch table */ +struct vdso_patch_def +{ + unsigned long ftr_mask, ftr_value; + const char *gen_name; + const char *fix_name; +}; + +/* Table of functions to patch based on the CPU type/revision + * + * Currently, we only change sync_dicache to do nothing on processors + * with a coherent icache + */ +static struct vdso_patch_def vdso_patches[] = { + { + CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE, + "__kernel_sync_dicache", "__kernel_sync_dicache_p5" + }, + { + CPU_FTR_USE_TB, 0, + "__kernel_gettimeofday", NULL + }, + { + CPU_FTR_USE_TB, 0, + "__kernel_clock_gettime", NULL + }, + { + CPU_FTR_USE_TB, 0, + "__kernel_clock_getres", NULL + }, + { + CPU_FTR_USE_TB, 0, + "__kernel_get_tbfreq", NULL + }, +}; + +/* + * Some infos carried around for each of them during parsing at + * boot time. + */ +struct lib32_elfinfo +{ + Elf32_Ehdr *hdr; /* ptr to ELF */ + Elf32_Sym *dynsym; /* ptr to .dynsym section */ + unsigned long dynsymsize; /* size of .dynsym section */ + char *dynstr; /* ptr to .dynstr section */ + unsigned long text; /* offset of .text section in .so */ +}; + +struct lib64_elfinfo +{ + Elf64_Ehdr *hdr; + Elf64_Sym *dynsym; + unsigned long dynsymsize; + char *dynstr; + unsigned long text; +}; + + +#ifdef __DEBUG +static void dump_one_vdso_page(struct page *pg, struct page *upg) +{ + printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT), + page_count(pg), + pg->flags); + if (upg && !IS_ERR(upg) /* && pg != upg*/) { + printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg) + << PAGE_SHIFT), + page_count(upg), + upg->flags); + } + printk("\n"); +} + +static void dump_vdso_pages(struct vm_area_struct * vma) +{ + int i; + + if (!vma || is_32bit_task()) { + printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase); + for (i=0; i<vdso32_pages; i++) { + struct page *pg = virt_to_page(vdso32_kbase + + i*PAGE_SIZE); + struct page *upg = (vma && vma->vm_mm) ? + follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0) + : NULL; + dump_one_vdso_page(pg, upg); + } + } + if (!vma || !is_32bit_task()) { + printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase); + for (i=0; i<vdso64_pages; i++) { + struct page *pg = virt_to_page(vdso64_kbase + + i*PAGE_SIZE); + struct page *upg = (vma && vma->vm_mm) ? + follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0) + : NULL; + dump_one_vdso_page(pg, upg); + } + } +} +#endif /* DEBUG */ + +/* + * This is called from binfmt_elf, we create the special vma for the + * vDSO and insert it into the mm struct tree + */ +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + struct mm_struct *mm = current->mm; + struct page **vdso_pagelist; + unsigned long vdso_pages; + unsigned long vdso_base; + int rc; + + if (!vdso_ready) + return 0; + +#ifdef CONFIG_PPC64 + if (is_32bit_task()) { + vdso_pagelist = vdso32_pagelist; + vdso_pages = vdso32_pages; + vdso_base = VDSO32_MBASE; + } else { + vdso_pagelist = vdso64_pagelist; + vdso_pages = vdso64_pages; + /* + * On 64bit we don't have a preferred map address. This + * allows get_unmapped_area to find an area near other mmaps + * and most likely share a SLB entry. 
+ */ + vdso_base = 0; + } +#else + vdso_pagelist = vdso32_pagelist; + vdso_pages = vdso32_pages; + vdso_base = VDSO32_MBASE; +#endif + + current->mm->context.vdso_base = 0; + + /* vDSO has a problem and was disabled, just don't "enable" it for the + * process + */ + if (vdso_pages == 0) + return 0; + /* Add a page to the vdso size for the data page */ + vdso_pages ++; + + /* + * pick a base address for the vDSO in process space. We try to put it + * at vdso_base which is the "natural" base for it, but we might fail + * and end up putting it elsewhere. + * Add enough to the size so that the result can be aligned. + */ + down_write(&mm->mmap_sem); + vdso_base = get_unmapped_area(NULL, vdso_base, + (vdso_pages << PAGE_SHIFT) + + ((VDSO_ALIGNMENT - 1) & PAGE_MASK), + 0, 0); + if (IS_ERR_VALUE(vdso_base)) { + rc = vdso_base; + goto fail_mmapsem; + } + + /* Add required alignment. */ + vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT); + + /* + * Put vDSO base into mm struct. We need to do this before calling + * install_special_mapping or the perf counter mmap tracking code + * will fail to recognise it as a vDSO (since arch_vma_name fails). + */ + current->mm->context.vdso_base = vdso_base; + + /* + * our vma flags don't have VM_WRITE so by default, the process isn't + * allowed to write those pages. + * gdb can break that with ptrace interface, and thus trigger COW on + * those pages but it's then your responsibility to never do that on + * the "data" page of the vDSO or you'll stop getting kernel updates + * and your nice userland gettimeofday will be totally dead. + * It's fine to use that for setting breakpoints in the vDSO code + * pages though. + */ + rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + vdso_pagelist); + if (rc) { + current->mm->context.vdso_base = 0; + goto fail_mmapsem; + } + + up_write(&mm->mmap_sem); + return 0; + + fail_mmapsem: + up_write(&mm->mmap_sem); + return rc; +} + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base) + return "[vdso]"; + return NULL; +} + + + +static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname, + unsigned long *size) +{ + Elf32_Shdr *sechdrs; + unsigned int i; + char *secnames; + + /* Grab section headers and strings so we can tell who is who */ + sechdrs = (void *)ehdr + ehdr->e_shoff; + secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; + + /* Find the section they want */ + for (i = 1; i < ehdr->e_shnum; i++) { + if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { + if (size) + *size = sechdrs[i].sh_size; + return (void *)ehdr + sechdrs[i].sh_offset; + } + } + *size = 0; + return NULL; +} + +static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib, + const char *symname) +{ + unsigned int i; + char name[MAX_SYMNAME], *c; + + for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) { + if (lib->dynsym[i].st_name == 0) + continue; + strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, + MAX_SYMNAME); + c = strchr(name, '@'); + if (c) + *c = 0; + if (strcmp(symname, name) == 0) + return &lib->dynsym[i]; + } + return NULL; +} + +/* Note that we assume the section is .text and the symbol is relative to + * the library base + */ +static unsigned long __init find_function32(struct lib32_elfinfo *lib, + const char *symname) +{ + Elf32_Sym *sym = find_symbol32(lib, symname); + + if (sym == NULL) { + printk(KERN_WARNING "vDSO32: function %s not found !\n", + 
symname); + return 0; + } + return sym->st_value - VDSO32_LBASE; +} + +static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64, + const char *orig, const char *fix) +{ + Elf32_Sym *sym32_gen, *sym32_fix; + + sym32_gen = find_symbol32(v32, orig); + if (sym32_gen == NULL) { + printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig); + return -1; + } + if (fix == NULL) { + sym32_gen->st_name = 0; + return 0; + } + sym32_fix = find_symbol32(v32, fix); + if (sym32_fix == NULL) { + printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix); + return -1; + } + sym32_gen->st_value = sym32_fix->st_value; + sym32_gen->st_size = sym32_fix->st_size; + sym32_gen->st_info = sym32_fix->st_info; + sym32_gen->st_other = sym32_fix->st_other; + sym32_gen->st_shndx = sym32_fix->st_shndx; + + return 0; +} + + +#ifdef CONFIG_PPC64 + +static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname, + unsigned long *size) +{ + Elf64_Shdr *sechdrs; + unsigned int i; + char *secnames; + + /* Grab section headers and strings so we can tell who is who */ + sechdrs = (void *)ehdr + ehdr->e_shoff; + secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; + + /* Find the section they want */ + for (i = 1; i < ehdr->e_shnum; i++) { + if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { + if (size) + *size = sechdrs[i].sh_size; + return (void *)ehdr + sechdrs[i].sh_offset; + } + } + if (size) + *size = 0; + return NULL; +} + +static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib, + const char *symname) +{ + unsigned int i; + char name[MAX_SYMNAME], *c; + + for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) { + if (lib->dynsym[i].st_name == 0) + continue; + strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, + MAX_SYMNAME); + c = strchr(name, '@'); + if (c) + *c = 0; + if (strcmp(symname, name) == 0) + return &lib->dynsym[i]; + } + return NULL; +} + +/* Note that we assume the section is .text and the symbol is relative to + * the library base + */ +static unsigned long __init find_function64(struct lib64_elfinfo *lib, + const char *symname) +{ + Elf64_Sym *sym = find_symbol64(lib, symname); + + if (sym == NULL) { + printk(KERN_WARNING "vDSO64: function %s not found !\n", + symname); + return 0; + } +#ifdef VDS64_HAS_DESCRIPTORS + return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) - + VDSO64_LBASE; +#else + return sym->st_value - VDSO64_LBASE; +#endif +} + +static int __init vdso_do_func_patch64(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64, + const char *orig, const char *fix) +{ + Elf64_Sym *sym64_gen, *sym64_fix; + + sym64_gen = find_symbol64(v64, orig); + if (sym64_gen == NULL) { + printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig); + return -1; + } + if (fix == NULL) { + sym64_gen->st_name = 0; + return 0; + } + sym64_fix = find_symbol64(v64, fix); + if (sym64_fix == NULL) { + printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix); + return -1; + } + sym64_gen->st_value = sym64_fix->st_value; + sym64_gen->st_size = sym64_fix->st_size; + sym64_gen->st_info = sym64_fix->st_info; + sym64_gen->st_other = sym64_fix->st_other; + sym64_gen->st_shndx = sym64_fix->st_shndx; + + return 0; +} + +#endif /* CONFIG_PPC64 */ + + +static __init int vdso_do_find_sections(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64) +{ + void *sect; + + /* + * Locate symbol tables & text section + */ + + v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize); + v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL); + if 
(v32->dynsym == NULL || v32->dynstr == NULL) { + printk(KERN_ERR "vDSO32: required symbol section not found\n"); + return -1; + } + sect = find_section32(v32->hdr, ".text", NULL); + if (sect == NULL) { + printk(KERN_ERR "vDSO32: the .text section was not found\n"); + return -1; + } + v32->text = sect - vdso32_kbase; + +#ifdef CONFIG_PPC64 + v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize); + v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL); + if (v64->dynsym == NULL || v64->dynstr == NULL) { + printk(KERN_ERR "vDSO64: required symbol section not found\n"); + return -1; + } + sect = find_section64(v64->hdr, ".text", NULL); + if (sect == NULL) { + printk(KERN_ERR "vDSO64: the .text section was not found\n"); + return -1; + } + v64->text = sect - vdso64_kbase; +#endif /* CONFIG_PPC64 */ + + return 0; +} + +static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64) +{ + /* + * Find signal trampolines + */ + +#ifdef CONFIG_PPC64 + vdso64_rt_sigtramp = find_function64(v64, "__kernel_sigtramp_rt64"); +#endif + vdso32_sigtramp = find_function32(v32, "__kernel_sigtramp32"); + vdso32_rt_sigtramp = find_function32(v32, "__kernel_sigtramp_rt32"); +} + +static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64) +{ + Elf32_Sym *sym32; +#ifdef CONFIG_PPC64 + Elf64_Sym *sym64; + + sym64 = find_symbol64(v64, "__kernel_datapage_offset"); + if (sym64 == NULL) { + printk(KERN_ERR "vDSO64: Can't find symbol " + "__kernel_datapage_offset !\n"); + return -1; + } + *((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) = + (vdso64_pages << PAGE_SHIFT) - + (sym64->st_value - VDSO64_LBASE); +#endif /* CONFIG_PPC64 */ + + sym32 = find_symbol32(v32, "__kernel_datapage_offset"); + if (sym32 == NULL) { + printk(KERN_ERR "vDSO32: Can't find symbol " + "__kernel_datapage_offset !\n"); + return -1; + } + *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) = + (vdso32_pages << PAGE_SHIFT) - + (sym32->st_value - VDSO32_LBASE); + + return 0; +} + + +static __init int vdso_fixup_features(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64) +{ + void *start32; + unsigned long size32; + +#ifdef CONFIG_PPC64 + void *start64; + unsigned long size64; + + start64 = find_section64(v64->hdr, "__ftr_fixup", &size64); + if (start64) + do_feature_fixups(cur_cpu_spec->cpu_features, + start64, start64 + size64); + + start64 = find_section64(v64->hdr, "__mmu_ftr_fixup", &size64); + if (start64) + do_feature_fixups(cur_cpu_spec->mmu_features, + start64, start64 + size64); + + start64 = find_section64(v64->hdr, "__fw_ftr_fixup", &size64); + if (start64) + do_feature_fixups(powerpc_firmware_features, + start64, start64 + size64); + + start64 = find_section64(v64->hdr, "__lwsync_fixup", &size64); + if (start64) + do_lwsync_fixups(cur_cpu_spec->cpu_features, + start64, start64 + size64); +#endif /* CONFIG_PPC64 */ + + start32 = find_section32(v32->hdr, "__ftr_fixup", &size32); + if (start32) + do_feature_fixups(cur_cpu_spec->cpu_features, + start32, start32 + size32); + + start32 = find_section32(v32->hdr, "__mmu_ftr_fixup", &size32); + if (start32) + do_feature_fixups(cur_cpu_spec->mmu_features, + start32, start32 + size32); + +#ifdef CONFIG_PPC64 + start32 = find_section32(v32->hdr, "__fw_ftr_fixup", &size32); + if (start32) + do_feature_fixups(powerpc_firmware_features, + start32, start32 + size32); +#endif /* CONFIG_PPC64 */ + + start32 = find_section32(v32->hdr, "__lwsync_fixup", &size32); + if (start32) + 
do_lwsync_fixups(cur_cpu_spec->cpu_features,
+				 start32, start32 + size32);
+
+	return 0;
+}
+
+static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32,
+				       struct lib64_elfinfo *v64)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) {
+		struct vdso_patch_def *patch = &vdso_patches[i];
+		int match = (cur_cpu_spec->cpu_features & patch->ftr_mask)
+			== patch->ftr_value;
+		if (!match)
+			continue;
+
+		DBG("replacing %s with %s...\n", patch->gen_name,
+		    patch->fix_name ? patch->fix_name : "NONE");
+
+		/*
+		 * Patch the 32 bits and 64 bits symbols. Note that we do not
+		 * patch the "." symbol on 64 bits.
+		 * It would be easy to do, but doesn't seem to be necessary,
+		 * patching the OPD symbol is enough.
+		 */
+		vdso_do_func_patch32(v32, v64, patch->gen_name,
+				     patch->fix_name);
+#ifdef CONFIG_PPC64
+		vdso_do_func_patch64(v32, v64, patch->gen_name,
+				     patch->fix_name);
+#endif /* CONFIG_PPC64 */
+	}
+
+	return 0;
+}
+
+
+static __init int vdso_setup(void)
+{
+	struct lib32_elfinfo	v32;
+	struct lib64_elfinfo	v64;
+
+	v32.hdr = vdso32_kbase;
+#ifdef CONFIG_PPC64
+	v64.hdr = vdso64_kbase;
+#endif
+	if (vdso_do_find_sections(&v32, &v64))
+		return -1;
+
+	if (vdso_fixup_datapage(&v32, &v64))
+		return -1;
+
+	if (vdso_fixup_features(&v32, &v64))
+		return -1;
+
+	if (vdso_fixup_alt_funcs(&v32, &v64))
+		return -1;
+
+	vdso_setup_trampolines(&v32, &v64);
+
+	return 0;
+}
+
+/*
+ * Called from setup_arch to initialize the bitmap of available
+ * syscalls in the systemcfg page
+ */
+static void __init vdso_setup_syscall_map(void)
+{
+	unsigned int i;
+	extern unsigned long *sys_call_table;
+	extern unsigned long sys_ni_syscall;
+
+
+	for (i = 0; i < __NR_syscalls; i++) {
+#ifdef CONFIG_PPC64
+		if (sys_call_table[i*2] != sys_ni_syscall)
+			vdso_data->syscall_map_64[i >> 5] |=
+				0x80000000UL >> (i & 0x1f);
+		if (sys_call_table[i*2+1] != sys_ni_syscall)
+			vdso_data->syscall_map_32[i >> 5] |=
+				0x80000000UL >> (i & 0x1f);
+#else /* CONFIG_PPC64 */
+		if (sys_call_table[i] != sys_ni_syscall)
+			vdso_data->syscall_map_32[i >> 5] |=
+				0x80000000UL >> (i & 0x1f);
+#endif /* CONFIG_PPC64 */
+	}
+}
+
+
+static int __init vdso_init(void)
+{
+	int i;
+
+#ifdef CONFIG_PPC64
+	/*
+	 * Fill up the "systemcfg" stuff for backward compatibility
+	 */
+	strcpy((char *)vdso_data->eye_catcher, "SYSTEMCFG:PPC64");
+	vdso_data->version.major = SYSTEMCFG_MAJOR;
+	vdso_data->version.minor = SYSTEMCFG_MINOR;
+	vdso_data->processor = mfspr(SPRN_PVR);
+	/*
+	 * Fake the old platform number for pSeries and add
+	 * in LPAR bit if necessary
+	 */
+	vdso_data->platform = 0x100;
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		vdso_data->platform |= 1;
+	vdso_data->physicalMemorySize = memblock_phys_mem_size();
+	vdso_data->dcache_size = ppc64_caches.dsize;
+	vdso_data->dcache_line_size = ppc64_caches.dline_size;
+	vdso_data->icache_size = ppc64_caches.isize;
+	vdso_data->icache_line_size = ppc64_caches.iline_size;
+
+	/* XXXOJN: Blocks should be added to ppc64_caches and used instead */
+	vdso_data->dcache_block_size = ppc64_caches.dline_size;
+	vdso_data->icache_block_size = ppc64_caches.iline_size;
+	vdso_data->dcache_log_block_size = ppc64_caches.log_dline_size;
+	vdso_data->icache_log_block_size = ppc64_caches.log_iline_size;
+
+	/*
+	 * Calculate the size of the 64 bits vDSO
+	 */
+	vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
+	DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages);
+#else
+	vdso_data->dcache_block_size = L1_CACHE_BYTES;
+	vdso_data->dcache_log_block_size = L1_CACHE_SHIFT;
+	vdso_data->icache_block_size = L1_CACHE_BYTES;
+	vdso_data->icache_log_block_size = L1_CACHE_SHIFT;
+#endif /* CONFIG_PPC64 */
+
+
+	/*
+	 * Calculate the size of the 32 bits vDSO
+	 */
+	vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT;
+	DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages);
+
+
+	/*
+	 * Setup the syscall map in the vDSO
+	 */
+	vdso_setup_syscall_map();
+
+	/*
+	 * Initialize the vDSO images in memory, that is do necessary
+	 * fixups of vDSO symbols, locate trampolines, etc...
+	 */
+	if (vdso_setup()) {
+		printk(KERN_ERR "vDSO setup failure, not enabled !\n");
+		vdso32_pages = 0;
+#ifdef CONFIG_PPC64
+		vdso64_pages = 0;
+#endif
+		return 0;
+	}
+
+	/* Make sure pages are in the correct state */
+	vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 2),
+				  GFP_KERNEL);
+	BUG_ON(vdso32_pagelist == NULL);
+	for (i = 0; i < vdso32_pages; i++) {
+		struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
+		ClearPageReserved(pg);
+		get_page(pg);
+		vdso32_pagelist[i] = pg;
+	}
+	vdso32_pagelist[i++] = virt_to_page(vdso_data);
+	vdso32_pagelist[i] = NULL;
+
+#ifdef CONFIG_PPC64
+	vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 2),
+				  GFP_KERNEL);
+	BUG_ON(vdso64_pagelist == NULL);
+	for (i = 0; i < vdso64_pages; i++) {
+		struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
+		ClearPageReserved(pg);
+		get_page(pg);
+		vdso64_pagelist[i] = pg;
+	}
+	vdso64_pagelist[i++] = virt_to_page(vdso_data);
+	vdso64_pagelist[i] = NULL;
+#endif /* CONFIG_PPC64 */
+
+	get_page(virt_to_page(vdso_data));
+
+	smp_wmb();
+	vdso_ready = 1;
+
+	return 0;
+}
+arch_initcall(vdso_init);
+
+int in_gate_area_no_mm(unsigned long addr)
+{
+	return 0;
+}
+
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
+{
+	return 0;
+}
+
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+	return NULL;
+}
+
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
new file mode 100644
index 00000000..9a7946c4
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -0,0 +1,56 @@
+
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o
+
+# Build rules
+
+ifeq ($(CONFIG_PPC32),y)
+CROSS32CC := $(CC)
+endif
+
+targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+
+GCOV_PROFILE := n
+
+ccflags-y := -shared -fno-common -fno-builtin
+ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
+		$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+asflags-y := -D__VDSO32__ -s
+
+obj-y += vdso32_wrapper.o
+extra-y += vdso32.lds
+CPPFLAGS_vdso32.lds += -P -C -Upowerpc
+
+# Force dependency (incbin is bad)
+$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32)
+	$(call if_changed,vdso32ld)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+	$(call if_changed,objcopy)
+
+# assembly rules for the .S files
+$(obj-vdso32): %.o: %.S
+	$(call if_changed_dep,vdso32as)
+
+# actual build commands
+quiet_cmd_vdso32ld = VDSO32L $@
+      cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso32as = VDSO32A $@
+      cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso32.so: $(obj)/vdso32.so.dbg
+	@mkdir -p $(MODLIB)/vdso
+	
$(call cmd,vdso_install) + +vdso_install: vdso32.so diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S new file mode 100644 index 00000000..1ba6feb7 --- /dev/null +++ b/arch/powerpc/kernel/vdso32/cacheflush.S @@ -0,0 +1,85 @@ +/* + * vDSO provided cache flush routines + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), + * IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/vdso.h> +#include <asm/asm-offsets.h> + + .text + +/* + * Default "generic" version of __kernel_sync_dicache. + * + * void __kernel_sync_dicache(unsigned long start, unsigned long end) + * + * Flushes the data cache & invalidate the instruction cache for the + * provided range [start, end[ + */ +V_FUNCTION_BEGIN(__kernel_sync_dicache) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + mr r11,r3 + bl __get_datapage@local + mtlr r12 + mr r10,r3 + + lwz r7,CFG_DCACHE_BLOCKSZ(r10) + addi r5,r7,-1 + andc r6,r11,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) + srw. r8,r8,r9 /* compute line count */ + crclr cr0*4+so + beqlr /* nothing to do? */ + mtctr r8 +1: dcbst 0,r6 + add r6,r6,r7 + bdnz 1b + sync + +/* Now invalidate the instruction cache */ + + lwz r7,CFG_ICACHE_BLOCKSZ(r10) + addi r5,r7,-1 + andc r6,r11,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 + lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) + srw. r8,r8,r9 /* compute line count */ + crclr cr0*4+so + beqlr /* nothing to do? */ + mtctr r8 +2: icbi 0,r6 + add r6,r6,r7 + bdnz 2b + isync + li r3,0 + blr + .cfi_endproc +V_FUNCTION_END(__kernel_sync_dicache) + + +/* + * POWER5 version of __kernel_sync_dicache + */ +V_FUNCTION_BEGIN(__kernel_sync_dicache_p5) + .cfi_startproc + crclr cr0*4+so + sync + isync + li r3,0 + blr + .cfi_endproc +V_FUNCTION_END(__kernel_sync_dicache_p5) + diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S new file mode 100644 index 00000000..dc21e891 --- /dev/null +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -0,0 +1,85 @@ +/* + * Access to the shared data page by the vDSO & syscall map + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+
+	.text
+V_FUNCTION_BEGIN(__get_datapage)
+  .cfi_startproc
+	/* We don't want that exposed or overridable as we want other objects
+	 * to be able to bl directly to here
+	 */
+	.protected __get_datapage
+	.hidden __get_datapage
+
+	mflr	r0
+  .cfi_register lr,r0
+
+	bcl	20,31,1f
+	.global	__kernel_datapage_offset;
+__kernel_datapage_offset:
+	.long	0
+1:
+	mflr	r3
+	mtlr	r0
+	lwz	r0,0(r3)
+	add	r3,r0,r3
+	blr
+  .cfi_endproc
+V_FUNCTION_END(__get_datapage)
+
+/*
+ * void *__kernel_get_syscall_map(unsigned int *syscall_count);
+ *
+ * returns a pointer to the syscall map. the map is agnostic to the
+ * size of "long", unlike kernel bitops, it stores bits from top to
+ * bottom so that memory actually contains a linear bitmap
+ * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of
+ * 32 bits int at N >> 5.
+ */
+V_FUNCTION_BEGIN(__kernel_get_syscall_map)
+  .cfi_startproc
+	mflr	r12
+  .cfi_register lr,r12
+	mr	r4,r3
+	bl	__get_datapage@local
+	mtlr	r12
+	addi	r3,r3,CFG_SYSCALL_MAP32
+	cmpli	cr0,r4,0
+	beqlr
+	li	r0,__NR_syscalls
+	stw	r0,0(r4)
+	crclr	cr0*4+so
+	blr
+  .cfi_endproc
+V_FUNCTION_END(__kernel_get_syscall_map)
+
+/*
+ * unsigned long long __kernel_get_tbfreq(void);
+ *
+ * returns the timebase frequency in HZ
+ */
+V_FUNCTION_BEGIN(__kernel_get_tbfreq)
+  .cfi_startproc
+	mflr	r12
+  .cfi_register lr,r12
+	bl	__get_datapage@local
+	lwz	r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
+	lwz	r3,CFG_TB_TICKS_PER_SEC(r3)
+	mtlr	r12
+	crclr	cr0*4+so
+	blr
+  .cfi_endproc
+V_FUNCTION_END(__kernel_get_tbfreq)
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
new file mode 100644
index 00000000..4ee09ee2
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -0,0 +1,266 @@
+/*
+ * Userland implementation of gettimeofday() for 32 bits processes in a
+ * ppc64 kernel for use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */ +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + +/* Offset for the low 32-bit part of a field of long type */ +#ifdef CONFIG_PPC64 +#define LOPART 4 +#define TSPEC_TV_SEC TSPC64_TV_SEC+LOPART +#else +#define LOPART 0 +#define TSPEC_TV_SEC TSPC32_TV_SEC +#endif + + .text +/* + * Exact prototype of gettimeofday + * + * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); + * + */ +V_FUNCTION_BEGIN(__kernel_gettimeofday) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + + mr r10,r3 /* r10 saves tv */ + mr r11,r4 /* r11 saves tz */ + bl __get_datapage@local /* get data page */ + mr r9, r3 /* datapage ptr in r9 */ + cmplwi r10,0 /* check if tv is NULL */ + beq 3f + lis r7,1000000@ha /* load up USEC_PER_SEC */ + addi r7,r7,1000000@l /* so we get microseconds in r4 */ + bl __do_get_tspec@local /* get sec/usec from tb & kernel */ + stw r3,TVAL32_TV_SEC(r10) + stw r4,TVAL32_TV_USEC(r10) + +3: cmplwi r11,0 /* check if tz is NULL */ + beq 1f + lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */ + lwz r5,CFG_TZ_DSTTIME(r9) + stw r4,TZONE_TZ_MINWEST(r11) + stw r5,TZONE_TZ_DSTTIME(r11) + +1: mtlr r12 + crclr cr0*4+so + li r3,0 + blr + .cfi_endproc +V_FUNCTION_END(__kernel_gettimeofday) + +/* + * Exact prototype of clock_gettime() + * + * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); + * + */ +V_FUNCTION_BEGIN(__kernel_clock_gettime) + .cfi_startproc + /* Check for supported clock IDs */ + cmpli cr0,r3,CLOCK_REALTIME + cmpli cr1,r3,CLOCK_MONOTONIC + cror cr0*4+eq,cr0*4+eq,cr1*4+eq + bne cr0,99f + + mflr r12 /* r12 saves lr */ + .cfi_register lr,r12 + mr r11,r4 /* r11 saves tp */ + bl __get_datapage@local /* get data page */ + mr r9,r3 /* datapage ptr in r9 */ + lis r7,NSEC_PER_SEC@h /* want nanoseconds */ + ori r7,r7,NSEC_PER_SEC@l +50: bl __do_get_tspec@local /* get sec/nsec from tb & kernel */ + bne cr1,80f /* not monotonic -> all done */ + + /* + * CLOCK_MONOTONIC + */ + + /* now we must fixup using wall to monotonic. We need to snapshot + * that value and do the counter trick again. Fortunately, we still + * have the counter value in r8 that was returned by __do_get_xsec. + * At this point, r3,r4 contain our sec/nsec values, r5 and r6 + * can be used, r7 contains NSEC_PER_SEC. + */ + + lwz r5,WTOM_CLOCK_SEC(r9) + lwz r6,WTOM_CLOCK_NSEC(r9) + + /* We now have our offset in r5,r6. We create a fake dependency + * on that value and re-check the counter + */ + or r0,r6,r5 + xor r0,r0,r0 + add r9,r9,r0 + lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) + cmpl cr0,r8,r0 /* check if updated */ + bne- 50b + + /* Calculate and store result. Note that this mimics the C code, + * which may cause funny results if nsec goes negative... is that + * possible at all ? 
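+	 * In practice the timekeeping code keeps wall_to_monotonic.tv_nsec
+	 * normalized to [0, NSEC_PER_SEC), so the nsec sum below stays in
+	 * [0, 2*NSEC_PER_SEC) and well under 2^31; the negative case
+	 * handled below appears to be purely defensive.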
+ */ + add r3,r3,r5 + add r4,r4,r6 + cmpw cr0,r4,r7 + cmpwi cr1,r4,0 + blt 1f + subf r4,r7,r4 + addi r3,r3,1 +1: bge cr1,80f + addi r3,r3,-1 + add r4,r4,r7 + +80: stw r3,TSPC32_TV_SEC(r11) + stw r4,TSPC32_TV_NSEC(r11) + + mtlr r12 + crclr cr0*4+so + li r3,0 + blr + + /* + * syscall fallback + */ +99: + li r0,__NR_clock_gettime + sc + blr + .cfi_endproc +V_FUNCTION_END(__kernel_clock_gettime) + + +/* + * Exact prototype of clock_getres() + * + * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); + * + */ +V_FUNCTION_BEGIN(__kernel_clock_getres) + .cfi_startproc + /* Check for supported clock IDs */ + cmpwi cr0,r3,CLOCK_REALTIME + cmpwi cr1,r3,CLOCK_MONOTONIC + cror cr0*4+eq,cr0*4+eq,cr1*4+eq + bne cr0,99f + + li r3,0 + cmpli cr0,r4,0 + crclr cr0*4+so + beqlr + lis r5,CLOCK_REALTIME_RES@h + ori r5,r5,CLOCK_REALTIME_RES@l + stw r3,TSPC32_TV_SEC(r4) + stw r5,TSPC32_TV_NSEC(r4) + blr + + /* + * syscall fallback + */ +99: + li r0,__NR_clock_getres + sc + blr + .cfi_endproc +V_FUNCTION_END(__kernel_clock_getres) + + +/* + * This is the core of clock_gettime() and gettimeofday(), + * it returns the current time in r3 (seconds) and r4. + * On entry, r7 gives the resolution of r4, either USEC_PER_SEC + * or NSEC_PER_SEC, giving r4 in microseconds or nanoseconds. + * It expects the datapage ptr in r9 and doesn't clobber it. + * It clobbers r0, r5 and r6. + * On return, r8 contains the counter value that can be reused. + * This clobbers cr0 but not any other cr field. + */ +__do_get_tspec: + .cfi_startproc + /* Check for update count & load values. We use the low + * order 32 bits of the update count + */ +1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9) + andi. r0,r8,1 /* pending update ? loop */ + bne- 1b + xor r0,r8,r8 /* create dependency */ + add r9,r9,r0 + + /* Load orig stamp (offset to TB) */ + lwz r5,CFG_TB_ORIG_STAMP(r9) + lwz r6,(CFG_TB_ORIG_STAMP+4)(r9) + + /* Get a stable TB value */ +2: mftbu r3 + mftbl r4 + mftbu r0 + cmplw cr0,r3,r0 + bne- 2b + + /* Subtract tb orig stamp and shift left 12 bits. + */ + subfc r4,r6,r4 + subfe r0,r5,r3 + slwi r0,r0,12 + rlwimi. r0,r4,12,20,31 + slwi r4,r4,12 + + /* + * Load scale factor & do multiplication. + * We only use the high 32 bits of the tb_to_xs value. + * Even with a 1GHz timebase clock, the high 32 bits of + * tb_to_xs will be at least 4 million, so the error from + * ignoring the low 32 bits will be no more than 0.25ppm. + * The error will just make the clock run very very slightly + * slow until the next time the kernel updates the VDSO data, + * at which point the clock will catch up to the kernel's value, + * so there is no long-term error accumulation. + */ + lwz r5,CFG_TB_TO_XS(r9) /* load values */ + mulhwu r4,r4,r5 + li r3,0 + + beq+ 4f /* skip high part computation if 0 */ + mulhwu r3,r0,r5 + mullw r5,r0,r5 + addc r4,r4,r5 + addze r3,r3 +4: + /* At this point, we have seconds since the xtime stamp + * as a 32.32 fixed-point number in r3 and r4. + * Load & add the xtime stamp. 
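+	 * (Worked example of the scaling above, for a hypothetical 1GHz
+	 * timebase: tb_to_xs is then 2^84 / 10^9, whose high word is
+	 * about 0x0044B82F, so a delta of 10^9 ticks yields
+	 * ((10^9 << 12) * 0x0044B82F) >> 32, which is almost exactly
+	 * 2^32, i.e. 1.0 second in 32.32 fixed-point format.)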
+ */ + lwz r5,STAMP_XTIME+TSPEC_TV_SEC(r9) + lwz r6,STAMP_SEC_FRAC(r9) + addc r4,r4,r6 + adde r3,r3,r5 + + /* We create a fake dependency on the result in r3/r4 + * and re-check the counter + */ + or r6,r4,r3 + xor r0,r6,r6 + add r9,r9,r0 + lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) + cmplw cr0,r8,r0 /* check if updated */ + bne- 1b + + mulhwu r4,r4,r7 /* convert to micro or nanoseconds */ + + blr + .cfi_endproc diff --git a/arch/powerpc/kernel/vdso32/note.S b/arch/powerpc/kernel/vdso32/note.S new file mode 100644 index 00000000..d4b5be4f --- /dev/null +++ b/arch/powerpc/kernel/vdso32/note.S @@ -0,0 +1,25 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> + +#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \ + .section name, flags; \ + .balign 4; \ + .long 1f - 0f; /* name length */ \ + .long 3f - 2f; /* data length */ \ + .long type; /* note type */ \ +0: .asciz vendor; /* vendor name */ \ +1: .balign 4; \ +2: + +#define ASM_ELF_NOTE_END \ +3: .balign 4; /* pad out section */ \ + .previous + + ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0) + .long LINUX_VERSION_CODE + ASM_ELF_NOTE_END diff --git a/arch/powerpc/kernel/vdso32/sigtramp.S b/arch/powerpc/kernel/vdso32/sigtramp.S new file mode 100644 index 00000000..cf0c9c9c --- /dev/null +++ b/arch/powerpc/kernel/vdso32/sigtramp.S @@ -0,0 +1,299 @@ +/* + * Signal trampolines for 32 bits processes in a ppc64 kernel for + * use in the vDSO + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. + * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/unistd.h> +#include <asm/vdso.h> + + .text + +/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from + the return address to get an address in the middle of the presumed + call instruction. Since we don't have a call here, we artificially + extend the range covered by the unwind info by adding a nop before + the real start. */ + nop +V_FUNCTION_BEGIN(__kernel_sigtramp32) +.Lsig_start = . - 4 + li r0,__NR_sigreturn + sc +.Lsig_end: +V_FUNCTION_END(__kernel_sigtramp32) + +.Lsigrt_start: + nop +V_FUNCTION_BEGIN(__kernel_sigtramp_rt32) + li r0,__NR_rt_sigreturn + sc +.Lsigrt_end: +V_FUNCTION_END(__kernel_sigtramp_rt32) + + .section .eh_frame,"a",@progbits + +/* Register r1 can be found at offset 4 of a pt_regs structure. + A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */ +#define cfa_save \ + .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x23; .uleb128 RSIZE; /* DW_OP_plus_uconst */ \ + .byte 0x06; /* DW_OP_deref */ \ +9: + +/* Register REGNO can be found at offset OFS of a pt_regs structure. + A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. 
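+   An unwinder evaluates the expression built by rsave() roughly as
+   (sketch):
+
+	regs = *(unsigned long *)(r1 + PTREGS);   <- DW_OP_breg1, DW_OP_deref
+	addr = regs + ofs;                        <- DW_OP_plus_uconst
+
+   i.e. register REGNO was saved at byte offset OFS from the pt_regs
+   base.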
*/ +#define rsave(regno, ofs) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .ifne ofs; \ + .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \ + .endif; \ +9: + +/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16 + of the VMX reg struct. The VMX reg struct is at offset VREGS of + the pt_regs struct. This macro is for REGNO == 0, and contains + 'subroutines' that the other macros jump to. */ +#define vsave_msr0(regno) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x30 + regno; /* DW_OP_lit0 */ \ +2: \ + .byte 0x40; /* DW_OP_lit16 */ \ + .byte 0x1e; /* DW_OP_mul */ \ +3: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x12; /* DW_OP_dup */ \ + .byte 0x23; /* DW_OP_plus_uconst */ \ + .uleb128 33*RSIZE; /* msr offset */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x0c; .long 1 << 25; /* DW_OP_const4u */ \ + .byte 0x1a; /* DW_OP_and */ \ + .byte 0x12; /* DW_OP_dup, ret 0 if bra taken */ \ + .byte 0x30; /* DW_OP_lit0 */ \ + .byte 0x29; /* DW_OP_eq */ \ + .byte 0x28; .short 0x7fff; /* DW_OP_bra to end */ \ + .byte 0x13; /* DW_OP_drop, pop the 0 */ \ + .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \ + .byte 0x22; /* DW_OP_plus */ \ + .byte 0x2f; .short 0x7fff; /* DW_OP_skip to end */ \ +9: + +/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16 + of the VMX reg struct. REGNO is 1 thru 31. */ +#define vsave_msr1(regno) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x30 + regno; /* DW_OP_lit n */ \ + .byte 0x2f; .short 2b - 9f; /* DW_OP_skip */ \ +9: + +/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of + the VMX save block. */ +#define vsave_msr2(regno, ofs) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x0a; .short ofs; /* DW_OP_const2u */ \ + .byte 0x2f; .short 3b - 9f; /* DW_OP_skip */ \ +9: + +/* VMX register REGNO is at offset OFS of the VMX save area. */ +#define vsave(regno, ofs) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \ + .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \ +9: + +/* This is where the pt_regs pointer can be found on the stack. */ +#define PTREGS 64+28 + +/* Size of regs. */ +#define RSIZE 4 + +/* This is the offset of the VMX regs. */ +#define VREGS 48*RSIZE+34*8 + +/* Describe where general purpose regs are saved. 
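+   The DWARF column numbers used below follow the PPC convention:
+   0-31 for the GPRs, 32-63 for the FPRs, 65 for lr, 70 for cr and
+   77 upwards for the VMX registers; column 67 ("ap") is borrowed to
+   describe the interrupted nip.  PTREGS is 64+28: the 64-byte
+   __SIGNAL_FRAMESIZE the kernel allocates below the sigcontext, plus
+   the offset (28) of the regs pointer within struct sigcontext on
+   32-bit.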
*/ +#define EH_FRAME_GEN \ + cfa_save; \ + rsave ( 0, 0*RSIZE); \ + rsave ( 2, 2*RSIZE); \ + rsave ( 3, 3*RSIZE); \ + rsave ( 4, 4*RSIZE); \ + rsave ( 5, 5*RSIZE); \ + rsave ( 6, 6*RSIZE); \ + rsave ( 7, 7*RSIZE); \ + rsave ( 8, 8*RSIZE); \ + rsave ( 9, 9*RSIZE); \ + rsave (10, 10*RSIZE); \ + rsave (11, 11*RSIZE); \ + rsave (12, 12*RSIZE); \ + rsave (13, 13*RSIZE); \ + rsave (14, 14*RSIZE); \ + rsave (15, 15*RSIZE); \ + rsave (16, 16*RSIZE); \ + rsave (17, 17*RSIZE); \ + rsave (18, 18*RSIZE); \ + rsave (19, 19*RSIZE); \ + rsave (20, 20*RSIZE); \ + rsave (21, 21*RSIZE); \ + rsave (22, 22*RSIZE); \ + rsave (23, 23*RSIZE); \ + rsave (24, 24*RSIZE); \ + rsave (25, 25*RSIZE); \ + rsave (26, 26*RSIZE); \ + rsave (27, 27*RSIZE); \ + rsave (28, 28*RSIZE); \ + rsave (29, 29*RSIZE); \ + rsave (30, 30*RSIZE); \ + rsave (31, 31*RSIZE); \ + rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \ + rsave (65, 36*RSIZE); /* lr */ \ + rsave (70, 38*RSIZE) /* cr */ + +/* Describe where the FP regs are saved. */ +#define EH_FRAME_FP \ + rsave (32, 48*RSIZE + 0*8); \ + rsave (33, 48*RSIZE + 1*8); \ + rsave (34, 48*RSIZE + 2*8); \ + rsave (35, 48*RSIZE + 3*8); \ + rsave (36, 48*RSIZE + 4*8); \ + rsave (37, 48*RSIZE + 5*8); \ + rsave (38, 48*RSIZE + 6*8); \ + rsave (39, 48*RSIZE + 7*8); \ + rsave (40, 48*RSIZE + 8*8); \ + rsave (41, 48*RSIZE + 9*8); \ + rsave (42, 48*RSIZE + 10*8); \ + rsave (43, 48*RSIZE + 11*8); \ + rsave (44, 48*RSIZE + 12*8); \ + rsave (45, 48*RSIZE + 13*8); \ + rsave (46, 48*RSIZE + 14*8); \ + rsave (47, 48*RSIZE + 15*8); \ + rsave (48, 48*RSIZE + 16*8); \ + rsave (49, 48*RSIZE + 17*8); \ + rsave (50, 48*RSIZE + 18*8); \ + rsave (51, 48*RSIZE + 19*8); \ + rsave (52, 48*RSIZE + 20*8); \ + rsave (53, 48*RSIZE + 21*8); \ + rsave (54, 48*RSIZE + 22*8); \ + rsave (55, 48*RSIZE + 23*8); \ + rsave (56, 48*RSIZE + 24*8); \ + rsave (57, 48*RSIZE + 25*8); \ + rsave (58, 48*RSIZE + 26*8); \ + rsave (59, 48*RSIZE + 27*8); \ + rsave (60, 48*RSIZE + 28*8); \ + rsave (61, 48*RSIZE + 29*8); \ + rsave (62, 48*RSIZE + 30*8); \ + rsave (63, 48*RSIZE + 31*8) + +/* Describe where the VMX regs are saved. */ +#ifdef CONFIG_ALTIVEC +#define EH_FRAME_VMX \ + vsave_msr0 ( 0); \ + vsave_msr1 ( 1); \ + vsave_msr1 ( 2); \ + vsave_msr1 ( 3); \ + vsave_msr1 ( 4); \ + vsave_msr1 ( 5); \ + vsave_msr1 ( 6); \ + vsave_msr1 ( 7); \ + vsave_msr1 ( 8); \ + vsave_msr1 ( 9); \ + vsave_msr1 (10); \ + vsave_msr1 (11); \ + vsave_msr1 (12); \ + vsave_msr1 (13); \ + vsave_msr1 (14); \ + vsave_msr1 (15); \ + vsave_msr1 (16); \ + vsave_msr1 (17); \ + vsave_msr1 (18); \ + vsave_msr1 (19); \ + vsave_msr1 (20); \ + vsave_msr1 (21); \ + vsave_msr1 (22); \ + vsave_msr1 (23); \ + vsave_msr1 (24); \ + vsave_msr1 (25); \ + vsave_msr1 (26); \ + vsave_msr1 (27); \ + vsave_msr1 (28); \ + vsave_msr1 (29); \ + vsave_msr1 (30); \ + vsave_msr1 (31); \ + vsave_msr2 (33, 32*16+12); \ + vsave (32, 32*16) +#else +#define EH_FRAME_VMX +#endif + +.Lcie: + .long .Lcie_end - .Lcie_start +.Lcie_start: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .string "zRS" /* NUL-terminated augmentation string */ + .uleb128 4 /* Code alignment factor */ + .sleb128 -4 /* Data alignment factor */ + .byte 67 /* Return address register column, ap */ + .uleb128 1 /* Augmentation value length */ + .byte 0x1b /* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */ + .byte 0x0c,1,0 /* DW_CFA_def_cfa: r1 ofs 0 */ + .balign 4 +.Lcie_end: + + .long .Lfde0_end - .Lfde0_start +.Lfde0_start: + .long .Lfde0_start - .Lcie /* CIE pointer. */ + .long .Lsig_start - . 
/* PC start, length */ + .long .Lsig_end - .Lsig_start + .uleb128 0 /* Augmentation */ + EH_FRAME_GEN + EH_FRAME_FP + EH_FRAME_VMX + .balign 4 +.Lfde0_end: + +/* We have a different stack layout for rt_sigreturn. */ +#undef PTREGS +#define PTREGS 64+16+128+20+28 + + .long .Lfde1_end - .Lfde1_start +.Lfde1_start: + .long .Lfde1_start - .Lcie /* CIE pointer. */ + .long .Lsigrt_start - . /* PC start, length */ + .long .Lsigrt_end - .Lsigrt_start + .uleb128 0 /* Augmentation */ + EH_FRAME_GEN + EH_FRAME_FP + EH_FRAME_VMX + .balign 4 +.Lfde1_end: diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S new file mode 100644 index 00000000..0546bcd4 --- /dev/null +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -0,0 +1,153 @@ +/* + * This is the infamous ld script for the 32 bits vdso + * library + */ +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc") +OUTPUT_ARCH(powerpc:common) +ENTRY(_start) + +SECTIONS +{ + . = VDSO32_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN(16); + .text : { + *(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*) + } :text + PROVIDE(__etext = .); + PROVIDE(_etext = .); + PROVIDE(etext = .); + + . = ALIGN(8); + __ftr_fixup : { *(__ftr_fixup) } + + . = ALIGN(8); + __mmu_ftr_fixup : { *(__mmu_ftr_fixup) } + + . = ALIGN(8); + __lwsync_fixup : { *(__lwsync_fixup) } + +#ifdef CONFIG_PPC64 + . = ALIGN(8); + __fw_ftr_fixup : { *(__fw_ftr_fixup) } +#endif + + /* + * Other stuff is appended to the text segment: + */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table) } + .fixup : { *(.fixup) } + + .dynamic : { *(.dynamic) } :text :dynamic + .got : { *(.got) } :text + .plt : { *(.plt) } + + _end = .; + __end = .; + PROVIDE(end = .); + + /* + * Stabs debugging sections are here too. + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + + /* + * DWARF debug sections. + * Symbols in the DWARF debugging sections are relative to the beginning + * of the section so we begin them at 0. 
+ */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + + /DISCARD/ : { + *(.note.GNU-stack) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + /* + * Has to be there for the kernel to find + */ + __kernel_datapage_offset; + + __kernel_get_syscall_map; + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + __kernel_get_tbfreq; + __kernel_sync_dicache; + __kernel_sync_dicache_p5; + __kernel_sigtramp32; + __kernel_sigtramp_rt32; + + local: *; + }; +} diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S new file mode 100644 index 00000000..6e8f507e --- /dev/null +++ b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S @@ -0,0 +1,14 @@ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso32_start, vdso32_end + .balign PAGE_SIZE +vdso32_start: + .incbin "arch/powerpc/kernel/vdso32/vdso32.so" + .balign PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile new file mode 100644 index 00000000..8c500d86 --- /dev/null +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -0,0 +1,51 @@ +# List of files in the vdso, has to be asm only for now + +obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o + +# Build rules + +targets := $(obj-vdso64) vdso64.so vdso64.so.dbg +obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) + +GCOV_PROFILE := n + +ccflags-y := -shared -fno-common -fno-builtin +ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +asflags-y := -D__VDSO64__ -s + +obj-y += vdso64_wrapper.o +extra-y += vdso64.lds +CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) + +# Force dependency (incbin is bad) +$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) + $(call if_changed,vdso64ld) + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# assembly rules for the .S files +$(obj-vdso64): %.o: %.S + $(call 
if_changed_dep,vdso64as) + +# actual build commands +quiet_cmd_vdso64ld = VDSO64L $@ + cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso64as = VDSO64A $@ + cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso64.so: $(obj)/vdso64.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso64.so diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S new file mode 100644 index 00000000..69c5af2b --- /dev/null +++ b/arch/powerpc/kernel/vdso64/cacheflush.S @@ -0,0 +1,84 @@ +/* + * vDSO provided cache flush routines + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), + * IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/vdso.h> +#include <asm/asm-offsets.h> + + .text + +/* + * Default "generic" version of __kernel_sync_dicache. + * + * void __kernel_sync_dicache(unsigned long start, unsigned long end) + * + * Flushes the data cache & invalidate the instruction cache for the + * provided range [start, end[ + */ +V_FUNCTION_BEGIN(__kernel_sync_dicache) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + mr r11,r3 + bl V_LOCAL_FUNC(__get_datapage) + mtlr r12 + mr r10,r3 + + lwz r7,CFG_DCACHE_BLOCKSZ(r10) + addi r5,r7,-1 + andc r6,r11,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) + srw. r8,r8,r9 /* compute line count */ + crclr cr0*4+so + beqlr /* nothing to do? */ + mtctr r8 +1: dcbst 0,r6 + add r6,r6,r7 + bdnz 1b + sync + +/* Now invalidate the instruction cache */ + + lwz r7,CFG_ICACHE_BLOCKSZ(r10) + addi r5,r7,-1 + andc r6,r11,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 + lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) + srw. r8,r8,r9 /* compute line count */ + crclr cr0*4+so + beqlr /* nothing to do? */ + mtctr r8 +2: icbi 0,r6 + add r6,r6,r7 + bdnz 2b + isync + li r3,0 + blr + .cfi_endproc +V_FUNCTION_END(__kernel_sync_dicache) + + +/* + * POWER5 version of __kernel_sync_dicache + */ +V_FUNCTION_BEGIN(__kernel_sync_dicache_p5) + .cfi_startproc + crclr cr0*4+so + sync + isync + li r3,0 + blr + .cfi_endproc +V_FUNCTION_END(__kernel_sync_dicache_p5) diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S new file mode 100644 index 00000000..79796de1 --- /dev/null +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -0,0 +1,85 @@ +/* + * Access to the shared data page by the vDSO & syscall map + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> +#include <asm/vdso.h> + + .text +V_FUNCTION_BEGIN(__get_datapage) + .cfi_startproc + /* We don't want that exposed or overridable as we want other objects + * to be able to bl directly to here + */ + .protected __get_datapage + .hidden __get_datapage + + mflr r0 + .cfi_register lr,r0 + + bcl 20,31,1f + .global __kernel_datapage_offset; +__kernel_datapage_offset: + .long 0 +1: + mflr r3 + mtlr r0 + lwz r0,0(r3) + add r3,r0,r3 + blr + .cfi_endproc +V_FUNCTION_END(__get_datapage) + +/* + * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; + * + * returns a pointer to the syscall map. the map is agnostic to the + * size of "long", unlike kernel bitops, it stores bits from top to + * bottom so that memory actually contains a linear bitmap + * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of + * 32 bits int at N >> 5. + */ +V_FUNCTION_BEGIN(__kernel_get_syscall_map) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + mr r4,r3 + bl V_LOCAL_FUNC(__get_datapage) + mtlr r12 + addi r3,r3,CFG_SYSCALL_MAP64 + cmpli cr0,r4,0 + crclr cr0*4+so + beqlr + li r0,__NR_syscalls + stw r0,0(r4) + blr + .cfi_endproc +V_FUNCTION_END(__kernel_get_syscall_map) + + +/* + * void unsigned long __kernel_get_tbfreq(void); + * + * returns the timebase frequency in HZ + */ +V_FUNCTION_BEGIN(__kernel_get_tbfreq) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + bl V_LOCAL_FUNC(__get_datapage) + ld r3,CFG_TB_TICKS_PER_SEC(r3) + mtlr r12 + crclr cr0*4+so + blr + .cfi_endproc +V_FUNCTION_END(__kernel_get_tbfreq) diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S new file mode 100644 index 00000000..e97a9a0d --- /dev/null +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -0,0 +1,218 @@ +/* + * Userland implementation of gettimeofday() for 64 bits processes in a + * ppc64 kernel for use in the vDSO + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), + * IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text +/* + * Exact prototype of gettimeofday + * + * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); + * + */ +V_FUNCTION_BEGIN(__kernel_gettimeofday) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + + mr r11,r3 /* r11 holds tv */ + mr r10,r4 /* r10 holds tz */ + bl V_LOCAL_FUNC(__get_datapage) /* get data page */ + cmpldi r11,0 /* check if tv is NULL */ + beq 2f + lis r7,1000000@ha /* load up USEC_PER_SEC */ + addi r7,r7,1000000@l + bl V_LOCAL_FUNC(__do_get_tspec) /* get sec/us from tb & kernel */ + std r4,TVAL64_TV_SEC(r11) /* store sec in tv */ + std r5,TVAL64_TV_USEC(r11) /* store usec in tv */ +2: cmpldi r10,0 /* check if tz is NULL */ + beq 1f + lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */ + lwz r5,CFG_TZ_DSTTIME(r3) + stw r4,TZONE_TZ_MINWEST(r10) + stw r5,TZONE_TZ_DSTTIME(r10) +1: mtlr r12 + crclr cr0*4+so + li r3,0 /* always success */ + blr + .cfi_endproc +V_FUNCTION_END(__kernel_gettimeofday) + + +/* + * Exact prototype of clock_gettime() + * + * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); + * + */ +V_FUNCTION_BEGIN(__kernel_clock_gettime) + .cfi_startproc + /* Check for supported clock IDs */ + cmpwi cr0,r3,CLOCK_REALTIME + cmpwi cr1,r3,CLOCK_MONOTONIC + cror cr0*4+eq,cr0*4+eq,cr1*4+eq + bne cr0,99f + + mflr r12 /* r12 saves lr */ + .cfi_register lr,r12 + mr r11,r4 /* r11 saves tp */ + bl V_LOCAL_FUNC(__get_datapage) /* get data page */ + lis r7,NSEC_PER_SEC@h /* want nanoseconds */ + ori r7,r7,NSEC_PER_SEC@l +50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */ + bne cr1,80f /* if not monotonic, all done */ + + /* + * CLOCK_MONOTONIC + */ + + /* now we must fixup using wall to monotonic. We need to snapshot + * that value and do the counter trick again. Fortunately, we still + * have the counter value in r8 that was returned by __do_get_tspec. + * At this point, r4,r5 contain our sec/nsec values. + */ + + lwa r6,WTOM_CLOCK_SEC(r3) + lwa r9,WTOM_CLOCK_NSEC(r3) + + /* We now have our result in r6,r9. We create a fake dependency + * on that result and re-check the counter + */ + or r0,r6,r9 + xor r0,r0,r0 + add r3,r3,r0 + ld r0,CFG_TB_UPDATE_COUNT(r3) + cmpld cr0,r0,r8 /* check if updated */ + bne- 50b + + /* Add wall->monotonic offset and check for overflow or underflow. 
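+ * (Both nanosecond values are already in [0, NSEC_PER_SEC), so the sum
+ * stays below 2*NSEC_PER_SEC and a single conditional subtract is
+ * enough; the underflow leg below is purely defensive.)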
+ */ + add r4,r4,r6 + add r5,r5,r9 + cmpd cr0,r5,r7 + cmpdi cr1,r5,0 + blt 1f + subf r5,r7,r5 + addi r4,r4,1 +1: bge cr1,80f + addi r4,r4,-1 + add r5,r5,r7 + +80: std r4,TSPC64_TV_SEC(r11) + std r5,TSPC64_TV_NSEC(r11) + + mtlr r12 + crclr cr0*4+so + li r3,0 + blr + + /* + * syscall fallback + */ +99: + li r0,__NR_clock_gettime + sc + blr + .cfi_endproc +V_FUNCTION_END(__kernel_clock_gettime) + + +/* + * Exact prototype of clock_getres() + * + * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); + * + */ +V_FUNCTION_BEGIN(__kernel_clock_getres) + .cfi_startproc + /* Check for supported clock IDs */ + cmpwi cr0,r3,CLOCK_REALTIME + cmpwi cr1,r3,CLOCK_MONOTONIC + cror cr0*4+eq,cr0*4+eq,cr1*4+eq + bne cr0,99f + + li r3,0 + cmpli cr0,r4,0 + crclr cr0*4+so + beqlr + lis r5,CLOCK_REALTIME_RES@h + ori r5,r5,CLOCK_REALTIME_RES@l + std r3,TSPC64_TV_SEC(r4) + std r5,TSPC64_TV_NSEC(r4) + blr + + /* + * syscall fallback + */ +99: + li r0,__NR_clock_getres + sc + blr + .cfi_endproc +V_FUNCTION_END(__kernel_clock_getres) + + +/* + * This is the core of clock_gettime() and gettimeofday(), + * it returns the current time in r4 (seconds) and r5. + * On entry, r7 gives the resolution of r5, either USEC_PER_SEC + * or NSEC_PER_SEC, giving r5 in microseconds or nanoseconds. + * It expects the datapage ptr in r3 and doesn't clobber it. + * It clobbers r0, r6 and r9. + * On return, r8 contains the counter value that can be reused. + * This clobbers cr0 but not any other cr field. + */ +V_FUNCTION_BEGIN(__do_get_tspec) + .cfi_startproc + /* check for update count & load values */ +1: ld r8,CFG_TB_UPDATE_COUNT(r3) + andi. r0,r8,1 /* pending update ? loop */ + bne- 1b + xor r0,r8,r8 /* create dependency */ + add r3,r3,r0 + + /* Get TB & offset it. We use the MFTB macro which will generate + * workaround code for Cell. + */ + MFTB(r6) + ld r9,CFG_TB_ORIG_STAMP(r3) + subf r6,r9,r6 + + /* Scale result */ + ld r5,CFG_TB_TO_XS(r3) + sldi r6,r6,12 /* compute time since stamp_xtime */ + mulhdu r6,r6,r5 /* in units of 2^-32 seconds */ + + /* Add stamp since epoch */ + ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3) + lwz r5,STAMP_SEC_FRAC(r3) + or r0,r4,r5 + or r0,r0,r6 + xor r0,r0,r0 + add r3,r3,r0 + ld r0,CFG_TB_UPDATE_COUNT(r3) + cmpld r0,r8 /* check if updated */ + bne- 1b /* reload if so */ + + /* convert to seconds & nanoseconds and add to stamp */ + add r6,r6,r5 /* add on fractional seconds of xtime */ + mulhwu r5,r6,r7 /* compute micro or nanoseconds and */ + srdi r6,r6,32 /* seconds since stamp_xtime */ + clrldi r5,r5,32 + add r4,r4,r6 + blr + .cfi_endproc +V_FUNCTION_END(__do_get_tspec) diff --git a/arch/powerpc/kernel/vdso64/note.S b/arch/powerpc/kernel/vdso64/note.S new file mode 100644 index 00000000..dc2a509f --- /dev/null +++ b/arch/powerpc/kernel/vdso64/note.S @@ -0,0 +1 @@ +#include "../vdso32/note.S" diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S new file mode 100644 index 00000000..45ea281e --- /dev/null +++ b/arch/powerpc/kernel/vdso64/sigtramp.S @@ -0,0 +1,297 @@ +/* + * Signal trampoline for 64 bits processes in a ppc64 kernel for + * use in the vDSO + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. + * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/unistd.h> +#include <asm/vdso.h> +#include <asm/ptrace.h> /* XXX for __SIGNAL_FRAMESIZE */ + + .text + +/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from + the return address to get an address in the middle of the presumed + call instruction. Since we don't have a call here, we artificially + extend the range covered by the unwind info by padding before the + real start. */ + nop + .balign 8 +V_FUNCTION_BEGIN(__kernel_sigtramp_rt64) +.Lsigrt_start = . - 4 + addi r1, r1, __SIGNAL_FRAMESIZE + li r0,__NR_rt_sigreturn + sc +.Lsigrt_end: +V_FUNCTION_END(__kernel_sigtramp_rt64) +/* The ".balign 8" above and the following zeros mimic the old stack + trampoline layout. The last magic value is the ucontext pointer, + chosen in such a way that older libgcc unwind code returns a zero + for a sigcontext pointer. */ + .long 0,0,0 + .quad 0,-21*8 + +/* Register r1 can be found at offset 8 of a pt_regs structure. + A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */ +#define cfa_save \ + .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x23; .uleb128 RSIZE; /* DW_OP_plus_uconst */ \ + .byte 0x06; /* DW_OP_deref */ \ +9: + +/* Register REGNO can be found at offset OFS of a pt_regs structure. + A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */ +#define rsave(regno, ofs) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .ifne ofs; \ + .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \ + .endif; \ +9: + +/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16 + of the VMX reg struct. A pointer to the VMX reg struct is at VREGS in + the pt_regs struct. This macro is for REGNO == 0, and contains + 'subroutines' that the other macros jump to. */ +#define vsave_msr0(regno) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x30 + regno; /* DW_OP_lit0 */ \ +2: \ + .byte 0x40; /* DW_OP_lit16 */ \ + .byte 0x1e; /* DW_OP_mul */ \ +3: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x12; /* DW_OP_dup */ \ + .byte 0x23; /* DW_OP_plus_uconst */ \ + .uleb128 33*RSIZE; /* msr offset */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x0c; .long 1 << 25; /* DW_OP_const4u */ \ + .byte 0x1a; /* DW_OP_and */ \ + .byte 0x12; /* DW_OP_dup, ret 0 if bra taken */ \ + .byte 0x30; /* DW_OP_lit0 */ \ + .byte 0x29; /* DW_OP_eq */ \ + .byte 0x28; .short 0x7fff; /* DW_OP_bra to end */ \ + .byte 0x13; /* DW_OP_drop, pop the 0 */ \ + .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x22; /* DW_OP_plus */ \ + .byte 0x2f; .short 0x7fff; /* DW_OP_skip to end */ \ +9: + +/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16 + of the VMX reg struct. REGNO is 1 thru 31. 
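+   The DW_OP_skip below branches backwards into the body of
+   vsave_msr0 (label 2), so the multiply-by-16 and the MSR_VEC test
+   are emitted only once and shared by all per-register expressions.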
*/ +#define vsave_msr1(regno) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x30 + regno; /* DW_OP_lit n */ \ + .byte 0x2f; .short 2b - 9f; /* DW_OP_skip */ \ +9: + +/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of + the VMX save block. */ +#define vsave_msr2(regno, ofs) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x0a; .short ofs; /* DW_OP_const2u */ \ + .byte 0x2f; .short 3b - 9f; /* DW_OP_skip */ \ +9: + +/* VMX register REGNO is at offset OFS of the VMX save area. */ +#define vsave(regno, ofs) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \ +9: + +/* This is where the pt_regs pointer can be found on the stack. */ +#define PTREGS 128+168+56 + +/* Size of regs. */ +#define RSIZE 8 + +/* Size of CR reg in DWARF unwind info. */ +#define CRSIZE 4 + +/* This is the offset of the VMX reg pointer. */ +#define VREGS 48*RSIZE+33*8 + +/* Describe where general purpose regs are saved. */ +#define EH_FRAME_GEN \ + cfa_save; \ + rsave ( 0, 0*RSIZE); \ + rsave ( 2, 2*RSIZE); \ + rsave ( 3, 3*RSIZE); \ + rsave ( 4, 4*RSIZE); \ + rsave ( 5, 5*RSIZE); \ + rsave ( 6, 6*RSIZE); \ + rsave ( 7, 7*RSIZE); \ + rsave ( 8, 8*RSIZE); \ + rsave ( 9, 9*RSIZE); \ + rsave (10, 10*RSIZE); \ + rsave (11, 11*RSIZE); \ + rsave (12, 12*RSIZE); \ + rsave (13, 13*RSIZE); \ + rsave (14, 14*RSIZE); \ + rsave (15, 15*RSIZE); \ + rsave (16, 16*RSIZE); \ + rsave (17, 17*RSIZE); \ + rsave (18, 18*RSIZE); \ + rsave (19, 19*RSIZE); \ + rsave (20, 20*RSIZE); \ + rsave (21, 21*RSIZE); \ + rsave (22, 22*RSIZE); \ + rsave (23, 23*RSIZE); \ + rsave (24, 24*RSIZE); \ + rsave (25, 25*RSIZE); \ + rsave (26, 26*RSIZE); \ + rsave (27, 27*RSIZE); \ + rsave (28, 28*RSIZE); \ + rsave (29, 29*RSIZE); \ + rsave (30, 30*RSIZE); \ + rsave (31, 31*RSIZE); \ + rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \ + rsave (65, 36*RSIZE); /* lr */ \ + rsave (70, 38*RSIZE + (RSIZE - CRSIZE)) /* cr */ + +/* Describe where the FP regs are saved. */ +#define EH_FRAME_FP \ + rsave (32, 48*RSIZE + 0*8); \ + rsave (33, 48*RSIZE + 1*8); \ + rsave (34, 48*RSIZE + 2*8); \ + rsave (35, 48*RSIZE + 3*8); \ + rsave (36, 48*RSIZE + 4*8); \ + rsave (37, 48*RSIZE + 5*8); \ + rsave (38, 48*RSIZE + 6*8); \ + rsave (39, 48*RSIZE + 7*8); \ + rsave (40, 48*RSIZE + 8*8); \ + rsave (41, 48*RSIZE + 9*8); \ + rsave (42, 48*RSIZE + 10*8); \ + rsave (43, 48*RSIZE + 11*8); \ + rsave (44, 48*RSIZE + 12*8); \ + rsave (45, 48*RSIZE + 13*8); \ + rsave (46, 48*RSIZE + 14*8); \ + rsave (47, 48*RSIZE + 15*8); \ + rsave (48, 48*RSIZE + 16*8); \ + rsave (49, 48*RSIZE + 17*8); \ + rsave (50, 48*RSIZE + 18*8); \ + rsave (51, 48*RSIZE + 19*8); \ + rsave (52, 48*RSIZE + 20*8); \ + rsave (53, 48*RSIZE + 21*8); \ + rsave (54, 48*RSIZE + 22*8); \ + rsave (55, 48*RSIZE + 23*8); \ + rsave (56, 48*RSIZE + 24*8); \ + rsave (57, 48*RSIZE + 25*8); \ + rsave (58, 48*RSIZE + 26*8); \ + rsave (59, 48*RSIZE + 27*8); \ + rsave (60, 48*RSIZE + 28*8); \ + rsave (61, 48*RSIZE + 29*8); \ + rsave (62, 48*RSIZE + 30*8); \ + rsave (63, 48*RSIZE + 31*8) + +/* Describe where the VMX regs are saved. 
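+   Unlike the 32-bit frame, where the VMX save area sits inline in
+   the signal frame, VREGS here is the offset of a *pointer* to that
+   area, hence the extra DW_OP_deref in vsave() and vsave_msr0()
+   above.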
*/ +#ifdef CONFIG_ALTIVEC +#define EH_FRAME_VMX \ + vsave_msr0 ( 0); \ + vsave_msr1 ( 1); \ + vsave_msr1 ( 2); \ + vsave_msr1 ( 3); \ + vsave_msr1 ( 4); \ + vsave_msr1 ( 5); \ + vsave_msr1 ( 6); \ + vsave_msr1 ( 7); \ + vsave_msr1 ( 8); \ + vsave_msr1 ( 9); \ + vsave_msr1 (10); \ + vsave_msr1 (11); \ + vsave_msr1 (12); \ + vsave_msr1 (13); \ + vsave_msr1 (14); \ + vsave_msr1 (15); \ + vsave_msr1 (16); \ + vsave_msr1 (17); \ + vsave_msr1 (18); \ + vsave_msr1 (19); \ + vsave_msr1 (20); \ + vsave_msr1 (21); \ + vsave_msr1 (22); \ + vsave_msr1 (23); \ + vsave_msr1 (24); \ + vsave_msr1 (25); \ + vsave_msr1 (26); \ + vsave_msr1 (27); \ + vsave_msr1 (28); \ + vsave_msr1 (29); \ + vsave_msr1 (30); \ + vsave_msr1 (31); \ + vsave_msr2 (33, 32*16+12); \ + vsave (32, 33*16) +#else +#define EH_FRAME_VMX +#endif + + .section .eh_frame,"a",@progbits +.Lcie: + .long .Lcie_end - .Lcie_start +.Lcie_start: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .string "zRS" /* NUL-terminated augmentation string */ + .uleb128 4 /* Code alignment factor */ + .sleb128 -8 /* Data alignment factor */ + .byte 67 /* Return address register column, ap */ + .uleb128 1 /* Augmentation value length */ + .byte 0x14 /* DW_EH_PE_pcrel | DW_EH_PE_udata8. */ + .byte 0x0c,1,0 /* DW_CFA_def_cfa: r1 ofs 0 */ + .balign 8 +.Lcie_end: + + .long .Lfde0_end - .Lfde0_start +.Lfde0_start: + .long .Lfde0_start - .Lcie /* CIE pointer. */ + .quad .Lsigrt_start - . /* PC start, length */ + .quad .Lsigrt_end - .Lsigrt_start + .uleb128 0 /* Augmentation */ + EH_FRAME_GEN + EH_FRAME_FP + EH_FRAME_VMX +# Do we really need to describe the frame at this point? ie. will +# we ever have some call chain that returns somewhere past the addi? +# I don't think so, since gcc doesn't support async signals. +# .byte 0x41 /* DW_CFA_advance_loc 1*4 */ +#undef PTREGS +#define PTREGS 168+56 +# EH_FRAME_GEN +# EH_FRAME_FP +# EH_FRAME_VMX + .balign 8 +.Lfde0_end: diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S new file mode 100644 index 00000000..0e615404 --- /dev/null +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -0,0 +1,152 @@ +/* + * This is the infamous ld script for the 64 bits vdso + * library + */ +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc", "elf64-powerpc") +OUTPUT_ARCH(powerpc:common64) +ENTRY(_start) + +SECTIONS +{ + . = VDSO64_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN(16); + .text : { + *(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*) + *(.sfpr .glink) + } :text + PROVIDE(__etext = .); + PROVIDE(_etext = .); + PROVIDE(etext = .); + + . = ALIGN(8); + __ftr_fixup : { *(__ftr_fixup) } + + . = ALIGN(8); + __mmu_ftr_fixup : { *(__mmu_ftr_fixup) } + + . = ALIGN(8); + __lwsync_fixup : { *(__lwsync_fixup) } + + . 
= ALIGN(8); + __fw_ftr_fixup : { *(__fw_ftr_fixup) } + + /* + * Other stuff is appended to the text segment: + */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .dynamic : { *(.dynamic) } :text :dynamic + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table) } + .rela.dyn ALIGN(8) : { *(.rela.dyn) } + + .opd ALIGN(8) : { KEEP (*(.opd)) } + .got ALIGN(8) : { *(.got .toc) } + + _end = .; + PROVIDE(end = .); + + /* + * Stabs debugging sections are here too. + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + + /* + * DWARF debug sections. + * Symbols in the DWARF debugging sections are relative to the beginning + * of the section so we begin them at 0. + */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + + /DISCARD/ : { + *(.note.GNU-stack) + *(.branch_lt) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. 
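+ * Everything not listed is forced local by the catch-all below, so
+ * internal helpers such as __get_datapage and __do_get_tspec can
+ * never be bound to from userland.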
+ */ +VERSION +{ + VDSO_VERSION_STRING { + global: + /* + * Has to be there for the kernel to find + */ + __kernel_datapage_offset; + + __kernel_get_syscall_map; + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + __kernel_get_tbfreq; + __kernel_sync_dicache; + __kernel_sync_dicache_p5; + __kernel_sigtramp_rt64; + + local: *; + }; +} diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S new file mode 100644 index 00000000..b8553d62 --- /dev/null +++ b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S @@ -0,0 +1,14 @@ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso64_start, vdso64_end + .balign PAGE_SIZE +vdso64_start: + .incbin "arch/powerpc/kernel/vdso64/vdso64.so" + .balign PAGE_SIZE +vdso64_end: + + .previous diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c new file mode 100644 index 00000000..604d0947 --- /dev/null +++ b/arch/powerpc/kernel/vecemu.c @@ -0,0 +1,345 @@ +/* + * Routines to emulate some Altivec/VMX instructions, specifically + * those that can trap when given denormalized operands in Java mode. + */ +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <asm/ptrace.h> +#include <asm/processor.h> +#include <asm/uaccess.h> + +/* Functions in vector.S */ +extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); +extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b); +extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); +extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); +extern void vrefp(vector128 *dst, vector128 *src); +extern void vrsqrtefp(vector128 *dst, vector128 *src); +extern void vexptep(vector128 *dst, vector128 *src); + +static unsigned int exp2s[8] = { + 0x800000, + 0x8b95c2, + 0x9837f0, + 0xa5fed7, + 0xb504f3, + 0xc5672a, + 0xd744fd, + 0xeac0c7 +}; + +/* + * Computes an estimate of 2^x. The `s' argument is the 32-bit + * single-precision floating-point representation of x. + */ +static unsigned int eexp2(unsigned int s) +{ + int exp, pwr; + unsigned int mant, frac; + + /* extract exponent field from input */ + exp = ((s >> 23) & 0xff) - 127; + if (exp > 7) { + /* check for NaN input */ + if (exp == 128 && (s & 0x7fffff) != 0) + return s | 0x400000; /* return QNaN */ + /* 2^-big = 0, 2^+big = +Inf */ + return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */ + } + if (exp < -23) + return 0x3f800000; /* 1.0 */ + + /* convert to fixed point integer in 9.23 representation */ + pwr = (s & 0x7fffff) | 0x800000; + if (exp > 0) + pwr <<= exp; + else + pwr >>= -exp; + if (s & 0x80000000) + pwr = -pwr; + + /* extract integer part, which becomes exponent part of result */ + exp = (pwr >> 23) + 126; + if (exp >= 254) + return 0x7f800000; + if (exp < -23) + return 0; + + /* table lookup on top 3 bits of fraction to get mantissa */ + mant = exp2s[(pwr >> 20) & 7]; + + /* linear interpolation using remaining 20 bits of fraction */ + asm("mulhwu %0,%1,%2" : "=r" (frac) + : "r" (pwr << 12), "r" (0x172b83ff)); + asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant)); + mant += frac; + + if (exp >= 0) + return mant + (exp << 23); + + /* denormalized result */ + exp = -exp; + mant += 1 << (exp - 1); + return mant >> exp; +} + +/* + * Computes an estimate of log_2(x). The `s' argument is the 32-bit + * single-precision floating-point representation of x. 
+ */ +static unsigned int elog2(unsigned int s) +{ + int exp, mant, lz, frac; + + exp = s & 0x7f800000; + mant = s & 0x7fffff; + if (exp == 0x7f800000) { /* Inf or NaN */ + if (mant != 0) + s |= 0x400000; /* turn NaN into QNaN */ + return s; + } + if ((exp | mant) == 0) /* +0 or -0 */ + return 0xff800000; /* return -Inf */ + + if (exp == 0) { + /* denormalized */ + asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant)); + mant <<= lz - 8; + exp = (-118 - lz) << 23; + } else { + mant |= 0x800000; + exp -= 127 << 23; + } + + if (mant >= 0xb504f3) { /* 2^0.5 * 2^23 */ + exp |= 0x400000; /* 0.5 * 2^23 */ + asm("mulhwu %0,%1,%2" : "=r" (mant) + : "r" (mant), "r" (0xb504f334)); /* 2^-0.5 * 2^32 */ + } + if (mant >= 0x9837f0) { /* 2^0.25 * 2^23 */ + exp |= 0x200000; /* 0.25 * 2^23 */ + asm("mulhwu %0,%1,%2" : "=r" (mant) + : "r" (mant), "r" (0xd744fccb)); /* 2^-0.25 * 2^32 */ + } + if (mant >= 0x8b95c2) { /* 2^0.125 * 2^23 */ + exp |= 0x100000; /* 0.125 * 2^23 */ + asm("mulhwu %0,%1,%2" : "=r" (mant) + : "r" (mant), "r" (0xeac0c6e8)); /* 2^-0.125 * 2^32 */ + } + if (mant > 0x800000) { /* 1.0 * 2^23 */ + /* calculate (mant - 1) * 1.381097463 */ + /* 1.381097463 == 0.125 / (2^0.125 - 1) */ + asm("mulhwu %0,%1,%2" : "=r" (frac) + : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a)); + exp += frac; + } + s = exp & 0x80000000; + if (exp != 0) { + if (s) + exp = -exp; + asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp)); + lz = 8 - lz; + if (lz > 0) + exp >>= lz; + else if (lz < 0) + exp <<= -lz; + s += ((lz + 126) << 23) + exp; + } + return s; +} + +#define VSCR_SAT 1 + +static int ctsxs(unsigned int x, int scale, unsigned int *vscrp) +{ + int exp, mant; + + exp = (x >> 23) & 0xff; + mant = x & 0x7fffff; + if (exp == 255 && mant != 0) + return 0; /* NaN -> 0 */ + exp = exp - 127 + scale; + if (exp < 0) + return 0; /* round towards zero */ + if (exp >= 31) { + /* saturate, unless the result would be -2^31 */ + if (x + (scale << 23) != 0xcf000000) + *vscrp |= VSCR_SAT; + return (x & 0x80000000)? 0x80000000: 0x7fffffff; + } + mant |= 0x800000; + mant = (mant << 7) >> (30 - exp); + return (x & 0x80000000)? 
-mant: mant; +} + +static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp) +{ + int exp; + unsigned int mant; + + exp = (x >> 23) & 0xff; + mant = x & 0x7fffff; + if (exp == 255 && mant != 0) + return 0; /* NaN -> 0 */ + exp = exp - 127 + scale; + if (exp < 0) + return 0; /* round towards zero */ + if (x & 0x80000000) { + /* negative => saturate to 0 */ + *vscrp |= VSCR_SAT; + return 0; + } + if (exp >= 32) { + /* saturate */ + *vscrp |= VSCR_SAT; + return 0xffffffff; + } + mant |= 0x800000; + mant = (mant << 8) >> (31 - exp); + return mant; +} + +/* Round to floating integer, towards 0 */ +static unsigned int rfiz(unsigned int x) +{ + int exp; + + exp = ((x >> 23) & 0xff) - 127; + if (exp == 128 && (x & 0x7fffff) != 0) + return x | 0x400000; /* NaN -> make it a QNaN */ + if (exp >= 23) + return x; /* it's an integer already (or Inf) */ + if (exp < 0) + return x & 0x80000000; /* |x| < 1.0 rounds to 0 */ + return x & ~(0x7fffff >> exp); +} + +/* Round to floating integer, towards +/- Inf */ +static unsigned int rfii(unsigned int x) +{ + int exp, mask; + + exp = ((x >> 23) & 0xff) - 127; + if (exp == 128 && (x & 0x7fffff) != 0) + return x | 0x400000; /* NaN -> make it a QNaN */ + if (exp >= 23) + return x; /* it's an integer already (or Inf) */ + if ((x & 0x7fffffff) == 0) + return x; /* +/-0 -> +/-0 */ + if (exp < 0) + /* 0 < |x| < 1.0 rounds to +/- 1.0 */ + return (x & 0x80000000) | 0x3f800000; + mask = 0x7fffff >> exp; + /* mantissa overflows into exponent - that's OK, + it can't overflow into the sign bit */ + return (x + mask) & ~mask; +} + +/* Round to floating integer, to nearest */ +static unsigned int rfin(unsigned int x) +{ + int exp, half; + + exp = ((x >> 23) & 0xff) - 127; + if (exp == 128 && (x & 0x7fffff) != 0) + return x | 0x400000; /* NaN -> make it a QNaN */ + if (exp >= 23) + return x; /* it's an integer already (or Inf) */ + if (exp < -1) + return x & 0x80000000; /* |x| < 0.5 -> +/-0 */ + if (exp == -1) + /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */ + return (x & 0x80000000) | 0x3f800000; + half = 0x400000 >> exp; + /* add 0.5 to the magnitude and chop off the fraction bits */ + return (x + half) & ~(0x7fffff >> exp); +} + +int emulate_altivec(struct pt_regs *regs) +{ + unsigned int instr, i; + unsigned int va, vb, vc, vd; + vector128 *vrs; + + if (get_user(instr, (unsigned int __user *) regs->nip)) + return -EFAULT; + if ((instr >> 26) != 4) + return -EINVAL; /* not an altivec instruction */ + vd = (instr >> 21) & 0x1f; + va = (instr >> 16) & 0x1f; + vb = (instr >> 11) & 0x1f; + vc = (instr >> 6) & 0x1f; + + vrs = current->thread.vr; + switch (instr & 0x3f) { + case 10: + switch (vc) { + case 0: /* vaddfp */ + vaddfp(&vrs[vd], &vrs[va], &vrs[vb]); + break; + case 1: /* vsubfp */ + vsubfp(&vrs[vd], &vrs[va], &vrs[vb]); + break; + case 4: /* vrefp */ + vrefp(&vrs[vd], &vrs[vb]); + break; + case 5: /* vrsqrtefp */ + vrsqrtefp(&vrs[vd], &vrs[vb]); + break; + case 6: /* vexptefp */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = eexp2(vrs[vb].u[i]); + break; + case 7: /* vlogefp */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = elog2(vrs[vb].u[i]); + break; + case 8: /* vrfin */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = rfin(vrs[vb].u[i]); + break; + case 9: /* vrfiz */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = rfiz(vrs[vb].u[i]); + break; + case 10: /* vrfip */ + for (i = 0; i < 4; ++i) { + u32 x = vrs[vb].u[i]; + x = (x & 0x80000000)? 
rfiz(x): rfii(x);
+				vrs[vd].u[i] = x;
+			}
+			break;
+		case 11:	/* vrfim */
+			for (i = 0; i < 4; ++i) {
+				u32 x = vrs[vb].u[i];
+				x = (x & 0x80000000)? rfii(x): rfiz(x);
+				vrs[vd].u[i] = x;
+			}
+			break;
+		case 14:	/* vctuxs */
+			for (i = 0; i < 4; ++i)
+				vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
+					&current->thread.vscr.u[3]);
+			break;
+		case 15:	/* vctsxs */
+			for (i = 0; i < 4; ++i)
+				vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
+					&current->thread.vscr.u[3]);
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case 46:	/* vmaddfp */
+		vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
+		break;
+	case 47:	/* vnmsubfp */
+		vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
new file mode 100644
index 00000000..4d5a3edf
--- /dev/null
+++ b/arch/powerpc/kernel/vector.S
@@ -0,0 +1,407 @@
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/reg.h>
+#include <asm/asm-offsets.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+
+/*
+ * load_up_altivec(unused, unused, tsk)
+ * Disable VMX for the task which had it previously,
+ * and save its vector registers in its thread_struct.
+ * Enables the VMX for use in the kernel on return.
+ * On SMP we know the VMX is free, since we give it up every
+ * switch (ie, no lazy save of the vector registers).
+ */
+_GLOBAL(load_up_altivec)
+	mfmsr	r5			/* grab the current MSR */
+	oris	r5,r5,MSR_VEC@h
+	MTMSRD(r5)			/* enable use of AltiVec now */
+	isync
+
+/*
+ * For SMP, we don't do lazy VMX switching because it just gets too
+ * horrendously complex, especially when a task switches from one CPU
+ * to another.  Instead we call giveup_altivec in switch_to.
+ * VRSAVE isn't dealt with here, that is done in the normal context
+ * switch code. Note that we could rely on vrsave value to eventually
+ * avoid saving all of the VREGs here...
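+ * (VRSAVE is a software convention: userland sets a bit for each
+ * vector register it actually uses, so a later optimisation could
+ * save and restore only the marked registers instead of all 32.)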
+ */ +#ifndef CONFIG_SMP + LOAD_REG_ADDRBASE(r3, last_task_used_altivec) + toreal(r3) + PPC_LL r4,ADDROFF(last_task_used_altivec)(r3) + PPC_LCMPI 0,r4,0 + beq 1f + + /* Save VMX state to last_task_used_altivec's THREAD struct */ + toreal(r4) + addi r4,r4,THREAD + SAVE_32VRS(0,r5,r4) + mfvscr vr0 + li r10,THREAD_VSCR + stvx vr0,r10,r4 + /* Disable VMX for last_task_used_altivec */ + PPC_LL r5,PT_REGS(r4) + toreal(r5) + PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r10,MSR_VEC@h + andc r4,r4,r10 + PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + + /* Hack: if we get an altivec unavailable trap with VRSAVE + * set to all zeros, we assume this is a broken application + * that fails to set it properly, and thus we switch it to + * all 1's + */ + mfspr r4,SPRN_VRSAVE + cmpwi 0,r4,0 + bne+ 1f + li r4,-1 + mtspr SPRN_VRSAVE,r4 +1: + /* enable use of VMX after return */ +#ifdef CONFIG_PPC32 + mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ + oris r9,r9,MSR_VEC@h +#else + ld r4,PACACURRENT(r13) + addi r5,r4,THREAD /* Get THREAD */ + oris r12,r12,MSR_VEC@h + std r12,_MSR(r1) +#endif + li r4,1 + li r10,THREAD_VSCR + stw r4,THREAD_USED_VR(r5) + lvx vr0,r10,r5 + mtvscr vr0 + REST_32VRS(0,r4,r5) +#ifndef CONFIG_SMP + /* Update last_task_used_altivec to 'current' */ + subi r4,r5,THREAD /* Back to 'current' */ + fromreal(r4) + PPC_STL r4,ADDROFF(last_task_used_altivec)(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + blr + +/* + * giveup_altivec(tsk) + * Disable VMX for the task given as the argument, + * and save the vector registers in its thread_struct. + * Enables the VMX for use in the kernel on return. + */ +_GLOBAL(giveup_altivec) + mfmsr r5 + oris r5,r5,MSR_VEC@h + SYNC + MTMSRD(r5) /* enable use of VMX now */ + isync + PPC_LCMPI 0,r3,0 + beqlr /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + PPC_LL r5,PT_REGS(r3) + PPC_LCMPI 0,r5,0 + SAVE_32VRS(0,r4,r3) + mfvscr vr0 + li r4,THREAD_VSCR + stvx vr0,r4,r3 + beq 1f + PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + lis r3,(MSR_VEC|MSR_VSX)@h +FTR_SECTION_ELSE + lis r3,MSR_VEC@h +ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX) +#else + lis r3,MSR_VEC@h +#endif + andc r4,r4,r3 /* disable FP for previous task */ + PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + LOAD_REG_ADDRBASE(r4,last_task_used_altivec) + PPC_STL r5,ADDROFF(last_task_used_altivec)(r4) +#endif /* CONFIG_SMP */ + blr + +#ifdef CONFIG_VSX + +#ifdef CONFIG_PPC32 +#error This asm code isn't ready for 32-bit kernels +#endif + +/* + * load_up_vsx(unused, unused, tsk) + * Disable VSX for the task which had it previously, + * and save its vector registers in its thread_struct. + * Reuse the fp and vsx saves, but first check to see if they have + * been saved already. + */ +_GLOBAL(load_up_vsx) +/* Load FP and VSX registers if they haven't been done yet */ + andi. r5,r12,MSR_FP + beql+ load_up_fpu /* skip if already loaded */ + andis. r5,r12,MSR_VEC@h + beql+ load_up_altivec /* skip if already loaded */ + +#ifndef CONFIG_SMP + ld r3,last_task_used_vsx@got(r2) + ld r4,0(r3) + cmpdi 0,r4,0 + beq 1f + /* Disable VSX for last_task_used_vsx */ + addi r4,r4,THREAD + ld r5,PT_REGS(r4) + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r6,MSR_VSX@h + andc r6,r4,r6 + std r6,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + ld r4,PACACURRENT(r13) + addi r4,r4,THREAD /* Get THREAD */ + li r6,1 + stw r6,THREAD_USED_VSR(r4) /* ... 
also set thread used vsr */ + /* enable use of VSX after return */ + oris r12,r12,MSR_VSX@h + std r12,_MSR(r1) +#ifndef CONFIG_SMP + /* Update last_task_used_vsx to 'current' */ + ld r4,PACACURRENT(r13) + std r4,0(r3) +#endif /* CONFIG_SMP */ + b fast_exception_return + +/* + * __giveup_vsx(tsk) + * Disable VSX for the task given as the argument. + * Does NOT save vsx registers. + * Enables the VSX for use in the kernel on return. + */ +_GLOBAL(__giveup_vsx) + mfmsr r5 + oris r5,r5,MSR_VSX@h + mtmsrd r5 /* enable use of VSX now */ + isync + + cmpdi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + ld r5,PT_REGS(r3) + cmpdi 0,r5,0 + beq 1f + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r3,MSR_VSX@h + andc r4,r4,r3 /* disable VSX for previous task */ + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + ld r4,last_task_used_vsx@got(r2) + std r5,0(r4) +#endif /* CONFIG_SMP */ + blr + +#endif /* CONFIG_VSX */ + + +/* + * The routines below are in assembler so we can closely control the + * usage of floating-point registers. These routines must be called + * with preempt disabled. + */ +#ifdef CONFIG_PPC32 + .data +fpzero: + .long 0 +fpone: + .long 0x3f800000 /* 1.0 in single-precision FP */ +fphalf: + .long 0x3f000000 /* 0.5 in single-precision FP */ + +#define LDCONST(fr, name) \ + lis r11,name@ha; \ + lfs fr,name@l(r11) +#else + + .section ".toc","aw" +fpzero: + .tc FD_0_0[TC],0 +fpone: + .tc FD_3ff00000_0[TC],0x3ff0000000000000 /* 1.0 */ +fphalf: + .tc FD_3fe00000_0[TC],0x3fe0000000000000 /* 0.5 */ + +#define LDCONST(fr, name) \ + lfd fr,name@toc(r2) +#endif + + .text +/* + * Internal routine to enable floating point and set FPSCR to 0. + * Don't call it from C; it doesn't use the normal calling convention. + */ +fpenable: +#ifdef CONFIG_PPC32 + stwu r1,-64(r1) +#else + stdu r1,-64(r1) +#endif + mfmsr r10 + ori r11,r10,MSR_FP + mtmsr r11 + isync + stfd fr0,24(r1) + stfd fr1,16(r1) + stfd fr31,8(r1) + LDCONST(fr1, fpzero) + mffs fr31 + MTFSF_L(fr1) + blr + +fpdisable: + mtlr r12 + MTFSF_L(fr31) + lfd fr31,8(r1) + lfd fr1,16(r1) + lfd fr0,24(r1) + mtmsr r10 + isync + addi r1,r1,64 + blr + +/* + * Vector add, floating point. + */ +_GLOBAL(vaddfp) + mflr r12 + bl fpenable + li r0,4 + mtctr r0 + li r6,0 +1: lfsx fr0,r4,r6 + lfsx fr1,r5,r6 + fadds fr0,fr0,fr1 + stfsx fr0,r3,r6 + addi r6,r6,4 + bdnz 1b + b fpdisable + +/* + * Vector subtract, floating point. + */ +_GLOBAL(vsubfp) + mflr r12 + bl fpenable + li r0,4 + mtctr r0 + li r6,0 +1: lfsx fr0,r4,r6 + lfsx fr1,r5,r6 + fsubs fr0,fr0,fr1 + stfsx fr0,r3,r6 + addi r6,r6,4 + bdnz 1b + b fpdisable + +/* + * Vector multiply and add, floating point. + */ +_GLOBAL(vmaddfp) + mflr r12 + bl fpenable + stfd fr2,32(r1) + li r0,4 + mtctr r0 + li r7,0 +1: lfsx fr0,r4,r7 + lfsx fr1,r5,r7 + lfsx fr2,r6,r7 + fmadds fr0,fr0,fr2,fr1 + stfsx fr0,r3,r7 + addi r7,r7,4 + bdnz 1b + lfd fr2,32(r1) + b fpdisable + +/* + * Vector negative multiply and subtract, floating point. + */ +_GLOBAL(vnmsubfp) + mflr r12 + bl fpenable + stfd fr2,32(r1) + li r0,4 + mtctr r0 + li r7,0 +1: lfsx fr0,r4,r7 + lfsx fr1,r5,r7 + lfsx fr2,r6,r7 + fnmsubs fr0,fr0,fr2,fr1 + stfsx fr0,r3,r7 + addi r7,r7,4 + bdnz 1b + lfd fr2,32(r1) + b fpdisable + +/* + * Vector reciprocal estimate. We just compute 1.0/x. + * r3 -> destination, r4 -> source. 
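+ * (The architecture only requires the vrefp estimate to be accurate
+ * to one part in 4096, so an exact single-precision divide trivially
+ * satisfies it.)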
+ */ +_GLOBAL(vrefp) + mflr r12 + bl fpenable + li r0,4 + LDCONST(fr1, fpone) + mtctr r0 + li r6,0 +1: lfsx fr0,r4,r6 + fdivs fr0,fr1,fr0 + stfsx fr0,r3,r6 + addi r6,r6,4 + bdnz 1b + b fpdisable + +/* + * Vector reciprocal square-root estimate, floating point. + * We use the frsqrte instruction for the initial estimate followed + * by 2 iterations of Newton-Raphson to get sufficient accuracy. + * r3 -> destination, r4 -> source. + */ +_GLOBAL(vrsqrtefp) + mflr r12 + bl fpenable + stfd fr2,32(r1) + stfd fr3,40(r1) + stfd fr4,48(r1) + stfd fr5,56(r1) + li r0,4 + LDCONST(fr4, fpone) + LDCONST(fr5, fphalf) + mtctr r0 + li r6,0 +1: lfsx fr0,r4,r6 + frsqrte fr1,fr0 /* r = frsqrte(s) */ + fmuls fr3,fr1,fr0 /* r * s */ + fmuls fr2,fr1,fr5 /* r * 0.5 */ + fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */ + fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */ + fmuls fr3,fr1,fr0 /* r * s */ + fmuls fr2,fr1,fr5 /* r * 0.5 */ + fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */ + fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */ + stfsx fr1,r3,r6 + addi r6,r6,4 + bdnz 1b + lfd fr5,56(r1) + lfd fr4,48(r1) + lfd fr3,40(r1) + lfd fr2,32(r1) + b fpdisable diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c new file mode 100644 index 00000000..a3a99901 --- /dev/null +++ b/arch/powerpc/kernel/vio.c @@ -0,0 +1,1467 @@ +/* + * IBM PowerPC Virtual I/O Infrastructure Support. + * + * Copyright (c) 2003,2008 IBM Corp. + * Dave Engebretsen engebret@us.ibm.com + * Santiago Leon santil@us.ibm.com + * Hollis Blanchard <hollisb@us.ibm.com> + * Stephen Rothwell + * Robert Jennings <rcjenn@us.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/types.h> +#include <linux/stat.h> +#include <linux/device.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/console.h> +#include <linux/export.h> +#include <linux/mm.h> +#include <linux/dma-mapping.h> +#include <linux/kobject.h> + +#include <asm/iommu.h> +#include <asm/dma.h> +#include <asm/vio.h> +#include <asm/prom.h> +#include <asm/firmware.h> +#include <asm/tce.h> +#include <asm/abs_addr.h> +#include <asm/page.h> +#include <asm/hvcall.h> + +static struct bus_type vio_bus_type; + +static struct vio_dev vio_bus_device = { /* fake "parent" device */ + .name = "vio", + .type = "", + .dev.init_name = "vio", + .dev.bus = &vio_bus_type, +}; + +#ifdef CONFIG_PPC_SMLPAR +/** + * vio_cmo_pool - A pool of IO memory for CMO use + * + * @size: The size of the pool in bytes + * @free: The amount of free memory in the pool + */ +struct vio_cmo_pool { + size_t size; + size_t free; +}; + +/* How many ms to delay queued balance work */ +#define VIO_CMO_BALANCE_DELAY 100 + +/* Portion out IO memory to CMO devices by this chunk size */ +#define VIO_CMO_BALANCE_CHUNK 131072 + +/** + * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement + * + * @vio_dev: struct vio_dev pointer + * @list: pointer to other devices on bus that are being tracked + */ +struct vio_cmo_dev_entry { + struct vio_dev *viodev; + struct list_head list; +}; + +/** + * vio_cmo - VIO bus accounting structure for CMO entitlement + * + * @lock: spinlock for entire structure + * @balance_q: work queue for balancing system entitlement + * @device_list: list of CMO-enabled devices requiring entitlement + * @entitled: total system entitlement in bytes + * @reserve: pool of memory from which devices reserve entitlement, incl. spare + * @excess: pool of excess entitlement not needed for device reserves or spare + * @spare: IO memory for device hotplug functionality + * @min: minimum necessary for system operation + * @desired: desired memory for system operation + * @curr: bytes currently allocated + * @high: high water mark for IO data usage + */ +struct vio_cmo { + spinlock_t lock; + struct delayed_work balance_q; + struct list_head device_list; + size_t entitled; + struct vio_cmo_pool reserve; + struct vio_cmo_pool excess; + size_t spare; + size_t min; + size_t desired; + size_t curr; + size_t high; +} vio_cmo; + +/** + * vio_cmo_OF_devices - Count the number of OF devices that have DMA windows + */ +static int vio_cmo_num_OF_devs(void) +{ + struct device_node *node_vroot; + int count = 0; + + /* + * Count the number of vdevice entries with an + * ibm,my-dma-window OF property + */ + node_vroot = of_find_node_by_name(NULL, "vdevice"); + if (node_vroot) { + struct device_node *of_node; + struct property *prop; + + for_each_child_of_node(node_vroot, of_node) { + prop = of_find_property(of_node, "ibm,my-dma-window", + NULL); + if (prop) + count++; + } + } + of_node_put(node_vroot); + return count; +} + +/** + * vio_cmo_alloc - allocate IO memory for CMO-enable devices + * + * @viodev: VIO device requesting IO memory + * @size: size of allocation requested + * + * Allocations come from memory reserved for the devices and any excess + * IO memory available to all devices. The spare pool used to service + * hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be + * made available. 
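+ * For example (illustrative numbers): a device with 64KB of unused
+ * reserve entitlement that requests 80KB is charged 64KB against its
+ * reserve and, provided the spare pool is fully funded, the remaining
+ * 16KB against the excess pool's free space.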
+ * + * Return codes: + * 0 for successful allocation and -ENOMEM for a failure + */ +static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size) +{ + unsigned long flags; + size_t reserve_free = 0; + size_t excess_free = 0; + int ret = -ENOMEM; + + spin_lock_irqsave(&vio_cmo.lock, flags); + + /* Determine the amount of free entitlement available in reserve */ + if (viodev->cmo.entitled > viodev->cmo.allocated) + reserve_free = viodev->cmo.entitled - viodev->cmo.allocated; + + /* If spare is not fulfilled, the excess pool can not be used. */ + if (vio_cmo.spare >= VIO_CMO_MIN_ENT) + excess_free = vio_cmo.excess.free; + + /* The request can be satisfied */ + if ((reserve_free + excess_free) >= size) { + vio_cmo.curr += size; + if (vio_cmo.curr > vio_cmo.high) + vio_cmo.high = vio_cmo.curr; + viodev->cmo.allocated += size; + size -= min(reserve_free, size); + vio_cmo.excess.free -= size; + ret = 0; + } + + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return ret; +} + +/** + * vio_cmo_dealloc - deallocate IO memory from CMO-enable devices + * @viodev: VIO device freeing IO memory + * @size: size of deallocation + * + * IO memory is freed by the device back to the correct memory pools. + * The spare pool is replenished first from either memory pool, then + * the reserve pool is used to reduce device entitlement, the excess + * pool is used to increase the reserve pool toward the desired entitlement + * target, and then the remaining memory is returned to the pools. + * + */ +static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size) +{ + unsigned long flags; + size_t spare_needed = 0; + size_t excess_freed = 0; + size_t reserve_freed = size; + size_t tmp; + int balance = 0; + + spin_lock_irqsave(&vio_cmo.lock, flags); + vio_cmo.curr -= size; + + /* Amount of memory freed from the excess pool */ + if (viodev->cmo.allocated > viodev->cmo.entitled) { + excess_freed = min(reserve_freed, (viodev->cmo.allocated - + viodev->cmo.entitled)); + reserve_freed -= excess_freed; + } + + /* Remove allocation from device */ + viodev->cmo.allocated -= (reserve_freed + excess_freed); + + /* Spare is a subset of the reserve pool, replenish it first. */ + spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare; + + /* + * Replenish the spare in the reserve pool from the excess pool. + * This moves entitlement into the reserve pool. + */ + if (spare_needed && excess_freed) { + tmp = min(excess_freed, spare_needed); + vio_cmo.excess.size -= tmp; + vio_cmo.reserve.size += tmp; + vio_cmo.spare += tmp; + excess_freed -= tmp; + spare_needed -= tmp; + balance = 1; + } + + /* + * Replenish the spare in the reserve pool from the reserve pool. + * This removes entitlement from the device down to VIO_CMO_MIN_ENT, + * if needed, and gives it to the spare pool. The amount of used + * memory in this pool does not change. + */ + if (spare_needed && reserve_freed) { + tmp = min3(spare_needed, reserve_freed, (viodev->cmo.entitled - VIO_CMO_MIN_ENT)); + + vio_cmo.spare += tmp; + viodev->cmo.entitled -= tmp; + reserve_freed -= tmp; + spare_needed -= tmp; + balance = 1; + } + + /* + * Increase the reserve pool until the desired allocation is met. + * Move an allocation freed from the excess pool into the reserve + * pool and schedule a balance operation. 
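+ * Memory freed from the reserve pool needs no such move: it simply
+ * becomes unused entitlement under the owning device.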
+ */ + if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) { + tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size)); + + vio_cmo.excess.size -= tmp; + vio_cmo.reserve.size += tmp; + excess_freed -= tmp; + balance = 1; + } + + /* Return memory from the excess pool to that pool */ + if (excess_freed) + vio_cmo.excess.free += excess_freed; + + if (balance) + schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY); + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +/** + * vio_cmo_entitlement_update - Manage system entitlement changes + * + * @new_entitlement: new system entitlement to attempt to accommodate + * + * Increases in entitlement will be used to fulfill the spare entitlement + * and the rest is given to the excess pool. Decreases, if they are + * possible, come from the excess pool and from unused device entitlement + * + * Returns: 0 on success, -ENOMEM when change can not be made + */ +int vio_cmo_entitlement_update(size_t new_entitlement) +{ + struct vio_dev *viodev; + struct vio_cmo_dev_entry *dev_ent; + unsigned long flags; + size_t avail, delta, tmp; + + spin_lock_irqsave(&vio_cmo.lock, flags); + + /* Entitlement increases */ + if (new_entitlement > vio_cmo.entitled) { + delta = new_entitlement - vio_cmo.entitled; + + /* Fulfill spare allocation */ + if (vio_cmo.spare < VIO_CMO_MIN_ENT) { + tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare)); + vio_cmo.spare += tmp; + vio_cmo.reserve.size += tmp; + delta -= tmp; + } + + /* Remaining new allocation goes to the excess pool */ + vio_cmo.entitled += delta; + vio_cmo.excess.size += delta; + vio_cmo.excess.free += delta; + + goto out; + } + + /* Entitlement decreases */ + delta = vio_cmo.entitled - new_entitlement; + avail = vio_cmo.excess.free; + + /* + * Need to check how much unused entitlement each device can + * sacrifice to fulfill entitlement change. + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + if (avail >= delta) + break; + + viodev = dev_ent->viodev; + if ((viodev->cmo.entitled > viodev->cmo.allocated) && + (viodev->cmo.entitled > VIO_CMO_MIN_ENT)) + avail += viodev->cmo.entitled - + max_t(size_t, viodev->cmo.allocated, + VIO_CMO_MIN_ENT); + } + + if (delta <= avail) { + vio_cmo.entitled -= delta; + + /* Take entitlement from the excess pool first */ + tmp = min(vio_cmo.excess.free, delta); + vio_cmo.excess.size -= tmp; + vio_cmo.excess.free -= tmp; + delta -= tmp; + + /* + * Remove all but VIO_CMO_MIN_ENT bytes from devices + * until entitlement change is served + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + if (!delta) + break; + + viodev = dev_ent->viodev; + tmp = 0; + if ((viodev->cmo.entitled > viodev->cmo.allocated) && + (viodev->cmo.entitled > VIO_CMO_MIN_ENT)) + tmp = viodev->cmo.entitled - + max_t(size_t, viodev->cmo.allocated, + VIO_CMO_MIN_ENT); + viodev->cmo.entitled -= min(tmp, delta); + delta -= min(tmp, delta); + } + } else { + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return -ENOMEM; + } + +out: + schedule_delayed_work(&vio_cmo.balance_q, 0); + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return 0; +} + +/** + * vio_cmo_balance - Balance entitlement among devices + * + * @work: work queue structure for this operation + * + * Any system entitlement above the minimum needed for devices, or + * already allocated to devices, can be distributed to the devices. 
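+ * Distribution is made in pieces of at most VIO_CMO_BALANCE_CHUNK
+ * bytes per device per pass.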
+ * The list of devices is iterated through to recalculate the desired + * entitlement level and to determine how much entitlement above the + * minimum entitlement is allocated to devices. + * + * Small chunks of the available entitlement are given to devices until + * their requirements are fulfilled or there is no entitlement left to give. + * Upon completion sizes of the reserve and excess pools are calculated. + * + * The system minimum entitlement level is also recalculated here. + * Entitlement will be reserved for devices even after vio_bus_remove to + * accommodate reloading the driver. The OF tree is walked to count the + * number of devices present and this will remove entitlement for devices + * that have actually left the system after having vio_bus_remove called. + */ +static void vio_cmo_balance(struct work_struct *work) +{ + struct vio_cmo *cmo; + struct vio_dev *viodev; + struct vio_cmo_dev_entry *dev_ent; + unsigned long flags; + size_t avail = 0, level, chunk, need; + int devcount = 0, fulfilled; + + cmo = container_of(work, struct vio_cmo, balance_q.work); + + spin_lock_irqsave(&vio_cmo.lock, flags); + + /* Calculate minimum entitlement and fulfill spare */ + cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT; + BUG_ON(cmo->min > cmo->entitled); + cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min)); + cmo->min += cmo->spare; + cmo->desired = cmo->min; + + /* + * Determine how much entitlement is available and reset device + * entitlements + */ + avail = cmo->entitled - cmo->spare; + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + viodev = dev_ent->viodev; + devcount++; + viodev->cmo.entitled = VIO_CMO_MIN_ENT; + cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT); + avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT); + } + + /* + * Having provided each device with the minimum entitlement, loop + * over the devices portioning out the remaining entitlement + * until there is nothing left. + */ + level = VIO_CMO_MIN_ENT; + while (avail) { + fulfilled = 0; + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + viodev = dev_ent->viodev; + + if (viodev->cmo.desired <= level) { + fulfilled++; + continue; + } + + /* + * Give the device up to VIO_CMO_BALANCE_CHUNK + * bytes of entitlement, but do not exceed the + * desired level of entitlement for the device. + */ + chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK); + chunk = min(chunk, (viodev->cmo.desired - + viodev->cmo.entitled)); + viodev->cmo.entitled += chunk; + + /* + * If the memory for this entitlement increase was + * already allocated to the device it does not come + * from the available pool being portioned out. 
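+ * Only the part of the increase not already covered by the device's
+ * existing allocation is subtracted from the available total.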
+ */ + need = max(viodev->cmo.allocated, viodev->cmo.entitled)- + max(viodev->cmo.allocated, level); + avail -= need; + + } + if (fulfilled == devcount) + break; + level += VIO_CMO_BALANCE_CHUNK; + } + + /* Calculate new reserve and excess pool sizes */ + cmo->reserve.size = cmo->min; + cmo->excess.free = 0; + cmo->excess.size = 0; + need = 0; + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + viodev = dev_ent->viodev; + /* Calculated reserve size above the minimum entitlement */ + if (viodev->cmo.entitled) + cmo->reserve.size += (viodev->cmo.entitled - + VIO_CMO_MIN_ENT); + /* Calculated used excess entitlement */ + if (viodev->cmo.allocated > viodev->cmo.entitled) + need += viodev->cmo.allocated - viodev->cmo.entitled; + } + cmo->excess.size = cmo->entitled - cmo->reserve.size; + cmo->excess.free = cmo->excess.size - need; + + cancel_delayed_work(to_delayed_work(work)); + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + void *ret; + + if (vio_cmo_alloc(viodev, roundup(size, PAGE_SIZE))) { + atomic_inc(&viodev->cmo.allocs_failed); + return NULL; + } + + ret = dma_iommu_ops.alloc(dev, size, dma_handle, flag, attrs); + if (unlikely(ret == NULL)) { + vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE)); + atomic_inc(&viodev->cmo.allocs_failed); + } + + return ret; +} + +static void vio_dma_iommu_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + + dma_iommu_ops.free(dev, size, vaddr, dma_handle, attrs); + + vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE)); +} + +static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + dma_addr_t ret = DMA_ERROR_CODE; + + if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) { + atomic_inc(&viodev->cmo.allocs_failed); + return ret; + } + + ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs); + if (unlikely(dma_mapping_error(dev, ret))) { + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); + atomic_inc(&viodev->cmo.allocs_failed); + } + + return ret; +} + +static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + + dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs); + + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); +} + +static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct scatterlist *sgl; + int ret, count = 0; + size_t alloc_size = 0; + + for (sgl = sglist; count < nelems; count++, sgl++) + alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE); + + if (vio_cmo_alloc(viodev, alloc_size)) { + atomic_inc(&viodev->cmo.allocs_failed); + return 0; + } + + ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs); + + if (unlikely(!ret)) { + vio_cmo_dealloc(viodev, alloc_size); + atomic_inc(&viodev->cmo.allocs_failed); + return ret; + } + + for (sgl = sglist, count = 0; count < ret; count++, sgl++) + alloc_size -= 
roundup(sgl->dma_length, IOMMU_PAGE_SIZE); + if (alloc_size) + vio_cmo_dealloc(viodev, alloc_size); + + return ret; +} + +static void vio_dma_iommu_unmap_sg(struct device *dev, + struct scatterlist *sglist, int nelems, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct scatterlist *sgl; + size_t alloc_size = 0; + int count = 0; + + for (sgl = sglist; count < nelems; count++, sgl++) + alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE); + + dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs); + + vio_cmo_dealloc(viodev, alloc_size); +} + +static int vio_dma_iommu_dma_supported(struct device *dev, u64 mask) +{ + return dma_iommu_ops.dma_supported(dev, mask); +} + +static u64 vio_dma_get_required_mask(struct device *dev) +{ + return dma_iommu_ops.get_required_mask(dev); +} + +struct dma_map_ops vio_dma_mapping_ops = { + .alloc = vio_dma_iommu_alloc_coherent, + .free = vio_dma_iommu_free_coherent, + .map_sg = vio_dma_iommu_map_sg, + .unmap_sg = vio_dma_iommu_unmap_sg, + .map_page = vio_dma_iommu_map_page, + .unmap_page = vio_dma_iommu_unmap_page, + .dma_supported = vio_dma_iommu_dma_supported, + .get_required_mask = vio_dma_get_required_mask, +}; + +/** + * vio_cmo_set_dev_desired - Set desired entitlement for a device + * + * @viodev: struct vio_dev for device to alter + * @new_desired: new desired entitlement level in bytes + * + * For use by devices to request a change to their entitlement at runtime or + * through sysfs. The desired entitlement level is changed and a balancing + * of system resources is scheduled to run in the future. + */ +void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) +{ + unsigned long flags; + struct vio_cmo_dev_entry *dev_ent; + int found = 0; + + if (!firmware_has_feature(FW_FEATURE_CMO)) + return; + + spin_lock_irqsave(&vio_cmo.lock, flags); + if (desired < VIO_CMO_MIN_ENT) + desired = VIO_CMO_MIN_ENT; + + /* + * Changes will not be made for devices not in the device list. + * If it is not in the device list, then no driver is loaded + * for the device and it can not receive entitlement. + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) + if (viodev == dev_ent->viodev) { + found = 1; + break; + } + if (!found) { + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return; + } + + /* Increase/decrease in desired device entitlement */ + if (desired >= viodev->cmo.desired) { + /* Just bump the bus and device values prior to a balance*/ + vio_cmo.desired += desired - viodev->cmo.desired; + viodev->cmo.desired = desired; + } else { + /* Decrease bus and device values for desired entitlement */ + vio_cmo.desired -= viodev->cmo.desired - desired; + viodev->cmo.desired = desired; + /* + * If less entitlement is desired than current entitlement, move + * any reserve memory in the change region to the excess pool. + */ + if (viodev->cmo.entitled > desired) { + vio_cmo.reserve.size -= viodev->cmo.entitled - desired; + vio_cmo.excess.size += viodev->cmo.entitled - desired; + /* + * If entitlement moving from the reserve pool to the + * excess pool is currently unused, add to the excess + * free counter. 
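+ * The amount added is the part of the moved region lying above both
+ * the device's current allocation and the new desired level.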
+ */ + if (viodev->cmo.allocated < viodev->cmo.entitled) + vio_cmo.excess.free += viodev->cmo.entitled - + max(viodev->cmo.allocated, desired); + viodev->cmo.entitled = desired; + } + } + schedule_delayed_work(&vio_cmo.balance_q, 0); + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +/** + * vio_cmo_bus_probe - Handle CMO specific bus probe activities + * + * @viodev - Pointer to struct vio_dev for device + * + * Determine the devices IO memory entitlement needs, attempting + * to satisfy the system minimum entitlement at first and scheduling + * a balance operation to take care of the rest at a later time. + * + * Returns: 0 on success, -EINVAL when device doesn't support CMO, and + * -ENOMEM when entitlement is not available for device or + * device entry. + * + */ +static int vio_cmo_bus_probe(struct vio_dev *viodev) +{ + struct vio_cmo_dev_entry *dev_ent; + struct device *dev = &viodev->dev; + struct vio_driver *viodrv = to_vio_driver(dev->driver); + unsigned long flags; + size_t size; + + /* + * Check to see that device has a DMA window and configure + * entitlement for the device. + */ + if (of_get_property(viodev->dev.of_node, + "ibm,my-dma-window", NULL)) { + /* Check that the driver is CMO enabled and get desired DMA */ + if (!viodrv->get_desired_dma) { + dev_err(dev, "%s: device driver does not support CMO\n", + __func__); + return -EINVAL; + } + + viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev)); + if (viodev->cmo.desired < VIO_CMO_MIN_ENT) + viodev->cmo.desired = VIO_CMO_MIN_ENT; + size = VIO_CMO_MIN_ENT; + + dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry), + GFP_KERNEL); + if (!dev_ent) + return -ENOMEM; + + dev_ent->viodev = viodev; + spin_lock_irqsave(&vio_cmo.lock, flags); + list_add(&dev_ent->list, &vio_cmo.device_list); + } else { + viodev->cmo.desired = 0; + size = 0; + spin_lock_irqsave(&vio_cmo.lock, flags); + } + + /* + * If the needs for vio_cmo.min have not changed since they + * were last set, the number of devices in the OF tree has + * been constant and the IO memory for this is already in + * the reserve pool. + */ + if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) * + VIO_CMO_MIN_ENT)) { + /* Updated desired entitlement if device requires it */ + if (size) + vio_cmo.desired += (viodev->cmo.desired - + VIO_CMO_MIN_ENT); + } else { + size_t tmp; + + tmp = vio_cmo.spare + vio_cmo.excess.free; + if (tmp < size) { + dev_err(dev, "%s: insufficient free " + "entitlement to add device. " + "Need %lu, have %lu\n", __func__, + size, (vio_cmo.spare + tmp)); + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return -ENOMEM; + } + + /* Use excess pool first to fulfill request */ + tmp = min(size, vio_cmo.excess.free); + vio_cmo.excess.free -= tmp; + vio_cmo.excess.size -= tmp; + vio_cmo.reserve.size += tmp; + + /* Use spare if excess pool was insufficient */ + vio_cmo.spare -= size - tmp; + + /* Update bus accounting */ + vio_cmo.min += size; + vio_cmo.desired += viodev->cmo.desired; + } + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return 0; +} + +/** + * vio_cmo_bus_remove - Handle CMO specific bus removal activities + * + * @viodev - Pointer to struct vio_dev for device + * + * Remove the device from the cmo device list. The minimum entitlement + * will be reserved for the device as long as it is in the system. The + * rest of the entitlement the device had been allocated will be returned + * to the system. 
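+ * Keeping the minimum reserved ensures that a later reload of the
+ * device's driver cannot fail for lack of entitlement.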
+ */ +static void vio_cmo_bus_remove(struct vio_dev *viodev) +{ + struct vio_cmo_dev_entry *dev_ent; + unsigned long flags; + size_t tmp; + + spin_lock_irqsave(&vio_cmo.lock, flags); + if (viodev->cmo.allocated) { + dev_err(&viodev->dev, "%s: device had %lu bytes of IO " + "allocated after remove operation.\n", + __func__, viodev->cmo.allocated); + BUG(); + } + + /* + * Remove the device from the device list being maintained for + * CMO enabled devices. + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) + if (viodev == dev_ent->viodev) { + list_del(&dev_ent->list); + kfree(dev_ent); + break; + } + + /* + * Devices may not require any entitlement and they do not need + * to be processed. Otherwise, return the device's entitlement + * back to the pools. + */ + if (viodev->cmo.entitled) { + /* + * This device has not yet left the OF tree, it's + * minimum entitlement remains in vio_cmo.min and + * vio_cmo.desired + */ + vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT); + + /* + * Save min allocation for device in reserve as long + * as it exists in OF tree as determined by later + * balance operation + */ + viodev->cmo.entitled -= VIO_CMO_MIN_ENT; + + /* Replenish spare from freed reserve pool */ + if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) { + tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT - + vio_cmo.spare)); + vio_cmo.spare += tmp; + viodev->cmo.entitled -= tmp; + } + + /* Remaining reserve goes to excess pool */ + vio_cmo.excess.size += viodev->cmo.entitled; + vio_cmo.excess.free += viodev->cmo.entitled; + vio_cmo.reserve.size -= viodev->cmo.entitled; + + /* + * Until the device is removed it will keep a + * minimum entitlement; this will guarantee that + * a module unload/load will result in a success. + */ + viodev->cmo.entitled = VIO_CMO_MIN_ENT; + viodev->cmo.desired = VIO_CMO_MIN_ENT; + atomic_set(&viodev->cmo.allocs_failed, 0); + } + + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +static void vio_cmo_set_dma_ops(struct vio_dev *viodev) +{ + set_dma_ops(&viodev->dev, &vio_dma_mapping_ops); +} + +/** + * vio_cmo_bus_init - CMO entitlement initialization at bus init time + * + * Set up the reserve and excess entitlement pools based on available + * system entitlement and the number of devices in the OF tree that + * require entitlement in the reserve pool. + */ +static void vio_cmo_bus_init(void) +{ + struct hvcall_mpp_data mpp_data; + int err; + + memset(&vio_cmo, 0, sizeof(struct vio_cmo)); + spin_lock_init(&vio_cmo.lock); + INIT_LIST_HEAD(&vio_cmo.device_list); + INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance); + + /* Get current system entitlement */ + err = h_get_mpp(&mpp_data); + + /* + * On failure, continue with entitlement set to 0, will panic() + * later when spare is reserved. + */ + if (err != H_SUCCESS) { + printk(KERN_ERR "%s: unable to determine system IO "\ + "entitlement. 
(%d)\n", __func__, err); + vio_cmo.entitled = 0; + } else { + vio_cmo.entitled = mpp_data.entitled_mem; + } + + /* Set reservation and check against entitlement */ + vio_cmo.spare = VIO_CMO_MIN_ENT; + vio_cmo.reserve.size = vio_cmo.spare; + vio_cmo.reserve.size += (vio_cmo_num_OF_devs() * + VIO_CMO_MIN_ENT); + if (vio_cmo.reserve.size > vio_cmo.entitled) { + printk(KERN_ERR "%s: insufficient system entitlement\n", + __func__); + panic("%s: Insufficient system entitlement", __func__); + } + + /* Set the remaining accounting variables */ + vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size; + vio_cmo.excess.free = vio_cmo.excess.size; + vio_cmo.min = vio_cmo.reserve.size; + vio_cmo.desired = vio_cmo.reserve.size; +} + +/* sysfs device functions and data structures for CMO */ + +#define viodev_cmo_rd_attr(name) \ +static ssize_t viodev_cmo_##name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name); \ +} + +static ssize_t viodev_cmo_allocs_failed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct vio_dev *viodev = to_vio_dev(dev); + return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed)); +} + +static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct vio_dev *viodev = to_vio_dev(dev); + atomic_set(&viodev->cmo.allocs_failed, 0); + return count; +} + +static ssize_t viodev_cmo_desired_set(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct vio_dev *viodev = to_vio_dev(dev); + size_t new_desired; + int ret; + + ret = strict_strtoul(buf, 10, &new_desired); + if (ret) + return ret; + + vio_cmo_set_dev_desired(viodev, new_desired); + return count; +} + +viodev_cmo_rd_attr(desired); +viodev_cmo_rd_attr(entitled); +viodev_cmo_rd_attr(allocated); + +static ssize_t name_show(struct device *, struct device_attribute *, char *); +static ssize_t devspec_show(struct device *, struct device_attribute *, char *); +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf); +static struct device_attribute vio_cmo_dev_attrs[] = { + __ATTR_RO(name), + __ATTR_RO(devspec), + __ATTR_RO(modalias), + __ATTR(cmo_desired, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH, + viodev_cmo_desired_show, viodev_cmo_desired_set), + __ATTR(cmo_entitled, S_IRUGO, viodev_cmo_entitled_show, NULL), + __ATTR(cmo_allocated, S_IRUGO, viodev_cmo_allocated_show, NULL), + __ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH, + viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset), + __ATTR_NULL +}; + +/* sysfs bus functions and data structures for CMO */ + +#define viobus_cmo_rd_attr(name) \ +static ssize_t \ +viobus_cmo_##name##_show(struct bus_type *bt, char *buf) \ +{ \ + return sprintf(buf, "%lu\n", vio_cmo.name); \ +} + +#define viobus_cmo_pool_rd_attr(name, var) \ +static ssize_t \ +viobus_cmo_##name##_pool_show_##var(struct bus_type *bt, char *buf) \ +{ \ + return sprintf(buf, "%lu\n", vio_cmo.name.var); \ +} + +static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf, + size_t count) +{ + unsigned long flags; + + spin_lock_irqsave(&vio_cmo.lock, flags); + vio_cmo.high = vio_cmo.curr; + spin_unlock_irqrestore(&vio_cmo.lock, flags); + + return count; +} + +viobus_cmo_rd_attr(entitled); +viobus_cmo_pool_rd_attr(reserve, size); +viobus_cmo_pool_rd_attr(excess, size); +viobus_cmo_pool_rd_attr(excess, 
free); +viobus_cmo_rd_attr(spare); +viobus_cmo_rd_attr(min); +viobus_cmo_rd_attr(desired); +viobus_cmo_rd_attr(curr); +viobus_cmo_rd_attr(high); + +static struct bus_attribute vio_cmo_bus_attrs[] = { + __ATTR(cmo_entitled, S_IRUGO, viobus_cmo_entitled_show, NULL), + __ATTR(cmo_reserve_size, S_IRUGO, viobus_cmo_reserve_pool_show_size, NULL), + __ATTR(cmo_excess_size, S_IRUGO, viobus_cmo_excess_pool_show_size, NULL), + __ATTR(cmo_excess_free, S_IRUGO, viobus_cmo_excess_pool_show_free, NULL), + __ATTR(cmo_spare, S_IRUGO, viobus_cmo_spare_show, NULL), + __ATTR(cmo_min, S_IRUGO, viobus_cmo_min_show, NULL), + __ATTR(cmo_desired, S_IRUGO, viobus_cmo_desired_show, NULL), + __ATTR(cmo_curr, S_IRUGO, viobus_cmo_curr_show, NULL), + __ATTR(cmo_high, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH, + viobus_cmo_high_show, viobus_cmo_high_reset), + __ATTR_NULL +}; + +static void vio_cmo_sysfs_init(void) +{ + vio_bus_type.dev_attrs = vio_cmo_dev_attrs; + vio_bus_type.bus_attrs = vio_cmo_bus_attrs; +} +#else /* CONFIG_PPC_SMLPAR */ +int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; } +void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {} +static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; } +static void vio_cmo_bus_remove(struct vio_dev *viodev) {} +static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {} +static void vio_cmo_bus_init(void) {} +static void vio_cmo_sysfs_init(void) { } +#endif /* CONFIG_PPC_SMLPAR */ +EXPORT_SYMBOL(vio_cmo_entitlement_update); +EXPORT_SYMBOL(vio_cmo_set_dev_desired); + +static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) +{ + const unsigned char *dma_window; + struct iommu_table *tbl; + unsigned long offset, size; + + dma_window = of_get_property(dev->dev.of_node, + "ibm,my-dma-window", NULL); + if (!dma_window) + return NULL; + + tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); + if (tbl == NULL) + return NULL; + + of_parse_dma_window(dev->dev.of_node, dma_window, + &tbl->it_index, &offset, &size); + + /* TCE table size - measured in tce entries */ + tbl->it_size = size >> IOMMU_PAGE_SHIFT; + /* offset for VIO should always be 0 */ + tbl->it_offset = offset >> IOMMU_PAGE_SHIFT; + tbl->it_busno = 0; + tbl->it_type = TCE_VB; + tbl->it_blocksize = 16; + + return iommu_init_table(tbl, -1); +} + +/** + * vio_match_device: - Tell if a VIO device has a matching + * VIO device id structure. + * @ids: array of VIO device id structures to search in + * @dev: the VIO device structure to match against + * + * Used by a driver to check whether a VIO device present in the + * system is in its list of supported devices. Returns the matching + * vio_device_id structure or NULL if there is no match. + */ +static const struct vio_device_id *vio_match_device( + const struct vio_device_id *ids, const struct vio_dev *dev) +{ + while (ids->type[0] != '\0') { + if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) && + of_device_is_compatible(dev->dev.of_node, + ids->compat)) + return ids; + ids++; + } + return NULL; +} + +/* + * Convert from struct device to struct vio_dev and pass to driver. + * dev->driver has already been set by generic code because vio_bus_match + * succeeded. 
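+ * On CMO-capable firmware, entitlement for the device is set up before
+ * the driver's probe is called and torn down again if that probe fails.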
+ */ +static int vio_bus_probe(struct device *dev) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct vio_driver *viodrv = to_vio_driver(dev->driver); + const struct vio_device_id *id; + int error = -ENODEV; + + if (!viodrv->probe) + return error; + + id = vio_match_device(viodrv->id_table, viodev); + if (id) { + memset(&viodev->cmo, 0, sizeof(viodev->cmo)); + if (firmware_has_feature(FW_FEATURE_CMO)) { + error = vio_cmo_bus_probe(viodev); + if (error) + return error; + } + error = viodrv->probe(viodev, id); + if (error && firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_bus_remove(viodev); + } + + return error; +} + +/* convert from struct device to struct vio_dev and pass to driver. */ +static int vio_bus_remove(struct device *dev) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct vio_driver *viodrv = to_vio_driver(dev->driver); + struct device *devptr; + int ret = 1; + + /* + * Hold a reference to the device after the remove function is called + * to allow for CMO accounting cleanup for the device. + */ + devptr = get_device(dev); + + if (viodrv->remove) + ret = viodrv->remove(viodev); + + if (!ret && firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_bus_remove(viodev); + + put_device(devptr); + return ret; +} + +/** + * vio_register_driver: - Register a new vio driver + * @drv: The vio_driver structure to be registered. + */ +int __vio_register_driver(struct vio_driver *viodrv, struct module *owner, + const char *mod_name) +{ + pr_debug("%s: driver %s registering\n", __func__, viodrv->name); + + /* fill in 'struct driver' fields */ + viodrv->driver.name = viodrv->name; + viodrv->driver.pm = viodrv->pm; + viodrv->driver.bus = &vio_bus_type; + viodrv->driver.owner = owner; + viodrv->driver.mod_name = mod_name; + + return driver_register(&viodrv->driver); +} +EXPORT_SYMBOL(__vio_register_driver); + +/** + * vio_unregister_driver - Remove registration of vio driver. + * @driver: The vio_driver struct to be removed form registration + */ +void vio_unregister_driver(struct vio_driver *viodrv) +{ + driver_unregister(&viodrv->driver); +} +EXPORT_SYMBOL(vio_unregister_driver); + +/* vio_dev refcount hit 0 */ +static void __devinit vio_dev_release(struct device *dev) +{ + struct iommu_table *tbl = get_iommu_table_base(dev); + + if (tbl) + iommu_free_table(tbl, dev->of_node ? + dev->of_node->full_name : dev_name(dev)); + of_node_put(dev->of_node); + kfree(to_vio_dev(dev)); +} + +/** + * vio_register_device_node: - Register a new vio device. + * @of_node: The OF node for this device. + * + * Creates and initializes a vio_dev structure from the data in + * of_node and adds it to the list of virtual devices. + * Returns a pointer to the created vio_dev or NULL if node has + * NULL device_type or compatible fields. + */ +struct vio_dev *vio_register_device_node(struct device_node *of_node) +{ + struct vio_dev *viodev; + const unsigned int *unit_address; + + /* we need the 'device_type' property, in order to match with drivers */ + if (of_node->type == NULL) { + printk(KERN_WARNING "%s: node %s missing 'device_type'\n", + __func__, + of_node->name ? of_node->name : "<unknown>"); + return NULL; + } + + unit_address = of_get_property(of_node, "reg", NULL); + if (unit_address == NULL) { + printk(KERN_WARNING "%s: node %s missing 'reg'\n", + __func__, + of_node->name ? 
of_node->name : "<unknown>"); + return NULL; + } + + /* allocate a vio_dev for this node */ + viodev = kzalloc(sizeof(struct vio_dev), GFP_KERNEL); + if (viodev == NULL) + return NULL; + + viodev->irq = irq_of_parse_and_map(of_node, 0); + + dev_set_name(&viodev->dev, "%x", *unit_address); + viodev->name = of_node->name; + viodev->type = of_node->type; + viodev->unit_address = *unit_address; + viodev->dev.of_node = of_node_get(of_node); + + if (firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_set_dma_ops(viodev); + else + set_dma_ops(&viodev->dev, &dma_iommu_ops); + set_iommu_table_base(&viodev->dev, vio_build_iommu_table(viodev)); + set_dev_node(&viodev->dev, of_node_to_nid(of_node)); + + /* init generic 'struct device' fields: */ + viodev->dev.parent = &vio_bus_device.dev; + viodev->dev.bus = &vio_bus_type; + viodev->dev.release = vio_dev_release; + /* needed to ensure proper operation of coherent allocations + * later, in case driver doesn't set it explicitly */ + dma_set_mask(&viodev->dev, DMA_BIT_MASK(64)); + dma_set_coherent_mask(&viodev->dev, DMA_BIT_MASK(64)); + + /* register with generic device framework */ + if (device_register(&viodev->dev)) { + printk(KERN_ERR "%s: failed to register device %s\n", + __func__, dev_name(&viodev->dev)); + put_device(&viodev->dev); + return NULL; + } + + return viodev; +} +EXPORT_SYMBOL(vio_register_device_node); + +/** + * vio_bus_init: - Initialize the virtual IO bus + */ +static int __init vio_bus_init(void) +{ + int err; + struct device_node *node_vroot; + + if (firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_sysfs_init(); + + err = bus_register(&vio_bus_type); + if (err) { + printk(KERN_ERR "failed to register VIO bus\n"); + return err; + } + + /* + * The fake parent of all vio devices, just to give us + * a nice directory + */ + err = device_register(&vio_bus_device.dev); + if (err) { + printk(KERN_WARNING "%s: device_register returned %i\n", + __func__, err); + return err; + } + + if (firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_bus_init(); + + node_vroot = of_find_node_by_name(NULL, "vdevice"); + if (node_vroot) { + struct device_node *of_node; + + /* + * Create struct vio_devices for each virtual device in + * the device tree. Drivers will associate with them later. + */ + for (of_node = node_vroot->child; of_node != NULL; + of_node = of_node->sibling) + vio_register_device_node(of_node); + of_node_put(node_vroot); + } + + return 0; +} +__initcall(vio_bus_init); + +static ssize_t name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", to_vio_dev(dev)->name); +} + +static ssize_t devspec_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct device_node *of_node = dev->of_node; + + return sprintf(buf, "%s\n", of_node ? 
of_node->full_name : "none"); +} + +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + const struct vio_dev *vio_dev = to_vio_dev(dev); + struct device_node *dn; + const char *cp; + + dn = dev->of_node; + if (!dn) + return -ENODEV; + cp = of_get_property(dn, "compatible", NULL); + if (!cp) + return -ENODEV; + + return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp); +} + +static struct device_attribute vio_dev_attrs[] = { + __ATTR_RO(name), + __ATTR_RO(devspec), + __ATTR_RO(modalias), + __ATTR_NULL +}; + +void __devinit vio_unregister_device(struct vio_dev *viodev) +{ + device_unregister(&viodev->dev); +} +EXPORT_SYMBOL(vio_unregister_device); + +static int vio_bus_match(struct device *dev, struct device_driver *drv) +{ + const struct vio_dev *vio_dev = to_vio_dev(dev); + struct vio_driver *vio_drv = to_vio_driver(drv); + const struct vio_device_id *ids = vio_drv->id_table; + + return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL); +} + +static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env) +{ + const struct vio_dev *vio_dev = to_vio_dev(dev); + struct device_node *dn; + const char *cp; + + dn = dev->of_node; + if (!dn) + return -ENODEV; + cp = of_get_property(dn, "compatible", NULL); + if (!cp) + return -ENODEV; + + add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp); + return 0; +} + +static struct bus_type vio_bus_type = { + .name = "vio", + .dev_attrs = vio_dev_attrs, + .uevent = vio_hotplug, + .match = vio_bus_match, + .probe = vio_bus_probe, + .remove = vio_bus_remove, +}; + +/** + * vio_get_attribute: - get attribute for virtual device + * @vdev: The vio device to get property. + * @which: The property/attribute to be extracted. + * @length: Pointer to length of returned data size (unused if NULL). 
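+ * (Typical @which values are OF property names such as "reg" or
+ * "ibm,my-dma-window".)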
+ * + * Calls prom.c's of_get_property() to return the value of the + * attribute specified by @which +*/ +const void *vio_get_attribute(struct vio_dev *vdev, char *which, int *length) +{ + return of_get_property(vdev->dev.of_node, which, length); +} +EXPORT_SYMBOL(vio_get_attribute); + +#ifdef CONFIG_PPC_PSERIES +/* vio_find_name() - internal because only vio.c knows how we formatted the + * kobject name + */ +static struct vio_dev *vio_find_name(const char *name) +{ + struct device *found; + + found = bus_find_device_by_name(&vio_bus_type, NULL, name); + if (!found) + return NULL; + + return to_vio_dev(found); +} + +/** + * vio_find_node - find an already-registered vio_dev + * @vnode: device_node of the virtual device we're looking for + */ +struct vio_dev *vio_find_node(struct device_node *vnode) +{ + const uint32_t *unit_address; + char kobj_name[20]; + + /* construct the kobject name from the device node */ + unit_address = of_get_property(vnode, "reg", NULL); + if (!unit_address) + return NULL; + snprintf(kobj_name, sizeof(kobj_name), "%x", *unit_address); + + return vio_find_name(kobj_name); +} +EXPORT_SYMBOL(vio_find_node); + +int vio_enable_interrupts(struct vio_dev *dev) +{ + int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE); + if (rc != H_SUCCESS) + printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc); + return rc; +} +EXPORT_SYMBOL(vio_enable_interrupts); + +int vio_disable_interrupts(struct vio_dev *dev) +{ + int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE); + if (rc != H_SUCCESS) + printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc); + return rc; +} +EXPORT_SYMBOL(vio_disable_interrupts); +#endif /* CONFIG_PPC_PSERIES */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S new file mode 100644 index 00000000..65d1c08c --- /dev/null +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -0,0 +1,262 @@ +#ifdef CONFIG_PPC64 +#define PROVIDE32(x) PROVIDE(__unused__##x) +#else +#define PROVIDE32(x) PROVIDE(x) +#endif +#include <asm/page.h> +#include <asm-generic/vmlinux.lds.h> +#include <asm/cache.h> +#include <asm/thread_info.h> + +ENTRY(_stext) + +PHDRS { + kernel PT_LOAD FLAGS(7); /* RWX */ + notes PT_NOTE FLAGS(0); + dummy PT_NOTE FLAGS(0); + + /* binutils < 2.18 has a bug that makes it misbehave when taking an + ELF file with all segments at load address 0 as input. This + happens when running "strip" on vmlinux, because of the AT() magic + in this linker script. People using GCC >= 4.2 won't run into + this problem, because the "build-id" support will put some data + into the "notes" segment (at a non-zero load address). + + To work around this, we force some data into both the "dummy" + segment and the kernel segment, so the dummy segment will get a + non-zero load address. It's not enough to always create the + "notes" segment, since if nothing gets assigned to it, its load + address will be zero. */ +} + +#ifdef CONFIG_PPC64 +OUTPUT_ARCH(powerpc:common64) +jiffies = jiffies_64; +#else +OUTPUT_ARCH(powerpc:common) +jiffies = jiffies_64 + 4; +#endif +SECTIONS +{ + . = 0; + reloc_start = .; + + . = KERNELBASE; + +/* + * Text, read only data and other permanent read-only sections + */ + + /* Text and gots */ + .text : AT(ADDR(.text) - LOAD_OFFSET) { + ALIGN_FUNCTION(); + HEAD_TEXT + _text = .; + /* careful! 
__ftr_alt_* sections need to be close to .text */ + *(.text .fixup __ftr_alt_* .ref.text) + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + IRQENTRY_TEXT + +#ifdef CONFIG_PPC32 + *(.got1) + __got2_start = .; + *(.got2) + __got2_end = .; +#endif /* CONFIG_PPC32 */ + + } :kernel + + . = ALIGN(PAGE_SIZE); + _etext = .; + PROVIDE32 (etext = .); + + /* Read-only data */ + RODATA + + EXCEPTION_TABLE(0) + + NOTES :kernel :notes + + /* The dummy segment contents for the bug workaround mentioned above + near PHDRS. */ + .dummy : AT(ADDR(.dummy) - LOAD_OFFSET) { + LONG(0) + LONG(0) + LONG(0) + } :kernel :dummy + +/* + * Init sections discarded at runtime + */ + . = ALIGN(PAGE_SIZE); + __init_begin = .; + INIT_TEXT_SECTION(PAGE_SIZE) :kernel + + /* .exit.text is discarded at runtime, not link time, + * to deal with references from __bug_table + */ + .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { + EXIT_TEXT + } + + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { + INIT_DATA + __vtop_table_begin = .; + *(.vtop_fixup); + __vtop_table_end = .; + __ptov_table_begin = .; + *(.ptov_fixup); + __ptov_table_end = .; + } + + .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { + INIT_SETUP(16) + } + + .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { + INIT_CALLS + } + + .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { + CON_INITCALL + } + + SECURITY_INIT + + . = ALIGN(8); + __ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) { + __start___ftr_fixup = .; + *(__ftr_fixup) + __stop___ftr_fixup = .; + } + . = ALIGN(8); + __mmu_ftr_fixup : AT(ADDR(__mmu_ftr_fixup) - LOAD_OFFSET) { + __start___mmu_ftr_fixup = .; + *(__mmu_ftr_fixup) + __stop___mmu_ftr_fixup = .; + } + . = ALIGN(8); + __lwsync_fixup : AT(ADDR(__lwsync_fixup) - LOAD_OFFSET) { + __start___lwsync_fixup = .; + *(__lwsync_fixup) + __stop___lwsync_fixup = .; + } +#ifdef CONFIG_PPC64 + . = ALIGN(8); + __fw_ftr_fixup : AT(ADDR(__fw_ftr_fixup) - LOAD_OFFSET) { + __start___fw_ftr_fixup = .; + *(__fw_ftr_fixup) + __stop___fw_ftr_fixup = .; + } +#endif + .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { + INIT_RAM_FS + } + + PERCPU_SECTION(L1_CACHE_BYTES) + + . = ALIGN(8); + .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) { + __machine_desc_start = . ; + *(.machine.desc) + __machine_desc_end = . ; + } +#ifdef CONFIG_RELOCATABLE + . = ALIGN(8); + .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) + { +#ifdef CONFIG_RELOCATABLE_PPC32 + __dynamic_symtab = .; +#endif + *(.dynsym) + } + .dynstr : AT(ADDR(.dynstr) - LOAD_OFFSET) { *(.dynstr) } + .dynamic : AT(ADDR(.dynamic) - LOAD_OFFSET) + { + __dynamic_start = .; + *(.dynamic) + } + .hash : AT(ADDR(.hash) - LOAD_OFFSET) { *(.hash) } + .interp : AT(ADDR(.interp) - LOAD_OFFSET) { *(.interp) } + .rela.dyn : AT(ADDR(.rela.dyn) - LOAD_OFFSET) + { + __rela_dyn_start = .; + *(.rela*) + } +#endif + + /* freed after init ends here */ + . = ALIGN(PAGE_SIZE); + __init_end = .; + +/* + * And now the various read/write data + */ + + . 
= ALIGN(PAGE_SIZE);
+	_sdata = .;
+
+#ifdef CONFIG_PPC32
+	.data : AT(ADDR(.data) - LOAD_OFFSET) {
+		DATA_DATA
+		*(.sdata)
+		*(.got.plt) *(.got)
+	}
+#else
+	.data : AT(ADDR(.data) - LOAD_OFFSET) {
+		DATA_DATA
+		*(.data.rel*)
+		*(.toc1)
+		*(.branch_lt)
+	}
+
+	.opd : AT(ADDR(.opd) - LOAD_OFFSET) {
+		*(.opd)
+	}
+
+	.got : AT(ADDR(.got) - LOAD_OFFSET) {
+		__toc_start = .;
+		*(.got)
+		*(.toc)
+	}
+#endif
+
+	/* The initial task and kernel stack */
+	INIT_TASK_DATA_SECTION(THREAD_SIZE)
+
+	.data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) {
+		PAGE_ALIGNED_DATA(PAGE_SIZE)
+	}
+
+	.data..cacheline_aligned : AT(ADDR(.data..cacheline_aligned) - LOAD_OFFSET) {
+		CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
+	}
+
+	.data..read_mostly : AT(ADDR(.data..read_mostly) - LOAD_OFFSET) {
+		READ_MOSTLY_DATA(L1_CACHE_BYTES)
+	}
+
+	. = ALIGN(PAGE_SIZE);
+	.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
+		NOSAVE_DATA
+	}
+
+	. = ALIGN(PAGE_SIZE);
+	_edata = .;
+	PROVIDE32 (edata = .);
+
+/*
+ * And finally the bss
+ */
+
+	BSS_SECTION(0, 0, 0)
+
+	. = ALIGN(PAGE_SIZE);
+	_end = . ;
+	PROVIDE32 (end = .);
+
+	/* Sections to be discarded. */
+	DISCARDS
+}
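For orientation (editor's addition, not part of the diff): a minimal sketch of how a client driver would sit on top of the bus interface that vio.c above exports. All example_* identifiers, the id strings, and the 64KB desired-DMA figure are invented for illustration; the vio_driver fields and the vio_register_driver()/vio_unregister_driver() entry points are assumed to match the asm/vio.h declarations this file implements.

#include <linux/init.h>
#include <linux/module.h>
#include <asm/vio.h>

/* Matched by vio_bus_match() against a vdevice node's
 * device_type/compatible properties; strings are made up. */
static struct vio_device_id example_ids[] = {
	{ "example", "IBM,example" },
	{ "", "" },
};

/* On CMO firmware, vio_cmo_bus_probe() calls this before probe to
 * size the device's IO memory entitlement (figure is made up). */
static unsigned long example_get_desired_dma(struct vio_dev *vdev)
{
	return 64 * 1024;
}

static int example_probe(struct vio_dev *vdev,
			 const struct vio_device_id *id)
{
	/* DMA ops and the TCE table were wired up when the device was
	 * registered, so the driver can map buffers right away. */
	dev_info(&vdev->dev, "probed unit address %x\n",
		 vdev->unit_address);
	return 0;
}

static int example_remove(struct vio_dev *vdev)
{
	return 0;
}

static struct vio_driver example_driver = {
	.id_table	 = example_ids,
	.probe		 = example_probe,
	.remove		 = example_remove,
	.get_desired_dma = example_get_desired_dma,
	.name		 = "example",
};

static int __init example_init(void)
{
	return vio_register_driver(&example_driver);
}
module_init(example_init);

static void __exit example_exit(void)
{
	vio_unregister_driver(&example_driver);
}
module_exit(example_exit);

MODULE_LICENSE("GPL");

Registration funnels through __vio_register_driver() above; on pSeries the devices themselves come from the "vdevice" subtree scanned in vio_bus_init(), and on shared-memory partitions the get_desired_dma() result feeds the CMO entitlement accounting.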