summaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile165
-rw-r--r--arch/powerpc/kernel/align.c973
-rw-r--r--arch/powerpc/kernel/asm-offsets.c619
-rw-r--r--arch/powerpc/kernel/audit.c83
-rw-r--r--arch/powerpc/kernel/btext.c922
-rw-r--r--arch/powerpc/kernel/cacheinfo.c838
-rw-r--r--arch/powerpc/kernel/cacheinfo.h8
-rw-r--r--arch/powerpc/kernel/clock.c82
-rw-r--r--arch/powerpc/kernel/compat_audit.c43
-rw-r--r--arch/powerpc/kernel/cpu_setup_44x.S74
-rw-r--r--arch/powerpc/kernel/cpu_setup_6xx.S489
-rw-r--r--arch/powerpc/kernel/cpu_setup_a2.S120
-rw-r--r--arch/powerpc/kernel/cpu_setup_fsl_booke.S98
-rw-r--r--arch/powerpc/kernel/cpu_setup_pa6t.S44
-rw-r--r--arch/powerpc/kernel/cpu_setup_power7.S95
-rw-r--r--arch/powerpc/kernel/cpu_setup_ppc970.S210
-rw-r--r--arch/powerpc/kernel/cputable.c2183
-rw-r--r--arch/powerpc/kernel/crash.c368
-rw-r--r--arch/powerpc/kernel/crash_dump.c159
-rw-r--r--arch/powerpc/kernel/dbell.c53
-rw-r--r--arch/powerpc/kernel/dma-iommu.c119
-rw-r--r--arch/powerpc/kernel/dma-swiotlb.c106
-rw-r--r--arch/powerpc/kernel/dma.c230
-rw-r--r--arch/powerpc/kernel/entry_32.S1412
-rw-r--r--arch/powerpc/kernel/entry_64.S1152
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S1326
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S1066
-rw-r--r--arch/powerpc/kernel/fadump.c1317
-rw-r--r--arch/powerpc/kernel/firmware.c22
-rw-r--r--arch/powerpc/kernel/fpu.S177
-rw-r--r--arch/powerpc/kernel/fsl_booke_entry_mapping.S235
-rw-r--r--arch/powerpc/kernel/ftrace.c610
-rw-r--r--arch/powerpc/kernel/head_32.S1296
-rw-r--r--arch/powerpc/kernel/head_40x.S998
-rw-r--r--arch/powerpc/kernel/head_44x.S1285
-rw-r--r--arch/powerpc/kernel/head_64.S789
-rw-r--r--arch/powerpc/kernel/head_8xx.S979
-rw-r--r--arch/powerpc/kernel/head_booke.h427
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S1084
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c363
-rw-r--r--arch/powerpc/kernel/ibmebus.c760
-rw-r--r--arch/powerpc/kernel/idle.c168
-rw-r--r--arch/powerpc/kernel/idle_6xx.S197
-rw-r--r--arch/powerpc/kernel/idle_book3e.S73
-rw-r--r--arch/powerpc/kernel/idle_e500.S106
-rw-r--r--arch/powerpc/kernel/idle_power4.S73
-rw-r--r--arch/powerpc/kernel/idle_power7.S116
-rw-r--r--arch/powerpc/kernel/init_task.c29
-rw-r--r--arch/powerpc/kernel/io-workarounds.c189
-rw-r--r--arch/powerpc/kernel/io.c207
-rw-r--r--arch/powerpc/kernel/iomap.c132
-rw-r--r--arch/powerpc/kernel/iommu.c685
-rw-r--r--arch/powerpc/kernel/irq.c680
-rw-r--r--arch/powerpc/kernel/isa-bridge.c266
-rw-r--r--arch/powerpc/kernel/jump_label.c25
-rw-r--r--arch/powerpc/kernel/kgdb.c473
-rw-r--r--arch/powerpc/kernel/kprobes.c574
-rw-r--r--arch/powerpc/kernel/kvm.c811
-rw-r--r--arch/powerpc/kernel/kvm_emul.S358
-rw-r--r--arch/powerpc/kernel/l2cr_6xx.S470
-rw-r--r--arch/powerpc/kernel/legacy_serial.c616
-rw-r--r--arch/powerpc/kernel/lparcfg.c718
-rw-r--r--arch/powerpc/kernel/machine_kexec.c251
-rw-r--r--arch/powerpc/kernel/machine_kexec_32.c69
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c404
-rw-r--r--arch/powerpc/kernel/misc.S123
-rw-r--r--arch/powerpc/kernel/misc_32.S1010
-rw-r--r--arch/powerpc/kernel/misc_64.S633
-rw-r--r--arch/powerpc/kernel/module.c80
-rw-r--r--arch/powerpc/kernel/module_32.c308
-rw-r--r--arch/powerpc/kernel/module_64.c448
-rw-r--r--arch/powerpc/kernel/msi.c43
-rw-r--r--arch/powerpc/kernel/nvram_64.c564
-rw-r--r--arch/powerpc/kernel/of_platform.c126
-rw-r--r--arch/powerpc/kernel/paca.c219
-rw-r--r--arch/powerpc/kernel/pci-common.c1702
-rw-r--r--arch/powerpc/kernel/pci_32.c310
-rw-r--r--arch/powerpc/kernel/pci_64.c272
-rw-r--r--arch/powerpc/kernel/pci_dn.c165
-rw-r--r--arch/powerpc/kernel/pci_of_scan.c373
-rw-r--r--arch/powerpc/kernel/pmc.c102
-rw-r--r--arch/powerpc/kernel/ppc32.h139
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c192
-rw-r--r--arch/powerpc/kernel/ppc_save_regs.S75
-rw-r--r--arch/powerpc/kernel/proc_powerpc.c125
-rw-r--r--arch/powerpc/kernel/process.c1339
-rw-r--r--arch/powerpc/kernel/prom.c891
-rw-r--r--arch/powerpc/kernel/prom_init.c2997
-rw-r--r--arch/powerpc/kernel/prom_init_check.sh80
-rw-r--r--arch/powerpc/kernel/prom_parse.c34
-rw-r--r--arch/powerpc/kernel/ptrace.c1757
-rw-r--r--arch/powerpc/kernel/ptrace32.c337
-rw-r--r--arch/powerpc/kernel/reloc_32.S208
-rw-r--r--arch/powerpc/kernel/reloc_64.S87
-rw-r--r--arch/powerpc/kernel/rtas-proc.c790
-rw-r--r--arch/powerpc/kernel/rtas-rtc.c112
-rw-r--r--arch/powerpc/kernel/rtas.c1088
-rw-r--r--arch/powerpc/kernel/rtas_flash.c807
-rw-r--r--arch/powerpc/kernel/rtas_pci.c304
-rw-r--r--arch/powerpc/kernel/rtasd.c555
-rw-r--r--arch/powerpc/kernel/setup-common.c733
-rw-r--r--arch/powerpc/kernel/setup.h9
-rw-r--r--arch/powerpc/kernel/setup_32.c354
-rw-r--r--arch/powerpc/kernel/setup_64.c691
-rw-r--r--arch/powerpc/kernel/signal.c206
-rw-r--r--arch/powerpc/kernel/signal.h57
-rw-r--r--arch/powerpc/kernel/signal_32.c1306
-rw-r--r--arch/powerpc/kernel/signal_64.c479
-rw-r--r--arch/powerpc/kernel/smp-tbsync.c171
-rw-r--r--arch/powerpc/kernel/smp.c785
-rw-r--r--arch/powerpc/kernel/softemu8xx.c199
-rw-r--r--arch/powerpc/kernel/stacktrace.c63
-rw-r--r--arch/powerpc/kernel/suspend.c25
-rw-r--r--arch/powerpc/kernel/swsusp.c38
-rw-r--r--arch/powerpc/kernel/swsusp_32.S350
-rw-r--r--arch/powerpc/kernel/swsusp_64.c24
-rw-r--r--arch/powerpc/kernel/swsusp_asm64.S228
-rw-r--r--arch/powerpc/kernel/swsusp_booke.S193
-rw-r--r--arch/powerpc/kernel/sys_ppc32.c626
-rw-r--r--arch/powerpc/kernel/syscalls.c138
-rw-r--r--arch/powerpc/kernel/sysfs.c666
-rw-r--r--arch/powerpc/kernel/systbl.S47
-rw-r--r--arch/powerpc/kernel/systbl_chk.c58
-rw-r--r--arch/powerpc/kernel/systbl_chk.sh33
-rw-r--r--arch/powerpc/kernel/tau_6xx.c270
-rw-r--r--arch/powerpc/kernel/time.c1036
-rw-r--r--arch/powerpc/kernel/traps.c1627
-rw-r--r--arch/powerpc/kernel/udbg.c206
-rw-r--r--arch/powerpc/kernel/udbg_16550.c351
-rw-r--r--arch/powerpc/kernel/vdso.c829
-rw-r--r--arch/powerpc/kernel/vdso32/Makefile56
-rw-r--r--arch/powerpc/kernel/vdso32/cacheflush.S85
-rw-r--r--arch/powerpc/kernel/vdso32/datapage.S85
-rw-r--r--arch/powerpc/kernel/vdso32/gettimeofday.S266
-rw-r--r--arch/powerpc/kernel/vdso32/note.S25
-rw-r--r--arch/powerpc/kernel/vdso32/sigtramp.S299
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32.lds.S153
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32_wrapper.S14
-rw-r--r--arch/powerpc/kernel/vdso64/Makefile51
-rw-r--r--arch/powerpc/kernel/vdso64/cacheflush.S84
-rw-r--r--arch/powerpc/kernel/vdso64/datapage.S85
-rw-r--r--arch/powerpc/kernel/vdso64/gettimeofday.S218
-rw-r--r--arch/powerpc/kernel/vdso64/note.S1
-rw-r--r--arch/powerpc/kernel/vdso64/sigtramp.S297
-rw-r--r--arch/powerpc/kernel/vdso64/vdso64.lds.S152
-rw-r--r--arch/powerpc/kernel/vdso64/vdso64_wrapper.S14
-rw-r--r--arch/powerpc/kernel/vecemu.c345
-rw-r--r--arch/powerpc/kernel/vector.S407
-rw-r--r--arch/powerpc/kernel/vio.c1467
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S262
150 files changed, 65628 insertions, 0 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
new file mode 100644
index 00000000..f5808a35
--- /dev/null
+++ b/arch/powerpc/kernel/Makefile
@@ -0,0 +1,165 @@
+#
+# Makefile for the linux kernel.
+#
+
+CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
+
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+ifeq ($(CONFIG_PPC64),y)
+CFLAGS_prom_init.o += -mno-minimal-toc
+endif
+ifeq ($(CONFIG_PPC32),y)
+CFLAGS_prom_init.o += -fPIC
+CFLAGS_btext.o += -fPIC
+endif
+
+ifdef CONFIG_FUNCTION_TRACER
+# Do not trace early boot code
+CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog
+CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog
+CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog
+CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog
+# do not trace tracer code
+CFLAGS_REMOVE_ftrace.o = -pg -mno-sched-epilog
+# timers used by tracing
+CFLAGS_REMOVE_time.o = -pg -mno-sched-epilog
+endif
+
+obj-y := cputable.o ptrace.o syscalls.o \
+ irq.o align.o signal_32.o pmc.o vdso.o \
+ init_task.o process.o systbl.o idle.o \
+ signal.o sysfs.o cacheinfo.o time.o \
+ prom.o traps.o setup-common.o \
+ udbg.o misc.o io.o dma.o \
+ misc_$(CONFIG_WORD_SIZE).o vdso32/
+obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
+ signal_64.o ptrace32.o \
+ paca.o nvram_64.o firmware.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
+obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power7.o
+obj64-$(CONFIG_RELOCATABLE) += reloc_64.o
+obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
+obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o
+obj-$(CONFIG_PPC64) += vdso64/
+obj-$(CONFIG_ALTIVEC) += vecemu.o
+obj-$(CONFIG_PPC_970_NAP) += idle_power4.o
+obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o
+obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o
+obj-$(CONFIG_PPC_CLOCK) += clock.o
+procfs-y := proc_powerpc.o
+obj-$(CONFIG_PROC_FS) += $(procfs-y)
+rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o
+obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y)
+obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o
+obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
+obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
+obj-$(CONFIG_LPARCFG) += lparcfg.o
+obj-$(CONFIG_IBMVIO) += vio.o
+obj-$(CONFIG_IBMEBUS) += ibmebus.o
+obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
+obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+obj-$(CONFIG_FA_DUMP) += fadump.o
+ifeq ($(CONFIG_PPC32),y)
+obj-$(CONFIG_E500) += idle_e500.o
+endif
+obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o
+obj-$(CONFIG_TAU) += tau_6xx.o
+obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o
+ifeq ($(CONFIG_FSL_BOOKE),y)
+obj-$(CONFIG_HIBERNATION) += swsusp_booke.o
+else
+obj-$(CONFIG_HIBERNATION) += swsusp_$(CONFIG_WORD_SIZE).o
+endif
+obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o
+obj-$(CONFIG_MODULES) += module.o module_$(CONFIG_WORD_SIZE).o
+obj-$(CONFIG_44x) += cpu_setup_44x.o
+obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o dbell.o
+obj-$(CONFIG_PPC_BOOK3E_64) += dbell.o
+obj-$(CONFIG_JUMP_LABEL) += jump_label.o
+
+extra-y := head_$(CONFIG_WORD_SIZE).o
+extra-$(CONFIG_40x) := head_40x.o
+extra-$(CONFIG_44x) := head_44x.o
+extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o
+extra-$(CONFIG_8xx) := head_8xx.o
+extra-y += vmlinux.lds
+
+obj-$(CONFIG_RELOCATABLE_PPC32) += reloc_32.o
+
+obj-$(CONFIG_PPC32) += entry_32.o setup_32.o
+obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o
+obj-$(CONFIG_KGDB) += kgdb.o
+obj-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o
+obj-$(CONFIG_MODULES) += ppc_ksyms.o
+obj-$(CONFIG_BOOTX_TEXT) += btext.o
+obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
+obj-$(CONFIG_STACKTRACE) += stacktrace.o
+obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o
+
+pci64-$(CONFIG_PPC64) += pci_dn.o isa-bridge.o
+obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \
+ pci-common.o pci_of_scan.o
+obj-$(CONFIG_PCI_MSI) += msi.o
+obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \
+ machine_kexec_$(CONFIG_WORD_SIZE).o
+obj-$(CONFIG_AUDIT) += audit.o
+obj64-$(CONFIG_AUDIT) += compat_audit.o
+
+obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o
+
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
+obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
+
+obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
+
+ifneq ($(CONFIG_PPC_INDIRECT_IO),y)
+obj-y += iomap.o
+endif
+
+obj-$(CONFIG_PPC64) += $(obj64-y)
+obj-$(CONFIG_PPC32) += $(obj32-y)
+
+ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),)
+obj-y += ppc_save_regs.o
+endif
+
+obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
+
+# Disable GCOV in odd or sensitive code
+GCOV_PROFILE_prom_init.o := n
+GCOV_PROFILE_ftrace.o := n
+GCOV_PROFILE_machine_kexec_64.o := n
+GCOV_PROFILE_machine_kexec_32.o := n
+GCOV_PROFILE_kprobes.o := n
+
+extra-$(CONFIG_PPC_FPU) += fpu.o
+extra-$(CONFIG_ALTIVEC) += vector.o
+extra-$(CONFIG_PPC64) += entry_64.o
+
+extra-y += systbl_chk.i
+$(obj)/systbl.o: systbl_chk
+
+quiet_cmd_systbl_chk = CALL $<
+ cmd_systbl_chk = $(CONFIG_SHELL) $< $(obj)/systbl_chk.i
+
+PHONY += systbl_chk
+systbl_chk: $(src)/systbl_chk.sh $(obj)/systbl_chk.i
+ $(call cmd,systbl_chk)
+
+ifeq ($(CONFIG_PPC_OF_BOOT_TRAMPOLINE),y)
+$(obj)/built-in.o: prom_init_check
+
+quiet_cmd_prom_init_check = CALL $<
+ cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" "$(obj)/prom_init.o"
+
+PHONY += prom_init_check
+prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o
+ $(call cmd,prom_init_check)
+endif
+
+clean-files := vmlinux.lds
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
new file mode 100644
index 00000000..ee5b690a
--- /dev/null
+++ b/arch/powerpc/kernel/align.c
@@ -0,0 +1,973 @@
+/* align.c - handle alignment exceptions for the Power PC.
+ *
+ * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ * Copyright (c) 1998-1999 TiVo, Inc.
+ * PowerPC 403GCX modifications.
+ * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ * PowerPC 403GCX/405GP modifications.
+ * Copyright (c) 2001-2002 PPC64 team, IBM Corp
+ * 64-bit and Power4 support
+ * Copyright (c) 2005 Benjamin Herrenschmidt, IBM Corp
+ * <benh@kernel.crashing.org>
+ * Merge ppc32 and ppc64 implementations
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+#include <asm/cache.h>
+#include <asm/cputable.h>
+#include <asm/emulated_ops.h>
+#include <asm/switch_to.h>
+
+struct aligninfo {
+ unsigned char len;
+ unsigned char flags;
+};
+
+#define IS_XFORM(inst) (((inst) >> 26) == 31)
+#define IS_DSFORM(inst) (((inst) >> 26) >= 56)
+
+#define INVALID { 0, 0 }
+
+/* Bits in the flags field */
+#define LD 0 /* load */
+#define ST 1 /* store */
+#define SE 2 /* sign-extend value, or FP ld/st as word */
+#define F 4 /* to/from fp regs */
+#define U 8 /* update index register */
+#define M 0x10 /* multiple load/store */
+#define SW 0x20 /* byte swap */
+#define S 0x40 /* single-precision fp or... */
+#define SX 0x40 /* ... byte count in XER */
+#define HARD 0x80 /* string, stwcx. */
+#define E4 0x40 /* SPE endianness is word */
+#define E8 0x80 /* SPE endianness is double word */
+#define SPLT 0x80 /* VSX SPLAT load */
+
+/* DSISR bits reported for a DCBZ instruction: */
+#define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */
+
+#define SWAP(a, b) (t = (a), (a) = (b), (b) = t)
+
+/*
+ * The PowerPC stores certain bits of the instruction that caused the
+ * alignment exception in the DSISR register. This array maps those
+ * bits to information about the operand length and what the
+ * instruction would do.
+ */
+static struct aligninfo aligninfo[128] = {
+ { 4, LD }, /* 00 0 0000: lwz / lwarx */
+ INVALID, /* 00 0 0001 */
+ { 4, ST }, /* 00 0 0010: stw */
+ INVALID, /* 00 0 0011 */
+ { 2, LD }, /* 00 0 0100: lhz */
+ { 2, LD+SE }, /* 00 0 0101: lha */
+ { 2, ST }, /* 00 0 0110: sth */
+ { 4, LD+M }, /* 00 0 0111: lmw */
+ { 4, LD+F+S }, /* 00 0 1000: lfs */
+ { 8, LD+F }, /* 00 0 1001: lfd */
+ { 4, ST+F+S }, /* 00 0 1010: stfs */
+ { 8, ST+F }, /* 00 0 1011: stfd */
+ INVALID, /* 00 0 1100 */
+ { 8, LD }, /* 00 0 1101: ld/ldu/lwa */
+ INVALID, /* 00 0 1110 */
+ { 8, ST }, /* 00 0 1111: std/stdu */
+ { 4, LD+U }, /* 00 1 0000: lwzu */
+ INVALID, /* 00 1 0001 */
+ { 4, ST+U }, /* 00 1 0010: stwu */
+ INVALID, /* 00 1 0011 */
+ { 2, LD+U }, /* 00 1 0100: lhzu */
+ { 2, LD+SE+U }, /* 00 1 0101: lhau */
+ { 2, ST+U }, /* 00 1 0110: sthu */
+ { 4, ST+M }, /* 00 1 0111: stmw */
+ { 4, LD+F+S+U }, /* 00 1 1000: lfsu */
+ { 8, LD+F+U }, /* 00 1 1001: lfdu */
+ { 4, ST+F+S+U }, /* 00 1 1010: stfsu */
+ { 8, ST+F+U }, /* 00 1 1011: stfdu */
+ { 16, LD+F }, /* 00 1 1100: lfdp */
+ INVALID, /* 00 1 1101 */
+ { 16, ST+F }, /* 00 1 1110: stfdp */
+ INVALID, /* 00 1 1111 */
+ { 8, LD }, /* 01 0 0000: ldx */
+ INVALID, /* 01 0 0001 */
+ { 8, ST }, /* 01 0 0010: stdx */
+ INVALID, /* 01 0 0011 */
+ INVALID, /* 01 0 0100 */
+ { 4, LD+SE }, /* 01 0 0101: lwax */
+ INVALID, /* 01 0 0110 */
+ INVALID, /* 01 0 0111 */
+ { 4, LD+M+HARD+SX }, /* 01 0 1000: lswx */
+ { 4, LD+M+HARD }, /* 01 0 1001: lswi */
+ { 4, ST+M+HARD+SX }, /* 01 0 1010: stswx */
+ { 4, ST+M+HARD }, /* 01 0 1011: stswi */
+ INVALID, /* 01 0 1100 */
+ { 8, LD+U }, /* 01 0 1101: ldu */
+ INVALID, /* 01 0 1110 */
+ { 8, ST+U }, /* 01 0 1111: stdu */
+ { 8, LD+U }, /* 01 1 0000: ldux */
+ INVALID, /* 01 1 0001 */
+ { 8, ST+U }, /* 01 1 0010: stdux */
+ INVALID, /* 01 1 0011 */
+ INVALID, /* 01 1 0100 */
+ { 4, LD+SE+U }, /* 01 1 0101: lwaux */
+ INVALID, /* 01 1 0110 */
+ INVALID, /* 01 1 0111 */
+ INVALID, /* 01 1 1000 */
+ INVALID, /* 01 1 1001 */
+ INVALID, /* 01 1 1010 */
+ INVALID, /* 01 1 1011 */
+ INVALID, /* 01 1 1100 */
+ INVALID, /* 01 1 1101 */
+ INVALID, /* 01 1 1110 */
+ INVALID, /* 01 1 1111 */
+ INVALID, /* 10 0 0000 */
+ INVALID, /* 10 0 0001 */
+ INVALID, /* 10 0 0010: stwcx. */
+ INVALID, /* 10 0 0011 */
+ INVALID, /* 10 0 0100 */
+ INVALID, /* 10 0 0101 */
+ INVALID, /* 10 0 0110 */
+ INVALID, /* 10 0 0111 */
+ { 4, LD+SW }, /* 10 0 1000: lwbrx */
+ INVALID, /* 10 0 1001 */
+ { 4, ST+SW }, /* 10 0 1010: stwbrx */
+ INVALID, /* 10 0 1011 */
+ { 2, LD+SW }, /* 10 0 1100: lhbrx */
+ { 4, LD+SE }, /* 10 0 1101 lwa */
+ { 2, ST+SW }, /* 10 0 1110: sthbrx */
+ INVALID, /* 10 0 1111 */
+ INVALID, /* 10 1 0000 */
+ INVALID, /* 10 1 0001 */
+ INVALID, /* 10 1 0010 */
+ INVALID, /* 10 1 0011 */
+ INVALID, /* 10 1 0100 */
+ INVALID, /* 10 1 0101 */
+ INVALID, /* 10 1 0110 */
+ INVALID, /* 10 1 0111 */
+ INVALID, /* 10 1 1000 */
+ INVALID, /* 10 1 1001 */
+ INVALID, /* 10 1 1010 */
+ INVALID, /* 10 1 1011 */
+ INVALID, /* 10 1 1100 */
+ INVALID, /* 10 1 1101 */
+ INVALID, /* 10 1 1110 */
+ { 0, ST+HARD }, /* 10 1 1111: dcbz */
+ { 4, LD }, /* 11 0 0000: lwzx */
+ INVALID, /* 11 0 0001 */
+ { 4, ST }, /* 11 0 0010: stwx */
+ INVALID, /* 11 0 0011 */
+ { 2, LD }, /* 11 0 0100: lhzx */
+ { 2, LD+SE }, /* 11 0 0101: lhax */
+ { 2, ST }, /* 11 0 0110: sthx */
+ INVALID, /* 11 0 0111 */
+ { 4, LD+F+S }, /* 11 0 1000: lfsx */
+ { 8, LD+F }, /* 11 0 1001: lfdx */
+ { 4, ST+F+S }, /* 11 0 1010: stfsx */
+ { 8, ST+F }, /* 11 0 1011: stfdx */
+ { 16, LD+F }, /* 11 0 1100: lfdpx */
+ { 4, LD+F+SE }, /* 11 0 1101: lfiwax */
+ { 16, ST+F }, /* 11 0 1110: stfdpx */
+ { 4, ST+F }, /* 11 0 1111: stfiwx */
+ { 4, LD+U }, /* 11 1 0000: lwzux */
+ INVALID, /* 11 1 0001 */
+ { 4, ST+U }, /* 11 1 0010: stwux */
+ INVALID, /* 11 1 0011 */
+ { 2, LD+U }, /* 11 1 0100: lhzux */
+ { 2, LD+SE+U }, /* 11 1 0101: lhaux */
+ { 2, ST+U }, /* 11 1 0110: sthux */
+ INVALID, /* 11 1 0111 */
+ { 4, LD+F+S+U }, /* 11 1 1000: lfsux */
+ { 8, LD+F+U }, /* 11 1 1001: lfdux */
+ { 4, ST+F+S+U }, /* 11 1 1010: stfsux */
+ { 8, ST+F+U }, /* 11 1 1011: stfdux */
+ INVALID, /* 11 1 1100 */
+ { 4, LD+F }, /* 11 1 1101: lfiwzx */
+ INVALID, /* 11 1 1110 */
+ INVALID, /* 11 1 1111 */
+};
+
+/*
+ * Create a DSISR value from the instruction
+ */
+static inline unsigned make_dsisr(unsigned instr)
+{
+ unsigned dsisr;
+
+
+ /* bits 6:15 --> 22:31 */
+ dsisr = (instr & 0x03ff0000) >> 16;
+
+ if (IS_XFORM(instr)) {
+ /* bits 29:30 --> 15:16 */
+ dsisr |= (instr & 0x00000006) << 14;
+ /* bit 25 --> 17 */
+ dsisr |= (instr & 0x00000040) << 8;
+ /* bits 21:24 --> 18:21 */
+ dsisr |= (instr & 0x00000780) << 3;
+ } else {
+ /* bit 5 --> 17 */
+ dsisr |= (instr & 0x04000000) >> 12;
+ /* bits 1: 4 --> 18:21 */
+ dsisr |= (instr & 0x78000000) >> 17;
+ /* bits 30:31 --> 12:13 */
+ if (IS_DSFORM(instr))
+ dsisr |= (instr & 0x00000003) << 18;
+ }
+
+ return dsisr;
+}
+
+/*
+ * The dcbz (data cache block zero) instruction
+ * gives an alignment fault if used on non-cacheable
+ * memory. We handle the fault mainly for the
+ * case when we are running with the cache disabled
+ * for debugging.
+ */
+static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
+{
+ long __user *p;
+ int i, size;
+
+#ifdef __powerpc64__
+ size = ppc64_caches.dline_size;
+#else
+ size = L1_CACHE_BYTES;
+#endif
+ p = (long __user *) (regs->dar & -size);
+ if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, size))
+ return -EFAULT;
+ for (i = 0; i < size / sizeof(long); ++i)
+ if (__put_user_inatomic(0, p+i))
+ return -EFAULT;
+ return 1;
+}
+
+/*
+ * Emulate load & store multiple instructions
+ * On 64-bit machines, these instructions only affect/use the
+ * bottom 4 bytes of each register, and the loads clear the
+ * top 4 bytes of the affected register.
+ */
+#ifdef CONFIG_PPC64
+#define REG_BYTE(rp, i) *((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4)
+#else
+#define REG_BYTE(rp, i) *((u8 *)(rp) + (i))
+#endif
+
+#define SWIZ_PTR(p) ((unsigned char __user *)((p) ^ swiz))
+
+static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
+ unsigned int reg, unsigned int nb,
+ unsigned int flags, unsigned int instr,
+ unsigned long swiz)
+{
+ unsigned long *rptr;
+ unsigned int nb0, i, bswiz;
+ unsigned long p;
+
+ /*
+ * We do not try to emulate 8 bytes multiple as they aren't really
+ * available in our operating environments and we don't try to
+ * emulate multiples operations in kernel land as they should never
+ * be used/generated there at least not on unaligned boundaries
+ */
+ if (unlikely((nb > 4) || !user_mode(regs)))
+ return 0;
+
+ /* lmw, stmw, lswi/x, stswi/x */
+ nb0 = 0;
+ if (flags & HARD) {
+ if (flags & SX) {
+ nb = regs->xer & 127;
+ if (nb == 0)
+ return 1;
+ } else {
+ unsigned long pc = regs->nip ^ (swiz & 4);
+
+ if (__get_user_inatomic(instr,
+ (unsigned int __user *)pc))
+ return -EFAULT;
+ if (swiz == 0 && (flags & SW))
+ instr = cpu_to_le32(instr);
+ nb = (instr >> 11) & 0x1f;
+ if (nb == 0)
+ nb = 32;
+ }
+ if (nb + reg * 4 > 128) {
+ nb0 = nb + reg * 4 - 128;
+ nb = 128 - reg * 4;
+ }
+ } else {
+ /* lwm, stmw */
+ nb = (32 - reg) * 4;
+ }
+
+ if (!access_ok((flags & ST ? VERIFY_WRITE: VERIFY_READ), addr, nb+nb0))
+ return -EFAULT; /* bad address */
+
+ rptr = &regs->gpr[reg];
+ p = (unsigned long) addr;
+ bswiz = (flags & SW)? 3: 0;
+
+ if (!(flags & ST)) {
+ /*
+ * This zeroes the top 4 bytes of the affected registers
+ * in 64-bit mode, and also zeroes out any remaining
+ * bytes of the last register for lsw*.
+ */
+ memset(rptr, 0, ((nb + 3) / 4) * sizeof(unsigned long));
+ if (nb0 > 0)
+ memset(&regs->gpr[0], 0,
+ ((nb0 + 3) / 4) * sizeof(unsigned long));
+
+ for (i = 0; i < nb; ++i, ++p)
+ if (__get_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
+ SWIZ_PTR(p)))
+ return -EFAULT;
+ if (nb0 > 0) {
+ rptr = &regs->gpr[0];
+ addr += nb;
+ for (i = 0; i < nb0; ++i, ++p)
+ if (__get_user_inatomic(REG_BYTE(rptr,
+ i ^ bswiz),
+ SWIZ_PTR(p)))
+ return -EFAULT;
+ }
+
+ } else {
+ for (i = 0; i < nb; ++i, ++p)
+ if (__put_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
+ SWIZ_PTR(p)))
+ return -EFAULT;
+ if (nb0 > 0) {
+ rptr = &regs->gpr[0];
+ addr += nb;
+ for (i = 0; i < nb0; ++i, ++p)
+ if (__put_user_inatomic(REG_BYTE(rptr,
+ i ^ bswiz),
+ SWIZ_PTR(p)))
+ return -EFAULT;
+ }
+ }
+ return 1;
+}
+
+/*
+ * Emulate floating-point pair loads and stores.
+ * Only POWER6 has these instructions, and it does true little-endian,
+ * so we don't need the address swizzling.
+ */
+static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg,
+ unsigned int flags)
+{
+ char *ptr0 = (char *) &current->thread.TS_FPR(reg);
+ char *ptr1 = (char *) &current->thread.TS_FPR(reg+1);
+ int i, ret, sw = 0;
+
+ if (!(flags & F))
+ return 0;
+ if (reg & 1)
+ return 0; /* invalid form: FRS/FRT must be even */
+ if (flags & SW)
+ sw = 7;
+ ret = 0;
+ for (i = 0; i < 8; ++i) {
+ if (!(flags & ST)) {
+ ret |= __get_user(ptr0[i^sw], addr + i);
+ ret |= __get_user(ptr1[i^sw], addr + i + 8);
+ } else {
+ ret |= __put_user(ptr0[i^sw], addr + i);
+ ret |= __put_user(ptr1[i^sw], addr + i + 8);
+ }
+ }
+ if (ret)
+ return -EFAULT;
+ return 1; /* exception handled and fixed up */
+}
+
+#ifdef CONFIG_SPE
+
+static struct aligninfo spe_aligninfo[32] = {
+ { 8, LD+E8 }, /* 0 00 00: evldd[x] */
+ { 8, LD+E4 }, /* 0 00 01: evldw[x] */
+ { 8, LD }, /* 0 00 10: evldh[x] */
+ INVALID, /* 0 00 11 */
+ { 2, LD }, /* 0 01 00: evlhhesplat[x] */
+ INVALID, /* 0 01 01 */
+ { 2, LD }, /* 0 01 10: evlhhousplat[x] */
+ { 2, LD+SE }, /* 0 01 11: evlhhossplat[x] */
+ { 4, LD }, /* 0 10 00: evlwhe[x] */
+ INVALID, /* 0 10 01 */
+ { 4, LD }, /* 0 10 10: evlwhou[x] */
+ { 4, LD+SE }, /* 0 10 11: evlwhos[x] */
+ { 4, LD+E4 }, /* 0 11 00: evlwwsplat[x] */
+ INVALID, /* 0 11 01 */
+ { 4, LD }, /* 0 11 10: evlwhsplat[x] */
+ INVALID, /* 0 11 11 */
+
+ { 8, ST+E8 }, /* 1 00 00: evstdd[x] */
+ { 8, ST+E4 }, /* 1 00 01: evstdw[x] */
+ { 8, ST }, /* 1 00 10: evstdh[x] */
+ INVALID, /* 1 00 11 */
+ INVALID, /* 1 01 00 */
+ INVALID, /* 1 01 01 */
+ INVALID, /* 1 01 10 */
+ INVALID, /* 1 01 11 */
+ { 4, ST }, /* 1 10 00: evstwhe[x] */
+ INVALID, /* 1 10 01 */
+ { 4, ST }, /* 1 10 10: evstwho[x] */
+ INVALID, /* 1 10 11 */
+ { 4, ST+E4 }, /* 1 11 00: evstwwe[x] */
+ INVALID, /* 1 11 01 */
+ { 4, ST+E4 }, /* 1 11 10: evstwwo[x] */
+ INVALID, /* 1 11 11 */
+};
+
+#define EVLDD 0x00
+#define EVLDW 0x01
+#define EVLDH 0x02
+#define EVLHHESPLAT 0x04
+#define EVLHHOUSPLAT 0x06
+#define EVLHHOSSPLAT 0x07
+#define EVLWHE 0x08
+#define EVLWHOU 0x0A
+#define EVLWHOS 0x0B
+#define EVLWWSPLAT 0x0C
+#define EVLWHSPLAT 0x0E
+#define EVSTDD 0x10
+#define EVSTDW 0x11
+#define EVSTDH 0x12
+#define EVSTWHE 0x18
+#define EVSTWHO 0x1A
+#define EVSTWWE 0x1C
+#define EVSTWWO 0x1E
+
+/*
+ * Emulate SPE loads and stores.
+ * Only Book-E has these instructions, and it does true little-endian,
+ * so we don't need the address swizzling.
+ */
+static int emulate_spe(struct pt_regs *regs, unsigned int reg,
+ unsigned int instr)
+{
+ int t, ret;
+ union {
+ u64 ll;
+ u32 w[2];
+ u16 h[4];
+ u8 v[8];
+ } data, temp;
+ unsigned char __user *p, *addr;
+ unsigned long *evr = &current->thread.evr[reg];
+ unsigned int nb, flags;
+
+ instr = (instr >> 1) & 0x1f;
+
+ /* DAR has the operand effective address */
+ addr = (unsigned char __user *)regs->dar;
+
+ nb = spe_aligninfo[instr].len;
+ flags = spe_aligninfo[instr].flags;
+
+ /* Verify the address of the operand */
+ if (unlikely(user_mode(regs) &&
+ !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ),
+ addr, nb)))
+ return -EFAULT;
+
+ /* userland only */
+ if (unlikely(!user_mode(regs)))
+ return 0;
+
+ flush_spe_to_thread(current);
+
+ /* If we are loading, get the data from user space, else
+ * get it from register values
+ */
+ if (flags & ST) {
+ data.ll = 0;
+ switch (instr) {
+ case EVSTDD:
+ case EVSTDW:
+ case EVSTDH:
+ data.w[0] = *evr;
+ data.w[1] = regs->gpr[reg];
+ break;
+ case EVSTWHE:
+ data.h[2] = *evr >> 16;
+ data.h[3] = regs->gpr[reg] >> 16;
+ break;
+ case EVSTWHO:
+ data.h[2] = *evr & 0xffff;
+ data.h[3] = regs->gpr[reg] & 0xffff;
+ break;
+ case EVSTWWE:
+ data.w[1] = *evr;
+ break;
+ case EVSTWWO:
+ data.w[1] = regs->gpr[reg];
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else {
+ temp.ll = data.ll = 0;
+ ret = 0;
+ p = addr;
+
+ switch (nb) {
+ case 8:
+ ret |= __get_user_inatomic(temp.v[0], p++);
+ ret |= __get_user_inatomic(temp.v[1], p++);
+ ret |= __get_user_inatomic(temp.v[2], p++);
+ ret |= __get_user_inatomic(temp.v[3], p++);
+ case 4:
+ ret |= __get_user_inatomic(temp.v[4], p++);
+ ret |= __get_user_inatomic(temp.v[5], p++);
+ case 2:
+ ret |= __get_user_inatomic(temp.v[6], p++);
+ ret |= __get_user_inatomic(temp.v[7], p++);
+ if (unlikely(ret))
+ return -EFAULT;
+ }
+
+ switch (instr) {
+ case EVLDD:
+ case EVLDW:
+ case EVLDH:
+ data.ll = temp.ll;
+ break;
+ case EVLHHESPLAT:
+ data.h[0] = temp.h[3];
+ data.h[2] = temp.h[3];
+ break;
+ case EVLHHOUSPLAT:
+ case EVLHHOSSPLAT:
+ data.h[1] = temp.h[3];
+ data.h[3] = temp.h[3];
+ break;
+ case EVLWHE:
+ data.h[0] = temp.h[2];
+ data.h[2] = temp.h[3];
+ break;
+ case EVLWHOU:
+ case EVLWHOS:
+ data.h[1] = temp.h[2];
+ data.h[3] = temp.h[3];
+ break;
+ case EVLWWSPLAT:
+ data.w[0] = temp.w[1];
+ data.w[1] = temp.w[1];
+ break;
+ case EVLWHSPLAT:
+ data.h[0] = temp.h[2];
+ data.h[1] = temp.h[2];
+ data.h[2] = temp.h[3];
+ data.h[3] = temp.h[3];
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ if (flags & SW) {
+ switch (flags & 0xf0) {
+ case E8:
+ SWAP(data.v[0], data.v[7]);
+ SWAP(data.v[1], data.v[6]);
+ SWAP(data.v[2], data.v[5]);
+ SWAP(data.v[3], data.v[4]);
+ break;
+ case E4:
+
+ SWAP(data.v[0], data.v[3]);
+ SWAP(data.v[1], data.v[2]);
+ SWAP(data.v[4], data.v[7]);
+ SWAP(data.v[5], data.v[6]);
+ break;
+ /* Its half word endian */
+ default:
+ SWAP(data.v[0], data.v[1]);
+ SWAP(data.v[2], data.v[3]);
+ SWAP(data.v[4], data.v[5]);
+ SWAP(data.v[6], data.v[7]);
+ break;
+ }
+ }
+
+ if (flags & SE) {
+ data.w[0] = (s16)data.h[1];
+ data.w[1] = (s16)data.h[3];
+ }
+
+ /* Store result to memory or update registers */
+ if (flags & ST) {
+ ret = 0;
+ p = addr;
+ switch (nb) {
+ case 8:
+ ret |= __put_user_inatomic(data.v[0], p++);
+ ret |= __put_user_inatomic(data.v[1], p++);
+ ret |= __put_user_inatomic(data.v[2], p++);
+ ret |= __put_user_inatomic(data.v[3], p++);
+ case 4:
+ ret |= __put_user_inatomic(data.v[4], p++);
+ ret |= __put_user_inatomic(data.v[5], p++);
+ case 2:
+ ret |= __put_user_inatomic(data.v[6], p++);
+ ret |= __put_user_inatomic(data.v[7], p++);
+ }
+ if (unlikely(ret))
+ return -EFAULT;
+ } else {
+ *evr = data.w[0];
+ regs->gpr[reg] = data.w[1];
+ }
+
+ return 1;
+}
+#endif /* CONFIG_SPE */
+
+#ifdef CONFIG_VSX
+/*
+ * Emulate VSX instructions...
+ */
+static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
+ unsigned int areg, struct pt_regs *regs,
+ unsigned int flags, unsigned int length,
+ unsigned int elsize)
+{
+ char *ptr;
+ unsigned long *lptr;
+ int ret = 0;
+ int sw = 0;
+ int i, j;
+
+ flush_vsx_to_thread(current);
+
+ if (reg < 32)
+ ptr = (char *) &current->thread.TS_FPR(reg);
+ else
+ ptr = (char *) &current->thread.vr[reg - 32];
+
+ lptr = (unsigned long *) ptr;
+
+ if (flags & SW)
+ sw = elsize-1;
+
+ for (j = 0; j < length; j += elsize) {
+ for (i = 0; i < elsize; ++i) {
+ if (flags & ST)
+ ret |= __put_user(ptr[i^sw], addr + i);
+ else
+ ret |= __get_user(ptr[i^sw], addr + i);
+ }
+ ptr += elsize;
+ addr += elsize;
+ }
+
+ if (!ret) {
+ if (flags & U)
+ regs->gpr[areg] = regs->dar;
+
+ /* Splat load copies the same data to top and bottom 8 bytes */
+ if (flags & SPLT)
+ lptr[1] = lptr[0];
+ /* For 8 byte loads, zero the top 8 bytes */
+ else if (!(flags & ST) && (8 == length))
+ lptr[1] = 0;
+ } else
+ return -EFAULT;
+
+ return 1;
+}
+#endif
+
+/*
+ * Called on alignment exception. Attempts to fixup
+ *
+ * Return 1 on success
+ * Return 0 if unable to handle the interrupt
+ * Return -EFAULT if data address is bad
+ */
+
+int fix_alignment(struct pt_regs *regs)
+{
+ unsigned int instr, nb, flags, instruction = 0;
+ unsigned int reg, areg;
+ unsigned int dsisr;
+ unsigned char __user *addr;
+ unsigned long p, swiz;
+ int ret, t;
+ union {
+ u64 ll;
+ double dd;
+ unsigned char v[8];
+ struct {
+ unsigned hi32;
+ int low32;
+ } x32;
+ struct {
+ unsigned char hi48[6];
+ short low16;
+ } x16;
+ } data;
+
+ /*
+ * We require a complete register set, if not, then our assembly
+ * is broken
+ */
+ CHECK_FULL_REGS(regs);
+
+ dsisr = regs->dsisr;
+
+ /* Some processors don't provide us with a DSISR we can use here,
+ * let's make one up from the instruction
+ */
+ if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) {
+ unsigned long pc = regs->nip;
+
+ if (cpu_has_feature(CPU_FTR_PPC_LE) && (regs->msr & MSR_LE))
+ pc ^= 4;
+ if (unlikely(__get_user_inatomic(instr,
+ (unsigned int __user *)pc)))
+ return -EFAULT;
+ if (cpu_has_feature(CPU_FTR_REAL_LE) && (regs->msr & MSR_LE))
+ instr = cpu_to_le32(instr);
+ dsisr = make_dsisr(instr);
+ instruction = instr;
+ }
+
+ /* extract the operation and registers from the dsisr */
+ reg = (dsisr >> 5) & 0x1f; /* source/dest register */
+ areg = dsisr & 0x1f; /* register to update */
+
+#ifdef CONFIG_SPE
+ if ((instr >> 26) == 0x4) {
+ PPC_WARN_ALIGNMENT(spe, regs);
+ return emulate_spe(regs, reg, instr);
+ }
+#endif
+
+ instr = (dsisr >> 10) & 0x7f;
+ instr |= (dsisr >> 13) & 0x60;
+
+ /* Lookup the operation in our table */
+ nb = aligninfo[instr].len;
+ flags = aligninfo[instr].flags;
+
+ /* Byteswap little endian loads and stores */
+ swiz = 0;
+ if (regs->msr & MSR_LE) {
+ flags ^= SW;
+ /*
+ * So-called "PowerPC little endian" mode works by
+ * swizzling addresses rather than by actually doing
+ * any byte-swapping. To emulate this, we XOR each
+ * byte address with 7. We also byte-swap, because
+ * the processor's address swizzling depends on the
+ * operand size (it xors the address with 7 for bytes,
+ * 6 for halfwords, 4 for words, 0 for doublewords) but
+ * we will xor with 7 and load/store each byte separately.
+ */
+ if (cpu_has_feature(CPU_FTR_PPC_LE))
+ swiz = 7;
+ }
+
+ /* DAR has the operand effective address */
+ addr = (unsigned char __user *)regs->dar;
+
+#ifdef CONFIG_VSX
+ if ((instruction & 0xfc00003e) == 0x7c000018) {
+ unsigned int elsize;
+
+ /* Additional register addressing bit (64 VSX vs 32 FPR/GPR) */
+ reg |= (instruction & 0x1) << 5;
+ /* Simple inline decoder instead of a table */
+ /* VSX has only 8 and 16 byte memory accesses */
+ nb = 8;
+ if (instruction & 0x200)
+ nb = 16;
+
+ /* Vector stores in little-endian mode swap individual
+ elements, so process them separately */
+ elsize = 4;
+ if (instruction & 0x80)
+ elsize = 8;
+
+ flags = 0;
+ if (regs->msr & MSR_LE)
+ flags |= SW;
+ if (instruction & 0x100)
+ flags |= ST;
+ if (instruction & 0x040)
+ flags |= U;
+ /* splat load needs a special decoder */
+ if ((instruction & 0x400) == 0){
+ flags |= SPLT;
+ nb = 8;
+ }
+ PPC_WARN_ALIGNMENT(vsx, regs);
+ return emulate_vsx(addr, reg, areg, regs, flags, nb, elsize);
+ }
+#endif
+ /* A size of 0 indicates an instruction we don't support, with
+ * the exception of DCBZ which is handled as a special case here
+ */
+ if (instr == DCBZ) {
+ PPC_WARN_ALIGNMENT(dcbz, regs);
+ return emulate_dcbz(regs, addr);
+ }
+ if (unlikely(nb == 0))
+ return 0;
+
+ /* Load/Store Multiple instructions are handled in their own
+ * function
+ */
+ if (flags & M) {
+ PPC_WARN_ALIGNMENT(multiple, regs);
+ return emulate_multiple(regs, addr, reg, nb,
+ flags, instr, swiz);
+ }
+
+ /* Verify the address of the operand */
+ if (unlikely(user_mode(regs) &&
+ !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ),
+ addr, nb)))
+ return -EFAULT;
+
+ /* Force the fprs into the save area so we can reference them */
+ if (flags & F) {
+ /* userland only */
+ if (unlikely(!user_mode(regs)))
+ return 0;
+ flush_fp_to_thread(current);
+ }
+
+ /* Special case for 16-byte FP loads and stores */
+ if (nb == 16) {
+ PPC_WARN_ALIGNMENT(fp_pair, regs);
+ return emulate_fp_pair(addr, reg, flags);
+ }
+
+ PPC_WARN_ALIGNMENT(unaligned, regs);
+
+ /* If we are loading, get the data from user space, else
+ * get it from register values
+ */
+ if (!(flags & ST)) {
+ data.ll = 0;
+ ret = 0;
+ p = (unsigned long) addr;
+ switch (nb) {
+ case 8:
+ ret |= __get_user_inatomic(data.v[0], SWIZ_PTR(p++));
+ ret |= __get_user_inatomic(data.v[1], SWIZ_PTR(p++));
+ ret |= __get_user_inatomic(data.v[2], SWIZ_PTR(p++));
+ ret |= __get_user_inatomic(data.v[3], SWIZ_PTR(p++));
+ case 4:
+ ret |= __get_user_inatomic(data.v[4], SWIZ_PTR(p++));
+ ret |= __get_user_inatomic(data.v[5], SWIZ_PTR(p++));
+ case 2:
+ ret |= __get_user_inatomic(data.v[6], SWIZ_PTR(p++));
+ ret |= __get_user_inatomic(data.v[7], SWIZ_PTR(p++));
+ if (unlikely(ret))
+ return -EFAULT;
+ }
+ } else if (flags & F) {
+ data.dd = current->thread.TS_FPR(reg);
+ if (flags & S) {
+ /* Single-precision FP store requires conversion... */
+#ifdef CONFIG_PPC_FPU
+ preempt_disable();
+ enable_kernel_fp();
+ cvt_df(&data.dd, (float *)&data.v[4]);
+ preempt_enable();
+#else
+ return 0;
+#endif
+ }
+ } else
+ data.ll = regs->gpr[reg];
+
+ if (flags & SW) {
+ switch (nb) {
+ case 8:
+ SWAP(data.v[0], data.v[7]);
+ SWAP(data.v[1], data.v[6]);
+ SWAP(data.v[2], data.v[5]);
+ SWAP(data.v[3], data.v[4]);
+ break;
+ case 4:
+ SWAP(data.v[4], data.v[7]);
+ SWAP(data.v[5], data.v[6]);
+ break;
+ case 2:
+ SWAP(data.v[6], data.v[7]);
+ break;
+ }
+ }
+
+ /* Perform other misc operations like sign extension
+ * or floating point single precision conversion
+ */
+ switch (flags & ~(U|SW)) {
+ case LD+SE: /* sign extending integer loads */
+ case LD+F+SE: /* sign extend for lfiwax */
+ if ( nb == 2 )
+ data.ll = data.x16.low16;
+ else /* nb must be 4 */
+ data.ll = data.x32.low32;
+ break;
+
+ /* Single-precision FP load requires conversion... */
+ case LD+F+S:
+#ifdef CONFIG_PPC_FPU
+ preempt_disable();
+ enable_kernel_fp();
+ cvt_fd((float *)&data.v[4], &data.dd);
+ preempt_enable();
+#else
+ return 0;
+#endif
+ break;
+ }
+
+ /* Store result to memory or update registers */
+ if (flags & ST) {
+ ret = 0;
+ p = (unsigned long) addr;
+ switch (nb) {
+ case 8:
+ ret |= __put_user_inatomic(data.v[0], SWIZ_PTR(p++));
+ ret |= __put_user_inatomic(data.v[1], SWIZ_PTR(p++));
+ ret |= __put_user_inatomic(data.v[2], SWIZ_PTR(p++));
+ ret |= __put_user_inatomic(data.v[3], SWIZ_PTR(p++));
+ case 4:
+ ret |= __put_user_inatomic(data.v[4], SWIZ_PTR(p++));
+ ret |= __put_user_inatomic(data.v[5], SWIZ_PTR(p++));
+ case 2:
+ ret |= __put_user_inatomic(data.v[6], SWIZ_PTR(p++));
+ ret |= __put_user_inatomic(data.v[7], SWIZ_PTR(p++));
+ }
+ if (unlikely(ret))
+ return -EFAULT;
+ } else if (flags & F)
+ current->thread.TS_FPR(reg) = data.dd;
+ else
+ regs->gpr[reg] = data.ll;
+
+ /* Update RA as needed */
+ if (flags & U)
+ regs->gpr[areg] = regs->dar;
+
+ return 1;
+}
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
new file mode 100644
index 00000000..34b8afe9
--- /dev/null
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -0,0 +1,619 @@
+/*
+ * This program is used to generate definitions needed by
+ * assembly language modules.
+ *
+ * We use the technique used in the OSF Mach kernel code:
+ * generate asm statements containing #defines,
+ * compile this file to assembler, and then extract the
+ * #defines from the assembly-language output.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/suspend.h>
+#include <linux/hrtimer.h>
+#ifdef CONFIG_PPC64
+#include <linux/time.h>
+#include <linux/hardirq.h>
+#endif
+#include <linux/kbuild.h>
+
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/rtas.h>
+#include <asm/vdso_datapage.h>
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+#include <asm/lppaca.h>
+#include <asm/cache.h>
+#include <asm/compat.h>
+#include <asm/mmu.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#endif
+#ifdef CONFIG_PPC_POWERNV
+#include <asm/opal.h>
+#endif
+#if defined(CONFIG_KVM) || defined(CONFIG_KVM_GUEST)
+#include <linux/kvm_host.h>
+#endif
+#if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S)
+#include <asm/kvm_book3s.h>
+#endif
+
+#ifdef CONFIG_PPC32
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#include "head_booke.h"
+#endif
+#endif
+
+#if defined(CONFIG_PPC_FSL_BOOK3E)
+#include "../mm/mmu_decl.h"
+#endif
+
+int main(void)
+{
+ DEFINE(THREAD, offsetof(struct task_struct, thread));
+ DEFINE(MM, offsetof(struct task_struct, mm));
+ DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id));
+#ifdef CONFIG_PPC64
+ DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context));
+ DEFINE(SIGSEGV, SIGSEGV);
+ DEFINE(NMI_MASK, NMI_MASK);
+ DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr));
+#else
+ DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
+#endif /* CONFIG_PPC64 */
+
+ DEFINE(KSP, offsetof(struct thread_struct, ksp));
+ DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
+ DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
+#ifdef CONFIG_BOOKE
+ DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0]));
+#endif
+ DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode));
+ DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0]));
+ DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr));
+#ifdef CONFIG_ALTIVEC
+ DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0]));
+ DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave));
+ DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr));
+ DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+ DEFINE(THREAD_VSR0, offsetof(struct thread_struct, fpr));
+ DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr));
+#endif /* CONFIG_VSX */
+#ifdef CONFIG_PPC64
+ DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid));
+#else /* CONFIG_PPC64 */
+ DEFINE(PGDIR, offsetof(struct thread_struct, pgdir));
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+ DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0));
+#endif
+#ifdef CONFIG_SPE
+ DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0]));
+ DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc));
+ DEFINE(THREAD_SPEFSCR, offsetof(struct thread_struct, spefscr));
+ DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe));
+#endif /* CONFIG_SPE */
+#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+ DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
+#endif
+
+ DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
+ DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
+ DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
+ DEFINE(TI_TASK, offsetof(struct thread_info, task));
+ DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
+
+#ifdef CONFIG_PPC64
+ DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size));
+ DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size));
+ DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page));
+ DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size));
+ DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size));
+ DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page));
+ /* paca */
+ DEFINE(PACA_SIZE, sizeof(struct paca_struct));
+ DEFINE(PACA_LOCK_TOKEN, offsetof(struct paca_struct, lock_token));
+ DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index));
+ DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start));
+ DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack));
+ DEFINE(PACACURRENT, offsetof(struct paca_struct, __current));
+ DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr));
+ DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr));
+ DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1));
+ DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc));
+ DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase));
+ DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
+ DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
+ DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened));
+ DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
+#ifdef CONFIG_PPC_MM_SLICES
+ DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
+ context.low_slices_psize));
+ DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct,
+ context.high_slices_psize));
+ DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#ifdef CONFIG_PPC_BOOK3E
+ DEFINE(PACAPGD, offsetof(struct paca_struct, pgd));
+ DEFINE(PACA_KERNELPGD, offsetof(struct paca_struct, kernel_pgd));
+ DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
+ DEFINE(PACA_EXTLB, offsetof(struct paca_struct, extlb));
+ DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
+ DEFINE(PACA_EXCRIT, offsetof(struct paca_struct, excrit));
+ DEFINE(PACA_EXDBG, offsetof(struct paca_struct, exdbg));
+ DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack));
+ DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack));
+ DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack));
+#endif /* CONFIG_PPC_BOOK3E */
+
+#ifdef CONFIG_PPC_STD_MMU_64
+ DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real));
+ DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr));
+ DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
+ DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
+ DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp));
+#ifdef CONFIG_PPC_MM_SLICES
+ DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp));
+#else
+ DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp));
+#endif /* CONFIG_PPC_MM_SLICES */
+ DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
+ DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
+ DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb));
+ DEFINE(PACALPPACAPTR, offsetof(struct paca_struct, lppaca_ptr));
+ DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr));
+ DEFINE(SLBSHADOW_STACKVSID,
+ offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid));
+ DEFINE(SLBSHADOW_STACKESID,
+ offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid));
+ DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area));
+ DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
+ DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
+ DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int));
+ DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int));
+ DEFINE(LPPACA_PMCINUSE, offsetof(struct lppaca, pmcregs_in_use));
+ DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx));
+ DEFINE(LPPACA_YIELDCOUNT, offsetof(struct lppaca, yield_count));
+ DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx));
+#endif /* CONFIG_PPC_STD_MMU_64 */
+ DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
+ DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
+ DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
+ DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
+ DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user));
+ DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
+ DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
+ DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));
+ DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost));
+#endif /* CONFIG_PPC64 */
+
+ /* RTAS */
+ DEFINE(RTASBASE, offsetof(struct rtas_t, base));
+ DEFINE(RTASENTRY, offsetof(struct rtas_t, entry));
+
+ /* Interrupt register frame */
+ DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
+ DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
+#ifdef CONFIG_PPC64
+ /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
+ DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
+ DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
+
+ /* hcall statistics */
+ DEFINE(HCALL_STAT_SIZE, sizeof(struct hcall_stats));
+ DEFINE(HCALL_STAT_CALLS, offsetof(struct hcall_stats, num_calls));
+ DEFINE(HCALL_STAT_TB, offsetof(struct hcall_stats, tb_total));
+ DEFINE(HCALL_STAT_PURR, offsetof(struct hcall_stats, purr_total));
+#endif /* CONFIG_PPC64 */
+ DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0]));
+ DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1]));
+ DEFINE(GPR2, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[2]));
+ DEFINE(GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[3]));
+ DEFINE(GPR4, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[4]));
+ DEFINE(GPR5, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[5]));
+ DEFINE(GPR6, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[6]));
+ DEFINE(GPR7, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[7]));
+ DEFINE(GPR8, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[8]));
+ DEFINE(GPR9, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[9]));
+ DEFINE(GPR10, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[10]));
+ DEFINE(GPR11, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[11]));
+ DEFINE(GPR12, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[12]));
+ DEFINE(GPR13, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[13]));
+#ifndef CONFIG_PPC64
+ DEFINE(GPR14, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[14]));
+ DEFINE(GPR15, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[15]));
+ DEFINE(GPR16, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[16]));
+ DEFINE(GPR17, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[17]));
+ DEFINE(GPR18, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[18]));
+ DEFINE(GPR19, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[19]));
+ DEFINE(GPR20, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[20]));
+ DEFINE(GPR21, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[21]));
+ DEFINE(GPR22, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[22]));
+ DEFINE(GPR23, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[23]));
+ DEFINE(GPR24, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[24]));
+ DEFINE(GPR25, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[25]));
+ DEFINE(GPR26, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[26]));
+ DEFINE(GPR27, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[27]));
+ DEFINE(GPR28, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[28]));
+ DEFINE(GPR29, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[29]));
+ DEFINE(GPR30, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[30]));
+ DEFINE(GPR31, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[31]));
+#endif /* CONFIG_PPC64 */
+ /*
+ * Note: these symbols include _ because they overlap with special
+ * register names
+ */
+ DEFINE(_NIP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, nip));
+ DEFINE(_MSR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, msr));
+ DEFINE(_CTR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ctr));
+ DEFINE(_LINK, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, link));
+ DEFINE(_CCR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ccr));
+ DEFINE(_XER, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, xer));
+ DEFINE(_DAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar));
+ DEFINE(_DSISR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr));
+ DEFINE(ORIG_GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, orig_gpr3));
+ DEFINE(RESULT, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, result));
+ DEFINE(_TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap));
+#ifndef CONFIG_PPC64
+ DEFINE(_MQ, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, mq));
+ /*
+ * The PowerPC 400-class & Book-E processors have neither the DAR
+ * nor the DSISR SPRs. Hence, we overload them to hold the similar
+ * DEAR and ESR SPRs for such processors. For critical interrupts
+ * we use them to hold SRR0 and SRR1.
+ */
+ DEFINE(_DEAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar));
+ DEFINE(_ESR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr));
+#else /* CONFIG_PPC64 */
+ DEFINE(SOFTE, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, softe));
+
+ /* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */
+ DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs));
+ DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8);
+#endif /* CONFIG_PPC64 */
+
+#if defined(CONFIG_PPC32)
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+ DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE);
+ DEFINE(MAS0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
+ /* we overload MMUCR for 44x on MAS0 since they are mutually exclusive */
+ DEFINE(MMUCR, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
+ DEFINE(MAS1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas1));
+ DEFINE(MAS2, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas2));
+ DEFINE(MAS3, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas3));
+ DEFINE(MAS6, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas6));
+ DEFINE(MAS7, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas7));
+ DEFINE(_SRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr0));
+ DEFINE(_SRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr1));
+ DEFINE(_CSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr0));
+ DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1));
+ DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0));
+ DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1));
+ DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit));
+#endif
+#endif
+ DEFINE(CLONE_VM, CLONE_VM);
+ DEFINE(CLONE_UNTRACED, CLONE_UNTRACED);
+
+#ifndef CONFIG_PPC64
+ DEFINE(MM_PGD, offsetof(struct mm_struct, pgd));
+#endif /* ! CONFIG_PPC64 */
+
+ /* About the CPU features table */
+ DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features));
+ DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup));
+ DEFINE(CPU_SPEC_RESTORE, offsetof(struct cpu_spec, cpu_restore));
+
+ DEFINE(pbe_address, offsetof(struct pbe, address));
+ DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
+ DEFINE(pbe_next, offsetof(struct pbe, next));
+
+#ifndef CONFIG_PPC64
+ DEFINE(TASK_SIZE, TASK_SIZE);
+ DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28);
+#endif /* ! CONFIG_PPC64 */
+
+ /* datapage offsets for use by vdso */
+ DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct vdso_data, tb_orig_stamp));
+ DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct vdso_data, tb_ticks_per_sec));
+ DEFINE(CFG_TB_TO_XS, offsetof(struct vdso_data, tb_to_xs));
+ DEFINE(CFG_STAMP_XSEC, offsetof(struct vdso_data, stamp_xsec));
+ DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct vdso_data, tb_update_count));
+ DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct vdso_data, tz_minuteswest));
+ DEFINE(CFG_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
+ DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32));
+ DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec));
+ DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
+ DEFINE(STAMP_XTIME, offsetof(struct vdso_data, stamp_xtime));
+ DEFINE(STAMP_SEC_FRAC, offsetof(struct vdso_data, stamp_sec_fraction));
+ DEFINE(CFG_ICACHE_BLOCKSZ, offsetof(struct vdso_data, icache_block_size));
+ DEFINE(CFG_DCACHE_BLOCKSZ, offsetof(struct vdso_data, dcache_block_size));
+ DEFINE(CFG_ICACHE_LOGBLOCKSZ, offsetof(struct vdso_data, icache_log_block_size));
+ DEFINE(CFG_DCACHE_LOGBLOCKSZ, offsetof(struct vdso_data, dcache_log_block_size));
+#ifdef CONFIG_PPC64
+ DEFINE(CFG_SYSCALL_MAP64, offsetof(struct vdso_data, syscall_map_64));
+ DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec));
+ DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec));
+ DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec));
+ DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec));
+ DEFINE(TSPC64_TV_SEC, offsetof(struct timespec, tv_sec));
+ DEFINE(TSPC64_TV_NSEC, offsetof(struct timespec, tv_nsec));
+ DEFINE(TSPC32_TV_SEC, offsetof(struct compat_timespec, tv_sec));
+ DEFINE(TSPC32_TV_NSEC, offsetof(struct compat_timespec, tv_nsec));
+#else
+ DEFINE(TVAL32_TV_SEC, offsetof(struct timeval, tv_sec));
+ DEFINE(TVAL32_TV_USEC, offsetof(struct timeval, tv_usec));
+ DEFINE(TSPC32_TV_SEC, offsetof(struct timespec, tv_sec));
+ DEFINE(TSPC32_TV_NSEC, offsetof(struct timespec, tv_nsec));
+#endif
+ /* timeval/timezone offsets for use by vdso */
+ DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
+ DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
+
+ /* Other bits used by the vdso */
+ DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
+ DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
+ DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
+ DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
+
+#ifdef CONFIG_BUG
+ DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
+#endif
+
+ DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
+ DEFINE(PTE_SIZE, sizeof(pte_t));
+
+#ifdef CONFIG_KVM
+ DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
+ DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
+ DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
+ DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave));
+ DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr));
+ DEFINE(VCPU_FPSCR, offsetof(struct kvm_vcpu, arch.fpscr));
+#ifdef CONFIG_ALTIVEC
+ DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr));
+ DEFINE(VCPU_VSCR, offsetof(struct kvm_vcpu, arch.vscr));
+#endif
+#ifdef CONFIG_VSX
+ DEFINE(VCPU_VSRS, offsetof(struct kvm_vcpu, arch.vsr));
+#endif
+ DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
+ DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
+ DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
+ DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
+ DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr));
+ DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0));
+ DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1));
+ DEFINE(VCPU_SPRG0, offsetof(struct kvm_vcpu, arch.shregs.sprg0));
+ DEFINE(VCPU_SPRG1, offsetof(struct kvm_vcpu, arch.shregs.sprg1));
+ DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
+ DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
+#endif
+ DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
+ DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5));
+ DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6));
+ DEFINE(VCPU_SHARED_SPRG7, offsetof(struct kvm_vcpu_arch_shared, sprg7));
+ DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid));
+ DEFINE(VCPU_SHADOW_PID1, offsetof(struct kvm_vcpu, arch.shadow_pid1));
+ DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));
+ DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
+ DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
+
+ DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0));
+ DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1));
+ DEFINE(VCPU_SHARED_MAS2, offsetof(struct kvm_vcpu_arch_shared, mas2));
+ DEFINE(VCPU_SHARED_MAS7_3, offsetof(struct kvm_vcpu_arch_shared, mas7_3));
+ DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4));
+ DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6));
+
+ /* book3s */
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
+ DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
+ DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
+ DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
+ DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
+ DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
+ DEFINE(KVM_ONLINE_CPUS, offsetof(struct kvm, online_vcpus.counter));
+ DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu));
+ DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
+ DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
+ DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
+ DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
+ DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
+#endif
+#ifdef CONFIG_PPC_BOOK3S
+ DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
+ DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
+ DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr));
+ DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr));
+ DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr));
+ DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr));
+ DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor));
+ DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl));
+ DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr));
+ DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
+ DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec));
+ DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires));
+ DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions));
+ DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded));
+ DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
+ DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa));
+ DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
+ DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
+ DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
+ DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));
+ DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
+ DEFINE(VCPU_LAST_CPU, offsetof(struct kvm_vcpu, arch.last_cpu));
+ DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
+ DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
+ DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
+ DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
+ DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid));
+ DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
+ DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
+ DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
+ DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
+ DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
+ offsetof(struct kvmppc_vcpu_book3s, vcpu));
+ DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
+ DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv));
+ DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb));
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_KVM_BOOK3S_PR
+# define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f))
+#else
+# define SVCPU_FIELD(x, f)
+#endif
+# define HSTATE_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, kvm_hstate.f))
+#else /* 32-bit */
+# define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct kvmppc_book3s_shadow_vcpu, f))
+# define HSTATE_FIELD(x, f) DEFINE(x, offsetof(struct kvmppc_book3s_shadow_vcpu, hstate.f))
+#endif
+
+ SVCPU_FIELD(SVCPU_CR, cr);
+ SVCPU_FIELD(SVCPU_XER, xer);
+ SVCPU_FIELD(SVCPU_CTR, ctr);
+ SVCPU_FIELD(SVCPU_LR, lr);
+ SVCPU_FIELD(SVCPU_PC, pc);
+ SVCPU_FIELD(SVCPU_R0, gpr[0]);
+ SVCPU_FIELD(SVCPU_R1, gpr[1]);
+ SVCPU_FIELD(SVCPU_R2, gpr[2]);
+ SVCPU_FIELD(SVCPU_R3, gpr[3]);
+ SVCPU_FIELD(SVCPU_R4, gpr[4]);
+ SVCPU_FIELD(SVCPU_R5, gpr[5]);
+ SVCPU_FIELD(SVCPU_R6, gpr[6]);
+ SVCPU_FIELD(SVCPU_R7, gpr[7]);
+ SVCPU_FIELD(SVCPU_R8, gpr[8]);
+ SVCPU_FIELD(SVCPU_R9, gpr[9]);
+ SVCPU_FIELD(SVCPU_R10, gpr[10]);
+ SVCPU_FIELD(SVCPU_R11, gpr[11]);
+ SVCPU_FIELD(SVCPU_R12, gpr[12]);
+ SVCPU_FIELD(SVCPU_R13, gpr[13]);
+ SVCPU_FIELD(SVCPU_FAULT_DSISR, fault_dsisr);
+ SVCPU_FIELD(SVCPU_FAULT_DAR, fault_dar);
+ SVCPU_FIELD(SVCPU_LAST_INST, last_inst);
+ SVCPU_FIELD(SVCPU_SHADOW_SRR1, shadow_srr1);
+#ifdef CONFIG_PPC_BOOK3S_32
+ SVCPU_FIELD(SVCPU_SR, sr);
+#endif
+#ifdef CONFIG_PPC64
+ SVCPU_FIELD(SVCPU_SLB, slb);
+ SVCPU_FIELD(SVCPU_SLB_MAX, slb_max);
+#endif
+
+ HSTATE_FIELD(HSTATE_HOST_R1, host_r1);
+ HSTATE_FIELD(HSTATE_HOST_R2, host_r2);
+ HSTATE_FIELD(HSTATE_HOST_MSR, host_msr);
+ HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler);
+ HSTATE_FIELD(HSTATE_SCRATCH0, scratch0);
+ HSTATE_FIELD(HSTATE_SCRATCH1, scratch1);
+ HSTATE_FIELD(HSTATE_IN_GUEST, in_guest);
+ HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
+ HSTATE_FIELD(HSTATE_NAPPING, napping);
+
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
+ HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
+ HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
+ HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
+ HSTATE_FIELD(HSTATE_PMC, host_pmc);
+ HSTATE_FIELD(HSTATE_PURR, host_purr);
+ HSTATE_FIELD(HSTATE_SPURR, host_spurr);
+ HSTATE_FIELD(HSTATE_DSCR, host_dscr);
+ HSTATE_FIELD(HSTATE_DABR, dabr);
+ HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
+ DEFINE(IPI_PRIORITY, IPI_PRIORITY);
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
+
+#else /* CONFIG_PPC_BOOK3S */
+ DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
+ DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
+ DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
+ DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
+ DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
+ DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
+ DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
+ DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
+#endif /* CONFIG_PPC_BOOK3S */
+#endif /* CONFIG_KVM */
+
+#ifdef CONFIG_KVM_GUEST
+ DEFINE(KVM_MAGIC_SCRATCH1, offsetof(struct kvm_vcpu_arch_shared,
+ scratch1));
+ DEFINE(KVM_MAGIC_SCRATCH2, offsetof(struct kvm_vcpu_arch_shared,
+ scratch2));
+ DEFINE(KVM_MAGIC_SCRATCH3, offsetof(struct kvm_vcpu_arch_shared,
+ scratch3));
+ DEFINE(KVM_MAGIC_INT, offsetof(struct kvm_vcpu_arch_shared,
+ int_pending));
+ DEFINE(KVM_MAGIC_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
+ DEFINE(KVM_MAGIC_CRITICAL, offsetof(struct kvm_vcpu_arch_shared,
+ critical));
+ DEFINE(KVM_MAGIC_SR, offsetof(struct kvm_vcpu_arch_shared, sr));
+#endif
+
+#ifdef CONFIG_44x
+ DEFINE(PGD_T_LOG2, PGD_T_LOG2);
+ DEFINE(PTE_T_LOG2, PTE_T_LOG2);
+#endif
+#ifdef CONFIG_PPC_FSL_BOOK3E
+ DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam));
+ DEFINE(TLBCAM_MAS0, offsetof(struct tlbcam, MAS0));
+ DEFINE(TLBCAM_MAS1, offsetof(struct tlbcam, MAS1));
+ DEFINE(TLBCAM_MAS2, offsetof(struct tlbcam, MAS2));
+ DEFINE(TLBCAM_MAS3, offsetof(struct tlbcam, MAS3));
+ DEFINE(TLBCAM_MAS7, offsetof(struct tlbcam, MAS7));
+#endif
+
+#if defined(CONFIG_KVM) && defined(CONFIG_SPE)
+ DEFINE(VCPU_EVR, offsetof(struct kvm_vcpu, arch.evr[0]));
+ DEFINE(VCPU_ACC, offsetof(struct kvm_vcpu, arch.acc));
+ DEFINE(VCPU_SPEFSCR, offsetof(struct kvm_vcpu, arch.spefscr));
+ DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr));
+#endif
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+ DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
+ arch.timing_exit.tv32.tbu));
+ DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu,
+ arch.timing_exit.tv32.tbl));
+ DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu,
+ arch.timing_last_enter.tv32.tbu));
+ DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu,
+ arch.timing_last_enter.tv32.tbl));
+#endif
+
+#ifdef CONFIG_PPC_POWERNV
+ DEFINE(OPAL_MC_GPR3, offsetof(struct opal_machine_check_event, gpr3));
+ DEFINE(OPAL_MC_SRR0, offsetof(struct opal_machine_check_event, srr0));
+ DEFINE(OPAL_MC_SRR1, offsetof(struct opal_machine_check_event, srr1));
+ DEFINE(PACA_OPAL_MC_EVT, offsetof(struct paca_struct, opal_mc_evt));
+#endif
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c
new file mode 100644
index 00000000..a4dab7ca
--- /dev/null
+++ b/arch/powerpc/kernel/audit.c
@@ -0,0 +1,83 @@
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/audit.h>
+#include <asm/unistd.h>
+
+static unsigned dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+static unsigned read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+static unsigned write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+static unsigned chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+static unsigned signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int audit_classify_arch(int arch)
+{
+#ifdef CONFIG_PPC64
+ if (arch == AUDIT_ARCH_PPC)
+ return 1;
+#endif
+ return 0;
+}
+
+int audit_classify_syscall(int abi, unsigned syscall)
+{
+#ifdef CONFIG_PPC64
+ extern int ppc32_classify_syscall(unsigned);
+ if (abi == AUDIT_ARCH_PPC)
+ return ppc32_classify_syscall(syscall);
+#endif
+ switch(syscall) {
+ case __NR_open:
+ return 2;
+ case __NR_openat:
+ return 3;
+ case __NR_socketcall:
+ return 4;
+ case __NR_execve:
+ return 5;
+ default:
+ return 0;
+ }
+}
+
+static int __init audit_classes_init(void)
+{
+#ifdef CONFIG_PPC64
+ extern __u32 ppc32_dir_class[];
+ extern __u32 ppc32_write_class[];
+ extern __u32 ppc32_read_class[];
+ extern __u32 ppc32_chattr_class[];
+ extern __u32 ppc32_signal_class[];
+ audit_register_class(AUDIT_CLASS_WRITE_32, ppc32_write_class);
+ audit_register_class(AUDIT_CLASS_READ_32, ppc32_read_class);
+ audit_register_class(AUDIT_CLASS_DIR_WRITE_32, ppc32_dir_class);
+ audit_register_class(AUDIT_CLASS_CHATTR_32, ppc32_chattr_class);
+ audit_register_class(AUDIT_CLASS_SIGNAL_32, ppc32_signal_class);
+#endif
+ audit_register_class(AUDIT_CLASS_WRITE, write_class);
+ audit_register_class(AUDIT_CLASS_READ, read_class);
+ audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
+ audit_register_class(AUDIT_CLASS_CHATTR, chattr_class);
+ audit_register_class(AUDIT_CLASS_SIGNAL, signal_class);
+ return 0;
+}
+
+__initcall(audit_classes_init);
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
new file mode 100644
index 00000000..ac8f5273
--- /dev/null
+++ b/arch/powerpc/kernel/btext.c
@@ -0,0 +1,922 @@
+/*
+ * Procedures for drawing on the screen early on in the boot process.
+ *
+ * Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ */
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/memblock.h>
+
+#include <asm/sections.h>
+#include <asm/prom.h>
+#include <asm/btext.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/udbg.h>
+
+#define NO_SCROLL
+
+#ifndef NO_SCROLL
+static void scrollscreen(void);
+#endif
+
+static void draw_byte(unsigned char c, long locX, long locY);
+static void draw_byte_32(unsigned char *bits, unsigned int *base, int rb);
+static void draw_byte_16(unsigned char *bits, unsigned int *base, int rb);
+static void draw_byte_8(unsigned char *bits, unsigned int *base, int rb);
+
+#define __force_data __attribute__((__section__(".data")))
+
+static int g_loc_X __force_data;
+static int g_loc_Y __force_data;
+static int g_max_loc_X __force_data;
+static int g_max_loc_Y __force_data;
+
+static int dispDeviceRowBytes __force_data;
+static int dispDeviceDepth __force_data;
+static int dispDeviceRect[4] __force_data;
+static unsigned char *dispDeviceBase __force_data;
+static unsigned char *logicalDisplayBase __force_data;
+
+unsigned long disp_BAT[2] __initdata = {0, 0};
+
+#define cmapsz (16*256)
+
+static unsigned char vga_font[cmapsz];
+
+int boot_text_mapped __force_data = 0;
+int force_printk_to_btext = 0;
+
+#ifdef CONFIG_PPC32
+/* Calc BAT values for mapping the display and store them
+ * in disp_BAT. Those values are then used from head.S to map
+ * the display during identify_machine() and MMU_Init()
+ *
+ * The display is mapped to virtual address 0xD0000000, rather
+ * than 1:1, because some some CHRP machines put the frame buffer
+ * in the region starting at 0xC0000000 (PAGE_OFFSET).
+ * This mapping is temporary and will disappear as soon as the
+ * setup done by MMU_Init() is applied.
+ *
+ * For now, we align the BAT and then map 8Mb on 601 and 16Mb
+ * on other PPCs. This may cause trouble if the framebuffer
+ * is really badly aligned, but I didn't encounter this case
+ * yet.
+ */
+void __init btext_prepare_BAT(void)
+{
+ unsigned long vaddr = PAGE_OFFSET + 0x10000000;
+ unsigned long addr;
+ unsigned long lowbits;
+
+ addr = (unsigned long)dispDeviceBase;
+ if (!addr) {
+ boot_text_mapped = 0;
+ return;
+ }
+ if (PVR_VER(mfspr(SPRN_PVR)) != 1) {
+ /* 603, 604, G3, G4, ... */
+ lowbits = addr & ~0xFF000000UL;
+ addr &= 0xFF000000UL;
+ disp_BAT[0] = vaddr | (BL_16M<<2) | 2;
+ disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW);
+ } else {
+ /* 601 */
+ lowbits = addr & ~0xFF800000UL;
+ addr &= 0xFF800000UL;
+ disp_BAT[0] = vaddr | (_PAGE_NO_CACHE | PP_RWXX) | 4;
+ disp_BAT[1] = addr | BL_8M | 0x40;
+ }
+ logicalDisplayBase = (void *) (vaddr + lowbits);
+}
+#endif
+
+
+/* This function can be used to enable the early boot text when doing
+ * OF booting or within bootx init. It must be followed by a btext_unmap()
+ * call before the logical address becomes unusable
+ */
+void __init btext_setup_display(int width, int height, int depth, int pitch,
+ unsigned long address)
+{
+ g_loc_X = 0;
+ g_loc_Y = 0;
+ g_max_loc_X = width / 8;
+ g_max_loc_Y = height / 16;
+ logicalDisplayBase = (unsigned char *)address;
+ dispDeviceBase = (unsigned char *)address;
+ dispDeviceRowBytes = pitch;
+ dispDeviceDepth = depth == 15 ? 16 : depth;
+ dispDeviceRect[0] = dispDeviceRect[1] = 0;
+ dispDeviceRect[2] = width;
+ dispDeviceRect[3] = height;
+ boot_text_mapped = 1;
+}
+
+void __init btext_unmap(void)
+{
+ boot_text_mapped = 0;
+}
+
+/* Here's a small text engine to use during early boot
+ * or for debugging purposes
+ *
+ * todo:
+ *
+ * - build some kind of vgacon with it to enable early printk
+ * - move to a separate file
+ * - add a few video driver hooks to keep in sync with display
+ * changes.
+ */
+
+static void map_boot_text(void)
+{
+ unsigned long base, offset, size;
+ unsigned char *vbase;
+
+ /* By default, we are no longer mapped */
+ boot_text_mapped = 0;
+ if (dispDeviceBase == 0)
+ return;
+ base = ((unsigned long) dispDeviceBase) & 0xFFFFF000UL;
+ offset = ((unsigned long) dispDeviceBase) - base;
+ size = dispDeviceRowBytes * dispDeviceRect[3] + offset
+ + dispDeviceRect[0];
+ vbase = __ioremap(base, size, _PAGE_NO_CACHE);
+ if (vbase == 0)
+ return;
+ logicalDisplayBase = vbase + offset;
+ boot_text_mapped = 1;
+}
+
+int btext_initialize(struct device_node *np)
+{
+ unsigned int width, height, depth, pitch;
+ unsigned long address = 0;
+ const u32 *prop;
+
+ prop = of_get_property(np, "linux,bootx-width", NULL);
+ if (prop == NULL)
+ prop = of_get_property(np, "width", NULL);
+ if (prop == NULL)
+ return -EINVAL;
+ width = *prop;
+ prop = of_get_property(np, "linux,bootx-height", NULL);
+ if (prop == NULL)
+ prop = of_get_property(np, "height", NULL);
+ if (prop == NULL)
+ return -EINVAL;
+ height = *prop;
+ prop = of_get_property(np, "linux,bootx-depth", NULL);
+ if (prop == NULL)
+ prop = of_get_property(np, "depth", NULL);
+ if (prop == NULL)
+ return -EINVAL;
+ depth = *prop;
+ pitch = width * ((depth + 7) / 8);
+ prop = of_get_property(np, "linux,bootx-linebytes", NULL);
+ if (prop == NULL)
+ prop = of_get_property(np, "linebytes", NULL);
+ if (prop && *prop != 0xffffffffu)
+ pitch = *prop;
+ if (pitch == 1)
+ pitch = 0x1000;
+ prop = of_get_property(np, "linux,bootx-addr", NULL);
+ if (prop == NULL)
+ prop = of_get_property(np, "address", NULL);
+ if (prop)
+ address = *prop;
+
+ /* FIXME: Add support for PCI reg properties. Right now, only
+ * reliable on macs
+ */
+ if (address == 0)
+ return -EINVAL;
+
+ g_loc_X = 0;
+ g_loc_Y = 0;
+ g_max_loc_X = width / 8;
+ g_max_loc_Y = height / 16;
+ dispDeviceBase = (unsigned char *)address;
+ dispDeviceRowBytes = pitch;
+ dispDeviceDepth = depth == 15 ? 16 : depth;
+ dispDeviceRect[0] = dispDeviceRect[1] = 0;
+ dispDeviceRect[2] = width;
+ dispDeviceRect[3] = height;
+
+ map_boot_text();
+
+ return 0;
+}
+
+int __init btext_find_display(int allow_nonstdout)
+{
+ const char *name;
+ struct device_node *np = NULL;
+ int rc = -ENODEV;
+
+ name = of_get_property(of_chosen, "linux,stdout-path", NULL);
+ if (name != NULL) {
+ np = of_find_node_by_path(name);
+ if (np != NULL) {
+ if (strcmp(np->type, "display") != 0) {
+ printk("boot stdout isn't a display !\n");
+ of_node_put(np);
+ np = NULL;
+ }
+ }
+ }
+ if (np)
+ rc = btext_initialize(np);
+ if (rc == 0 || !allow_nonstdout)
+ return rc;
+
+ for_each_node_by_type(np, "display") {
+ if (of_get_property(np, "linux,opened", NULL)) {
+ printk("trying %s ...\n", np->full_name);
+ rc = btext_initialize(np);
+ printk("result: %d\n", rc);
+ }
+ if (rc == 0)
+ break;
+ }
+ return rc;
+}
+
+/* Calc the base address of a given point (x,y) */
+static unsigned char * calc_base(int x, int y)
+{
+ unsigned char *base;
+
+ base = logicalDisplayBase;
+ if (base == 0)
+ base = dispDeviceBase;
+ base += (x + dispDeviceRect[0]) * (dispDeviceDepth >> 3);
+ base += (y + dispDeviceRect[1]) * dispDeviceRowBytes;
+ return base;
+}
+
+/* Adjust the display to a new resolution */
+void btext_update_display(unsigned long phys, int width, int height,
+ int depth, int pitch)
+{
+ if (dispDeviceBase == 0)
+ return;
+
+ /* check it's the same frame buffer (within 256MB) */
+ if ((phys ^ (unsigned long)dispDeviceBase) & 0xf0000000)
+ return;
+
+ dispDeviceBase = (__u8 *) phys;
+ dispDeviceRect[0] = 0;
+ dispDeviceRect[1] = 0;
+ dispDeviceRect[2] = width;
+ dispDeviceRect[3] = height;
+ dispDeviceDepth = depth;
+ dispDeviceRowBytes = pitch;
+ if (boot_text_mapped) {
+ iounmap(logicalDisplayBase);
+ boot_text_mapped = 0;
+ }
+ map_boot_text();
+ g_loc_X = 0;
+ g_loc_Y = 0;
+ g_max_loc_X = width / 8;
+ g_max_loc_Y = height / 16;
+}
+EXPORT_SYMBOL(btext_update_display);
+
+void btext_clearscreen(void)
+{
+ unsigned int *base = (unsigned int *)calc_base(0, 0);
+ unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
+ (dispDeviceDepth >> 3)) >> 2;
+ int i,j;
+
+ for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1]); i++)
+ {
+ unsigned int *ptr = base;
+ for(j=width; j; --j)
+ *(ptr++) = 0;
+ base += (dispDeviceRowBytes >> 2);
+ }
+}
+
+void btext_flushscreen(void)
+{
+ unsigned int *base = (unsigned int *)calc_base(0, 0);
+ unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
+ (dispDeviceDepth >> 3)) >> 2;
+ int i,j;
+
+ for (i=0; i < (dispDeviceRect[3] - dispDeviceRect[1]); i++)
+ {
+ unsigned int *ptr = base;
+ for(j = width; j > 0; j -= 8) {
+ __asm__ __volatile__ ("dcbst 0,%0" :: "r" (ptr));
+ ptr += 8;
+ }
+ base += (dispDeviceRowBytes >> 2);
+ }
+ __asm__ __volatile__ ("sync" ::: "memory");
+}
+
+void btext_flushline(void)
+{
+ unsigned int *base = (unsigned int *)calc_base(0, g_loc_Y << 4);
+ unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
+ (dispDeviceDepth >> 3)) >> 2;
+ int i,j;
+
+ for (i=0; i < 16; i++)
+ {
+ unsigned int *ptr = base;
+ for(j = width; j > 0; j -= 8) {
+ __asm__ __volatile__ ("dcbst 0,%0" :: "r" (ptr));
+ ptr += 8;
+ }
+ base += (dispDeviceRowBytes >> 2);
+ }
+ __asm__ __volatile__ ("sync" ::: "memory");
+}
+
+
+#ifndef NO_SCROLL
+static void scrollscreen(void)
+{
+ unsigned int *src = (unsigned int *)calc_base(0,16);
+ unsigned int *dst = (unsigned int *)calc_base(0,0);
+ unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
+ (dispDeviceDepth >> 3)) >> 2;
+ int i,j;
+
+ for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1] - 16); i++)
+ {
+ unsigned int *src_ptr = src;
+ unsigned int *dst_ptr = dst;
+ for(j=width; j; --j)
+ *(dst_ptr++) = *(src_ptr++);
+ src += (dispDeviceRowBytes >> 2);
+ dst += (dispDeviceRowBytes >> 2);
+ }
+ for (i=0; i<16; i++)
+ {
+ unsigned int *dst_ptr = dst;
+ for(j=width; j; --j)
+ *(dst_ptr++) = 0;
+ dst += (dispDeviceRowBytes >> 2);
+ }
+}
+#endif /* ndef NO_SCROLL */
+
+void btext_drawchar(char c)
+{
+ int cline = 0;
+#ifdef NO_SCROLL
+ int x;
+#endif
+ if (!boot_text_mapped)
+ return;
+
+ switch (c) {
+ case '\b':
+ if (g_loc_X > 0)
+ --g_loc_X;
+ break;
+ case '\t':
+ g_loc_X = (g_loc_X & -8) + 8;
+ break;
+ case '\r':
+ g_loc_X = 0;
+ break;
+ case '\n':
+ g_loc_X = 0;
+ g_loc_Y++;
+ cline = 1;
+ break;
+ default:
+ draw_byte(c, g_loc_X++, g_loc_Y);
+ }
+ if (g_loc_X >= g_max_loc_X) {
+ g_loc_X = 0;
+ g_loc_Y++;
+ cline = 1;
+ }
+#ifndef NO_SCROLL
+ while (g_loc_Y >= g_max_loc_Y) {
+ scrollscreen();
+ g_loc_Y--;
+ }
+#else
+ /* wrap around from bottom to top of screen so we don't
+ waste time scrolling each line. -- paulus. */
+ if (g_loc_Y >= g_max_loc_Y)
+ g_loc_Y = 0;
+ if (cline) {
+ for (x = 0; x < g_max_loc_X; ++x)
+ draw_byte(' ', x, g_loc_Y);
+ }
+#endif
+}
+
+void btext_drawstring(const char *c)
+{
+ if (!boot_text_mapped)
+ return;
+ while (*c)
+ btext_drawchar(*c++);
+}
+
+void btext_drawtext(const char *c, unsigned int len)
+{
+ if (!boot_text_mapped)
+ return;
+ while (len--)
+ btext_drawchar(*c++);
+}
+
+void btext_drawhex(unsigned long v)
+{
+ if (!boot_text_mapped)
+ return;
+#ifdef CONFIG_PPC64
+ btext_drawchar(hex_asc_hi(v >> 56));
+ btext_drawchar(hex_asc_lo(v >> 56));
+ btext_drawchar(hex_asc_hi(v >> 48));
+ btext_drawchar(hex_asc_lo(v >> 48));
+ btext_drawchar(hex_asc_hi(v >> 40));
+ btext_drawchar(hex_asc_lo(v >> 40));
+ btext_drawchar(hex_asc_hi(v >> 32));
+ btext_drawchar(hex_asc_lo(v >> 32));
+#endif
+ btext_drawchar(hex_asc_hi(v >> 24));
+ btext_drawchar(hex_asc_lo(v >> 24));
+ btext_drawchar(hex_asc_hi(v >> 16));
+ btext_drawchar(hex_asc_lo(v >> 16));
+ btext_drawchar(hex_asc_hi(v >> 8));
+ btext_drawchar(hex_asc_lo(v >> 8));
+ btext_drawchar(hex_asc_hi(v));
+ btext_drawchar(hex_asc_lo(v));
+ btext_drawchar(' ');
+}
+
+static void draw_byte(unsigned char c, long locX, long locY)
+{
+ unsigned char *base = calc_base(locX << 3, locY << 4);
+ unsigned char *font = &vga_font[((unsigned int)c) * 16];
+ int rb = dispDeviceRowBytes;
+
+ switch(dispDeviceDepth) {
+ case 24:
+ case 32:
+ draw_byte_32(font, (unsigned int *)base, rb);
+ break;
+ case 15:
+ case 16:
+ draw_byte_16(font, (unsigned int *)base, rb);
+ break;
+ case 8:
+ draw_byte_8(font, (unsigned int *)base, rb);
+ break;
+ }
+}
+
+static unsigned int expand_bits_8[16] = {
+ 0x00000000,
+ 0x000000ff,
+ 0x0000ff00,
+ 0x0000ffff,
+ 0x00ff0000,
+ 0x00ff00ff,
+ 0x00ffff00,
+ 0x00ffffff,
+ 0xff000000,
+ 0xff0000ff,
+ 0xff00ff00,
+ 0xff00ffff,
+ 0xffff0000,
+ 0xffff00ff,
+ 0xffffff00,
+ 0xffffffff
+};
+
+static unsigned int expand_bits_16[4] = {
+ 0x00000000,
+ 0x0000ffff,
+ 0xffff0000,
+ 0xffffffff
+};
+
+
+static void draw_byte_32(unsigned char *font, unsigned int *base, int rb)
+{
+ int l, bits;
+ int fg = 0xFFFFFFFFUL;
+ int bg = 0x00000000UL;
+
+ for (l = 0; l < 16; ++l)
+ {
+ bits = *font++;
+ base[0] = (-(bits >> 7) & fg) ^ bg;
+ base[1] = (-((bits >> 6) & 1) & fg) ^ bg;
+ base[2] = (-((bits >> 5) & 1) & fg) ^ bg;
+ base[3] = (-((bits >> 4) & 1) & fg) ^ bg;
+ base[4] = (-((bits >> 3) & 1) & fg) ^ bg;
+ base[5] = (-((bits >> 2) & 1) & fg) ^ bg;
+ base[6] = (-((bits >> 1) & 1) & fg) ^ bg;
+ base[7] = (-(bits & 1) & fg) ^ bg;
+ base = (unsigned int *) ((char *)base + rb);
+ }
+}
+
+static void draw_byte_16(unsigned char *font, unsigned int *base, int rb)
+{
+ int l, bits;
+ int fg = 0xFFFFFFFFUL;
+ int bg = 0x00000000UL;
+ unsigned int *eb = (int *)expand_bits_16;
+
+ for (l = 0; l < 16; ++l)
+ {
+ bits = *font++;
+ base[0] = (eb[bits >> 6] & fg) ^ bg;
+ base[1] = (eb[(bits >> 4) & 3] & fg) ^ bg;
+ base[2] = (eb[(bits >> 2) & 3] & fg) ^ bg;
+ base[3] = (eb[bits & 3] & fg) ^ bg;
+ base = (unsigned int *) ((char *)base + rb);
+ }
+}
+
+static void draw_byte_8(unsigned char *font, unsigned int *base, int rb)
+{
+ int l, bits;
+ int fg = 0x0F0F0F0FUL;
+ int bg = 0x00000000UL;
+ unsigned int *eb = (int *)expand_bits_8;
+
+ for (l = 0; l < 16; ++l)
+ {
+ bits = *font++;
+ base[0] = (eb[bits >> 4] & fg) ^ bg;
+ base[1] = (eb[bits & 0xf] & fg) ^ bg;
+ base = (unsigned int *) ((char *)base + rb);
+ }
+}
+
+static unsigned char vga_font[cmapsz] = {
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd,
+0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xff,
+0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe,
+0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18,
+0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c,
+0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00,
+0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd,
+0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x1e, 0x0e,
+0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30,
+0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x63,
+0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8,
+0xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0e,
+0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
+0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xdb,
+0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6,
+0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c,
+0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0,
+0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c,
+0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c,
+0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
+0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c,
+0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18,
+0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c,
+0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30,
+0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x18,
+0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e,
+0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
+0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe,
+0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0,
+0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18,
+0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
+0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00,
+0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00,
+0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60,
+0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xde, 0xde,
+0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38,
+0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0,
+0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x6c,
+0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68,
+0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66,
+0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x0c,
+0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60,
+0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xe7,
+0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
+0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66,
+0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c,
+0x0c, 0x0e, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c,
+0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
+0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
+0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3,
+0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18,
+0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3,
+0xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30,
+0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
+0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c,
+0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
+0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c,
+0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x60,
+0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc,
+0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc,
+0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00, 0x00, 0x00, 0xe0, 0x60,
+0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06,
+0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0xe0, 0x60,
+0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb,
+0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66,
+0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60,
+0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x30,
+0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3,
+0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6,
+0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x18,
+0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6,
+0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66,
+0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00,
+0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe,
+0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c,
+0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c,
+0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38,
+0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06,
+0x3c, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe,
+0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00,
+0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x66,
+0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6,
+0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, 0x00,
+0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
+0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b,
+0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x6c,
+0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6,
+0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18,
+0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc,
+0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00,
+0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00,
+0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
+0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e,
+0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18,
+0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66,
+0x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18,
+0xd8, 0x70, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c,
+0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30,
+0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc,
+0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc,
+0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
+0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c,
+0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0,
+0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06,
+0x0c, 0x1f, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30,
+0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18,
+0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36,
+0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x44, 0x11, 0x44,
+0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44,
+0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa,
+0x55, 0xaa, 0x55, 0xaa, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77,
+0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36,
+0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36,
+0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xf0, 0xf0, 0xf0,
+0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+0x0f, 0x0f, 0x0f, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0,
+0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8,
+0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66,
+0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38,
+0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66,
+0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60,
+0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c,
+0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18,
+0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30,
+0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x1b, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00,
+0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c,
+0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0c, 0x0c,
+0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00,
+};
+
+void __init udbg_init_btext(void)
+{
+ /* If btext is enabled, we might have a BAT setup for early display,
+ * thus we do enable some very basic udbg output
+ */
+ udbg_putc = btext_drawchar;
+}
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
new file mode 100644
index 00000000..92c6b008
--- /dev/null
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -0,0 +1,838 @@
+/*
+ * Processor cache information made available to userspace via sysfs;
+ * intended to be compatible with x86 intel_cacheinfo implementation.
+ *
+ * Copyright 2008 IBM Corporation
+ * Author: Nathan Lynch
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/list.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <asm/prom.h>
+
+#include "cacheinfo.h"
+
+/* per-cpu object for tracking:
+ * - a "cache" kobject for the top-level directory
+ * - a list of "index" objects representing the cpu's local cache hierarchy
+ */
+struct cache_dir {
+ struct kobject *kobj; /* bare (not embedded) kobject for cache
+ * directory */
+ struct cache_index_dir *index; /* list of index objects */
+};
+
+/* "index" object: each cpu's cache directory has an index
+ * subdirectory corresponding to a cache object associated with the
+ * cpu. This object's lifetime is managed via the embedded kobject.
+ */
+struct cache_index_dir {
+ struct kobject kobj;
+ struct cache_index_dir *next; /* next index in parent directory */
+ struct cache *cache;
+};
+
+/* Template for determining which OF properties to query for a given
+ * cache type */
+struct cache_type_info {
+ const char *name;
+ const char *size_prop;
+
+ /* Allow for both [di]-cache-line-size and
+ * [di]-cache-block-size properties. According to the PowerPC
+ * Processor binding, -line-size should be provided if it
+ * differs from the cache block size (that which is operated
+ * on by cache instructions), so we look for -line-size first.
+ * See cache_get_line_size(). */
+
+ const char *line_size_props[2];
+ const char *nr_sets_prop;
+};
+
+/* These are used to index the cache_type_info array. */
+#define CACHE_TYPE_UNIFIED 0
+#define CACHE_TYPE_INSTRUCTION 1
+#define CACHE_TYPE_DATA 2
+
+static const struct cache_type_info cache_type_info[] = {
+ {
+ /* PowerPC Processor binding says the [di]-cache-*
+ * must be equal on unified caches, so just use
+ * d-cache properties. */
+ .name = "Unified",
+ .size_prop = "d-cache-size",
+ .line_size_props = { "d-cache-line-size",
+ "d-cache-block-size", },
+ .nr_sets_prop = "d-cache-sets",
+ },
+ {
+ .name = "Instruction",
+ .size_prop = "i-cache-size",
+ .line_size_props = { "i-cache-line-size",
+ "i-cache-block-size", },
+ .nr_sets_prop = "i-cache-sets",
+ },
+ {
+ .name = "Data",
+ .size_prop = "d-cache-size",
+ .line_size_props = { "d-cache-line-size",
+ "d-cache-block-size", },
+ .nr_sets_prop = "d-cache-sets",
+ },
+};
+
+/* Cache object: each instance of this corresponds to a distinct cache
+ * in the system. There are separate objects for Harvard caches: one
+ * each for instruction and data, and each refers to the same OF node.
+ * The refcount of the OF node is elevated for the lifetime of the
+ * cache object. A cache object is released when its shared_cpu_map
+ * is cleared (see cache_cpu_clear).
+ *
+ * A cache object is on two lists: an unsorted global list
+ * (cache_list) of cache objects; and a singly-linked list
+ * representing the local cache hierarchy, which is ordered by level
+ * (e.g. L1d -> L1i -> L2 -> L3).
+ */
+struct cache {
+ struct device_node *ofnode; /* OF node for this cache, may be cpu */
+ struct cpumask shared_cpu_map; /* online CPUs using this cache */
+ int type; /* split cache disambiguation */
+ int level; /* level not explicit in device tree */
+ struct list_head list; /* global list of cache objects */
+ struct cache *next_local; /* next cache of >= level */
+};
+
+static DEFINE_PER_CPU(struct cache_dir *, cache_dir_pcpu);
+
+/* traversal/modification of this list occurs only at cpu hotplug time;
+ * access is serialized by cpu hotplug locking
+ */
+static LIST_HEAD(cache_list);
+
+static struct cache_index_dir *kobj_to_cache_index_dir(struct kobject *k)
+{
+ return container_of(k, struct cache_index_dir, kobj);
+}
+
+static const char *cache_type_string(const struct cache *cache)
+{
+ return cache_type_info[cache->type].name;
+}
+
+static void __cpuinit cache_init(struct cache *cache, int type, int level, struct device_node *ofnode)
+{
+ cache->type = type;
+ cache->level = level;
+ cache->ofnode = of_node_get(ofnode);
+ INIT_LIST_HEAD(&cache->list);
+ list_add(&cache->list, &cache_list);
+}
+
+static struct cache *__cpuinit new_cache(int type, int level, struct device_node *ofnode)
+{
+ struct cache *cache;
+
+ cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+ if (cache)
+ cache_init(cache, type, level, ofnode);
+
+ return cache;
+}
+
+static void release_cache_debugcheck(struct cache *cache)
+{
+ struct cache *iter;
+
+ list_for_each_entry(iter, &cache_list, list)
+ WARN_ONCE(iter->next_local == cache,
+ "cache for %s(%s) refers to cache for %s(%s)\n",
+ iter->ofnode->full_name,
+ cache_type_string(iter),
+ cache->ofnode->full_name,
+ cache_type_string(cache));
+}
+
+static void release_cache(struct cache *cache)
+{
+ if (!cache)
+ return;
+
+ pr_debug("freeing L%d %s cache for %s\n", cache->level,
+ cache_type_string(cache), cache->ofnode->full_name);
+
+ release_cache_debugcheck(cache);
+ list_del(&cache->list);
+ of_node_put(cache->ofnode);
+ kfree(cache);
+}
+
+static void cache_cpu_set(struct cache *cache, int cpu)
+{
+ struct cache *next = cache;
+
+ while (next) {
+ WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map),
+ "CPU %i already accounted in %s(%s)\n",
+ cpu, next->ofnode->full_name,
+ cache_type_string(next));
+ cpumask_set_cpu(cpu, &next->shared_cpu_map);
+ next = next->next_local;
+ }
+}
+
+static int cache_size(const struct cache *cache, unsigned int *ret)
+{
+ const char *propname;
+ const u32 *cache_size;
+
+ propname = cache_type_info[cache->type].size_prop;
+
+ cache_size = of_get_property(cache->ofnode, propname, NULL);
+ if (!cache_size)
+ return -ENODEV;
+
+ *ret = *cache_size;
+ return 0;
+}
+
+static int cache_size_kb(const struct cache *cache, unsigned int *ret)
+{
+ unsigned int size;
+
+ if (cache_size(cache, &size))
+ return -ENODEV;
+
+ *ret = size / 1024;
+ return 0;
+}
+
+/* not cache_line_size() because that's a macro in include/linux/cache.h */
+static int cache_get_line_size(const struct cache *cache, unsigned int *ret)
+{
+ const u32 *line_size;
+ int i, lim;
+
+ lim = ARRAY_SIZE(cache_type_info[cache->type].line_size_props);
+
+ for (i = 0; i < lim; i++) {
+ const char *propname;
+
+ propname = cache_type_info[cache->type].line_size_props[i];
+ line_size = of_get_property(cache->ofnode, propname, NULL);
+ if (line_size)
+ break;
+ }
+
+ if (!line_size)
+ return -ENODEV;
+
+ *ret = *line_size;
+ return 0;
+}
+
+static int cache_nr_sets(const struct cache *cache, unsigned int *ret)
+{
+ const char *propname;
+ const u32 *nr_sets;
+
+ propname = cache_type_info[cache->type].nr_sets_prop;
+
+ nr_sets = of_get_property(cache->ofnode, propname, NULL);
+ if (!nr_sets)
+ return -ENODEV;
+
+ *ret = *nr_sets;
+ return 0;
+}
+
+static int cache_associativity(const struct cache *cache, unsigned int *ret)
+{
+ unsigned int line_size;
+ unsigned int nr_sets;
+ unsigned int size;
+
+ if (cache_nr_sets(cache, &nr_sets))
+ goto err;
+
+ /* If the cache is fully associative, there is no need to
+ * check the other properties.
+ */
+ if (nr_sets == 1) {
+ *ret = 0;
+ return 0;
+ }
+
+ if (cache_get_line_size(cache, &line_size))
+ goto err;
+ if (cache_size(cache, &size))
+ goto err;
+
+ if (!(nr_sets > 0 && size > 0 && line_size > 0))
+ goto err;
+
+ *ret = (size / nr_sets) / line_size;
+ return 0;
+err:
+ return -ENODEV;
+}
+
+/* helper for dealing with split caches */
+static struct cache *cache_find_first_sibling(struct cache *cache)
+{
+ struct cache *iter;
+
+ if (cache->type == CACHE_TYPE_UNIFIED)
+ return cache;
+
+ list_for_each_entry(iter, &cache_list, list)
+ if (iter->ofnode == cache->ofnode && iter->next_local == cache)
+ return iter;
+
+ return cache;
+}
+
+/* return the first cache on a local list matching node */
+static struct cache *cache_lookup_by_node(const struct device_node *node)
+{
+ struct cache *cache = NULL;
+ struct cache *iter;
+
+ list_for_each_entry(iter, &cache_list, list) {
+ if (iter->ofnode != node)
+ continue;
+ cache = cache_find_first_sibling(iter);
+ break;
+ }
+
+ return cache;
+}
+
+static bool cache_node_is_unified(const struct device_node *np)
+{
+ return of_get_property(np, "cache-unified", NULL);
+}
+
+static struct cache *__cpuinit cache_do_one_devnode_unified(struct device_node *node, int level)
+{
+ struct cache *cache;
+
+ pr_debug("creating L%d ucache for %s\n", level, node->full_name);
+
+ cache = new_cache(CACHE_TYPE_UNIFIED, level, node);
+
+ return cache;
+}
+
+static struct cache *__cpuinit cache_do_one_devnode_split(struct device_node *node, int level)
+{
+ struct cache *dcache, *icache;
+
+ pr_debug("creating L%d dcache and icache for %s\n", level,
+ node->full_name);
+
+ dcache = new_cache(CACHE_TYPE_DATA, level, node);
+ icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node);
+
+ if (!dcache || !icache)
+ goto err;
+
+ dcache->next_local = icache;
+
+ return dcache;
+err:
+ release_cache(dcache);
+ release_cache(icache);
+ return NULL;
+}
+
+static struct cache *__cpuinit cache_do_one_devnode(struct device_node *node, int level)
+{
+ struct cache *cache;
+
+ if (cache_node_is_unified(node))
+ cache = cache_do_one_devnode_unified(node, level);
+ else
+ cache = cache_do_one_devnode_split(node, level);
+
+ return cache;
+}
+
+static struct cache *__cpuinit cache_lookup_or_instantiate(struct device_node *node, int level)
+{
+ struct cache *cache;
+
+ cache = cache_lookup_by_node(node);
+
+ WARN_ONCE(cache && cache->level != level,
+ "cache level mismatch on lookup (got %d, expected %d)\n",
+ cache->level, level);
+
+ if (!cache)
+ cache = cache_do_one_devnode(node, level);
+
+ return cache;
+}
+
+static void __cpuinit link_cache_lists(struct cache *smaller, struct cache *bigger)
+{
+ while (smaller->next_local) {
+ if (smaller->next_local == bigger)
+ return; /* already linked */
+ smaller = smaller->next_local;
+ }
+
+ smaller->next_local = bigger;
+}
+
+static void __cpuinit do_subsidiary_caches_debugcheck(struct cache *cache)
+{
+ WARN_ON_ONCE(cache->level != 1);
+ WARN_ON_ONCE(strcmp(cache->ofnode->type, "cpu"));
+}
+
+static void __cpuinit do_subsidiary_caches(struct cache *cache)
+{
+ struct device_node *subcache_node;
+ int level = cache->level;
+
+ do_subsidiary_caches_debugcheck(cache);
+
+ while ((subcache_node = of_find_next_cache_node(cache->ofnode))) {
+ struct cache *subcache;
+
+ level++;
+ subcache = cache_lookup_or_instantiate(subcache_node, level);
+ of_node_put(subcache_node);
+ if (!subcache)
+ break;
+
+ link_cache_lists(cache, subcache);
+ cache = subcache;
+ }
+}
+
+static struct cache *__cpuinit cache_chain_instantiate(unsigned int cpu_id)
+{
+ struct device_node *cpu_node;
+ struct cache *cpu_cache = NULL;
+
+ pr_debug("creating cache object(s) for CPU %i\n", cpu_id);
+
+ cpu_node = of_get_cpu_node(cpu_id, NULL);
+ WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
+ if (!cpu_node)
+ goto out;
+
+ cpu_cache = cache_lookup_or_instantiate(cpu_node, 1);
+ if (!cpu_cache)
+ goto out;
+
+ do_subsidiary_caches(cpu_cache);
+
+ cache_cpu_set(cpu_cache, cpu_id);
+out:
+ of_node_put(cpu_node);
+
+ return cpu_cache;
+}
+
+static struct cache_dir *__cpuinit cacheinfo_create_cache_dir(unsigned int cpu_id)
+{
+ struct cache_dir *cache_dir;
+ struct device *dev;
+ struct kobject *kobj = NULL;
+
+ dev = get_cpu_device(cpu_id);
+ WARN_ONCE(!dev, "no dev for CPU %i\n", cpu_id);
+ if (!dev)
+ goto err;
+
+ kobj = kobject_create_and_add("cache", &dev->kobj);
+ if (!kobj)
+ goto err;
+
+ cache_dir = kzalloc(sizeof(*cache_dir), GFP_KERNEL);
+ if (!cache_dir)
+ goto err;
+
+ cache_dir->kobj = kobj;
+
+ WARN_ON_ONCE(per_cpu(cache_dir_pcpu, cpu_id) != NULL);
+
+ per_cpu(cache_dir_pcpu, cpu_id) = cache_dir;
+
+ return cache_dir;
+err:
+ kobject_put(kobj);
+ return NULL;
+}
+
+static void cache_index_release(struct kobject *kobj)
+{
+ struct cache_index_dir *index;
+
+ index = kobj_to_cache_index_dir(kobj);
+
+ pr_debug("freeing index directory for L%d %s cache\n",
+ index->cache->level, cache_type_string(index->cache));
+
+ kfree(index);
+}
+
+static ssize_t cache_index_show(struct kobject *k, struct attribute *attr, char *buf)
+{
+ struct kobj_attribute *kobj_attr;
+
+ kobj_attr = container_of(attr, struct kobj_attribute, attr);
+
+ return kobj_attr->show(k, kobj_attr, buf);
+}
+
+static struct cache *index_kobj_to_cache(struct kobject *k)
+{
+ struct cache_index_dir *index;
+
+ index = kobj_to_cache_index_dir(k);
+
+ return index->cache;
+}
+
+static ssize_t size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ unsigned int size_kb;
+ struct cache *cache;
+
+ cache = index_kobj_to_cache(k);
+
+ if (cache_size_kb(cache, &size_kb))
+ return -ENODEV;
+
+ return sprintf(buf, "%uK\n", size_kb);
+}
+
+static struct kobj_attribute cache_size_attr =
+ __ATTR(size, 0444, size_show, NULL);
+
+
+static ssize_t line_size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ unsigned int line_size;
+ struct cache *cache;
+
+ cache = index_kobj_to_cache(k);
+
+ if (cache_get_line_size(cache, &line_size))
+ return -ENODEV;
+
+ return sprintf(buf, "%u\n", line_size);
+}
+
+static struct kobj_attribute cache_line_size_attr =
+ __ATTR(coherency_line_size, 0444, line_size_show, NULL);
+
+static ssize_t nr_sets_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ unsigned int nr_sets;
+ struct cache *cache;
+
+ cache = index_kobj_to_cache(k);
+
+ if (cache_nr_sets(cache, &nr_sets))
+ return -ENODEV;
+
+ return sprintf(buf, "%u\n", nr_sets);
+}
+
+static struct kobj_attribute cache_nr_sets_attr =
+ __ATTR(number_of_sets, 0444, nr_sets_show, NULL);
+
+static ssize_t associativity_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ unsigned int associativity;
+ struct cache *cache;
+
+ cache = index_kobj_to_cache(k);
+
+ if (cache_associativity(cache, &associativity))
+ return -ENODEV;
+
+ return sprintf(buf, "%u\n", associativity);
+}
+
+static struct kobj_attribute cache_assoc_attr =
+ __ATTR(ways_of_associativity, 0444, associativity_show, NULL);
+
+static ssize_t type_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ struct cache *cache;
+
+ cache = index_kobj_to_cache(k);
+
+ return sprintf(buf, "%s\n", cache_type_string(cache));
+}
+
+static struct kobj_attribute cache_type_attr =
+ __ATTR(type, 0444, type_show, NULL);
+
+static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ struct cache_index_dir *index;
+ struct cache *cache;
+
+ index = kobj_to_cache_index_dir(k);
+ cache = index->cache;
+
+ return sprintf(buf, "%d\n", cache->level);
+}
+
+static struct kobj_attribute cache_level_attr =
+ __ATTR(level, 0444, level_show, NULL);
+
+static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ struct cache_index_dir *index;
+ struct cache *cache;
+ int len;
+ int n = 0;
+
+ index = kobj_to_cache_index_dir(k);
+ cache = index->cache;
+ len = PAGE_SIZE - 2;
+
+ if (len > 1) {
+ n = cpumask_scnprintf(buf, len, &cache->shared_cpu_map);
+ buf[n++] = '\n';
+ buf[n] = '\0';
+ }
+ return n;
+}
+
+static struct kobj_attribute cache_shared_cpu_map_attr =
+ __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL);
+
+/* Attributes which should always be created -- the kobject/sysfs core
+ * does this automatically via kobj_type->default_attrs. This is the
+ * minimum data required to uniquely identify a cache.
+ */
+static struct attribute *cache_index_default_attrs[] = {
+ &cache_type_attr.attr,
+ &cache_level_attr.attr,
+ &cache_shared_cpu_map_attr.attr,
+ NULL,
+};
+
+/* Attributes which should be created if the cache device node has the
+ * right properties -- see cacheinfo_create_index_opt_attrs
+ */
+static struct kobj_attribute *cache_index_opt_attrs[] = {
+ &cache_size_attr,
+ &cache_line_size_attr,
+ &cache_nr_sets_attr,
+ &cache_assoc_attr,
+};
+
+static const struct sysfs_ops cache_index_ops = {
+ .show = cache_index_show,
+};
+
+static struct kobj_type cache_index_type = {
+ .release = cache_index_release,
+ .sysfs_ops = &cache_index_ops,
+ .default_attrs = cache_index_default_attrs,
+};
+
+static void __cpuinit cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
+{
+ const char *cache_name;
+ const char *cache_type;
+ struct cache *cache;
+ char *buf;
+ int i;
+
+ buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!buf)
+ return;
+
+ cache = dir->cache;
+ cache_name = cache->ofnode->full_name;
+ cache_type = cache_type_string(cache);
+
+ /* We don't want to create an attribute that can't provide a
+ * meaningful value. Check the return value of each optional
+ * attribute's ->show method before registering the
+ * attribute.
+ */
+ for (i = 0; i < ARRAY_SIZE(cache_index_opt_attrs); i++) {
+ struct kobj_attribute *attr;
+ ssize_t rc;
+
+ attr = cache_index_opt_attrs[i];
+
+ rc = attr->show(&dir->kobj, attr, buf);
+ if (rc <= 0) {
+ pr_debug("not creating %s attribute for "
+ "%s(%s) (rc = %zd)\n",
+ attr->attr.name, cache_name,
+ cache_type, rc);
+ continue;
+ }
+ if (sysfs_create_file(&dir->kobj, &attr->attr))
+ pr_debug("could not create %s attribute for %s(%s)\n",
+ attr->attr.name, cache_name, cache_type);
+ }
+
+ kfree(buf);
+}
+
+static void __cpuinit cacheinfo_create_index_dir(struct cache *cache, int index, struct cache_dir *cache_dir)
+{
+ struct cache_index_dir *index_dir;
+ int rc;
+
+ index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL);
+ if (!index_dir)
+ goto err;
+
+ index_dir->cache = cache;
+
+ rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type,
+ cache_dir->kobj, "index%d", index);
+ if (rc)
+ goto err;
+
+ index_dir->next = cache_dir->index;
+ cache_dir->index = index_dir;
+
+ cacheinfo_create_index_opt_attrs(index_dir);
+
+ return;
+err:
+ kfree(index_dir);
+}
+
+static void __cpuinit cacheinfo_sysfs_populate(unsigned int cpu_id, struct cache *cache_list)
+{
+ struct cache_dir *cache_dir;
+ struct cache *cache;
+ int index = 0;
+
+ cache_dir = cacheinfo_create_cache_dir(cpu_id);
+ if (!cache_dir)
+ return;
+
+ cache = cache_list;
+ while (cache) {
+ cacheinfo_create_index_dir(cache, index, cache_dir);
+ index++;
+ cache = cache->next_local;
+ }
+}
+
+void __cpuinit cacheinfo_cpu_online(unsigned int cpu_id)
+{
+ struct cache *cache;
+
+ cache = cache_chain_instantiate(cpu_id);
+ if (!cache)
+ return;
+
+ cacheinfo_sysfs_populate(cpu_id, cache);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU /* functions needed for cpu offline */
+
+static struct cache *cache_lookup_by_cpu(unsigned int cpu_id)
+{
+ struct device_node *cpu_node;
+ struct cache *cache;
+
+ cpu_node = of_get_cpu_node(cpu_id, NULL);
+ WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
+ if (!cpu_node)
+ return NULL;
+
+ cache = cache_lookup_by_node(cpu_node);
+ of_node_put(cpu_node);
+
+ return cache;
+}
+
+static void remove_index_dirs(struct cache_dir *cache_dir)
+{
+ struct cache_index_dir *index;
+
+ index = cache_dir->index;
+
+ while (index) {
+ struct cache_index_dir *next;
+
+ next = index->next;
+ kobject_put(&index->kobj);
+ index = next;
+ }
+}
+
+static void remove_cache_dir(struct cache_dir *cache_dir)
+{
+ remove_index_dirs(cache_dir);
+
+ kobject_put(cache_dir->kobj);
+
+ kfree(cache_dir);
+}
+
+static void cache_cpu_clear(struct cache *cache, int cpu)
+{
+ while (cache) {
+ struct cache *next = cache->next_local;
+
+ WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map),
+ "CPU %i not accounted in %s(%s)\n",
+ cpu, cache->ofnode->full_name,
+ cache_type_string(cache));
+
+ cpumask_clear_cpu(cpu, &cache->shared_cpu_map);
+
+ /* Release the cache object if all the cpus using it
+ * are offline */
+ if (cpumask_empty(&cache->shared_cpu_map))
+ release_cache(cache);
+
+ cache = next;
+ }
+}
+
+void cacheinfo_cpu_offline(unsigned int cpu_id)
+{
+ struct cache_dir *cache_dir;
+ struct cache *cache;
+
+ /* Prevent userspace from seeing inconsistent state - remove
+ * the sysfs hierarchy first */
+ cache_dir = per_cpu(cache_dir_pcpu, cpu_id);
+
+ /* careful, sysfs population may have failed */
+ if (cache_dir)
+ remove_cache_dir(cache_dir);
+
+ per_cpu(cache_dir_pcpu, cpu_id) = NULL;
+
+ /* clear the CPU's bit in its cache chain, possibly freeing
+ * cache objects */
+ cache = cache_lookup_by_cpu(cpu_id);
+ if (cache)
+ cache_cpu_clear(cache, cpu_id);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/powerpc/kernel/cacheinfo.h b/arch/powerpc/kernel/cacheinfo.h
new file mode 100644
index 00000000..a7b74d36
--- /dev/null
+++ b/arch/powerpc/kernel/cacheinfo.h
@@ -0,0 +1,8 @@
+#ifndef _PPC_CACHEINFO_H
+#define _PPC_CACHEINFO_H
+
+/* These are just hooks for sysfs.c to use. */
+extern void cacheinfo_cpu_online(unsigned int cpu_id);
+extern void cacheinfo_cpu_offline(unsigned int cpu_id);
+
+#endif /* _PPC_CACHEINFO_H */
diff --git a/arch/powerpc/kernel/clock.c b/arch/powerpc/kernel/clock.c
new file mode 100644
index 00000000..a764b477
--- /dev/null
+++ b/arch/powerpc/kernel/clock.c
@@ -0,0 +1,82 @@
+/*
+ * Dummy clk implementations for powerpc.
+ * These need to be overridden in platform code.
+ */
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <asm/clk_interface.h>
+
+struct clk_interface clk_functions;
+
+struct clk *clk_get(struct device *dev, const char *id)
+{
+ if (clk_functions.clk_get)
+ return clk_functions.clk_get(dev, id);
+ return ERR_PTR(-ENOSYS);
+}
+EXPORT_SYMBOL(clk_get);
+
+void clk_put(struct clk *clk)
+{
+ if (clk_functions.clk_put)
+ clk_functions.clk_put(clk);
+}
+EXPORT_SYMBOL(clk_put);
+
+int clk_enable(struct clk *clk)
+{
+ if (clk_functions.clk_enable)
+ return clk_functions.clk_enable(clk);
+ return -ENOSYS;
+}
+EXPORT_SYMBOL(clk_enable);
+
+void clk_disable(struct clk *clk)
+{
+ if (clk_functions.clk_disable)
+ clk_functions.clk_disable(clk);
+}
+EXPORT_SYMBOL(clk_disable);
+
+unsigned long clk_get_rate(struct clk *clk)
+{
+ if (clk_functions.clk_get_rate)
+ return clk_functions.clk_get_rate(clk);
+ return 0;
+}
+EXPORT_SYMBOL(clk_get_rate);
+
+long clk_round_rate(struct clk *clk, unsigned long rate)
+{
+ if (clk_functions.clk_round_rate)
+ return clk_functions.clk_round_rate(clk, rate);
+ return -ENOSYS;
+}
+EXPORT_SYMBOL(clk_round_rate);
+
+int clk_set_rate(struct clk *clk, unsigned long rate)
+{
+ if (clk_functions.clk_set_rate)
+ return clk_functions.clk_set_rate(clk, rate);
+ return -ENOSYS;
+}
+EXPORT_SYMBOL(clk_set_rate);
+
+struct clk *clk_get_parent(struct clk *clk)
+{
+ if (clk_functions.clk_get_parent)
+ return clk_functions.clk_get_parent(clk);
+ return ERR_PTR(-ENOSYS);
+}
+EXPORT_SYMBOL(clk_get_parent);
+
+int clk_set_parent(struct clk *clk, struct clk *parent)
+{
+ if (clk_functions.clk_set_parent)
+ return clk_functions.clk_set_parent(clk, parent);
+ return -ENOSYS;
+}
+EXPORT_SYMBOL(clk_set_parent);
diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c
new file mode 100644
index 00000000..108ff14e
--- /dev/null
+++ b/arch/powerpc/kernel/compat_audit.c
@@ -0,0 +1,43 @@
+#undef __powerpc64__
+#include <asm/unistd.h>
+
+unsigned ppc32_dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+unsigned ppc32_chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+unsigned ppc32_write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+unsigned ppc32_read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+unsigned ppc32_signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int ppc32_classify_syscall(unsigned syscall)
+{
+ switch(syscall) {
+ case __NR_open:
+ return 2;
+ case __NR_openat:
+ return 3;
+ case __NR_socketcall:
+ return 4;
+ case __NR_execve:
+ return 5;
+ default:
+ return 1;
+ }
+}
diff --git a/arch/powerpc/kernel/cpu_setup_44x.S b/arch/powerpc/kernel/cpu_setup_44x.S
new file mode 100644
index 00000000..e32b4a9a
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_44x.S
@@ -0,0 +1,74 @@
+/*
+ * This file contains low level CPU setup functions.
+ * Valentine Barshak <vbarshak@ru.mvista.com>
+ * MontaVista Software, Inc (c) 2007
+ *
+ * Based on cpu_setup_6xx code by
+ * Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+
+_GLOBAL(__setup_cpu_440ep)
+ b __init_fpu_44x
+_GLOBAL(__setup_cpu_440epx)
+ mflr r4
+ bl __init_fpu_44x
+ bl __plb_disable_wrp
+ bl __fixup_440A_mcheck
+ mtlr r4
+ blr
+_GLOBAL(__setup_cpu_440grx)
+ mflr r4
+ bl __plb_disable_wrp
+ bl __fixup_440A_mcheck
+ mtlr r4
+ blr
+_GLOBAL(__setup_cpu_460ex)
+_GLOBAL(__setup_cpu_460gt)
+_GLOBAL(__setup_cpu_460sx)
+_GLOBAL(__setup_cpu_apm821xx)
+ mflr r4
+ bl __init_fpu_44x
+ bl __fixup_440A_mcheck
+ mtlr r4
+ blr
+
+_GLOBAL(__setup_cpu_440x5)
+_GLOBAL(__setup_cpu_440gx)
+_GLOBAL(__setup_cpu_440spe)
+ b __fixup_440A_mcheck
+
+/* enable APU between CPU and FPU */
+_GLOBAL(__init_fpu_44x)
+ mfspr r3,SPRN_CCR0
+ /* Clear DAPUIB flag in CCR0 */
+ rlwinm r3,r3,0,12,10
+ mtspr SPRN_CCR0,r3
+ isync
+ blr
+
+/*
+ * Workaround for the incorrect write to DDR SDRAM errata.
+ * The write address can be corrupted during writes to
+ * DDR SDRAM when write pipelining is enabled on PLB0.
+ * Disable write pipelining here.
+ */
+#define DCRN_PLB4A0_ACR 0x81
+
+_GLOBAL(__plb_disable_wrp)
+ mfdcr r3,DCRN_PLB4A0_ACR
+ /* clear WRP bit in PLB4A0_ACR */
+ rlwinm r3,r3,0,8,6
+ mtdcr DCRN_PLB4A0_ACR,r3
+ isync
+ blr
+
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
new file mode 100644
index 00000000..f8cd9fba
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -0,0 +1,489 @@
+/*
+ * This file contains low level CPU setup functions.
+ * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+#include <asm/mmu.h>
+
+_GLOBAL(__setup_cpu_603)
+ mflr r5
+BEGIN_MMU_FTR_SECTION
+ li r10,0
+ mtspr SPRN_SPRG_603_LRU,r10 /* init SW LRU tracking */
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+BEGIN_FTR_SECTION
+ bl __init_fpu_registers
+END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE)
+ bl setup_common_caches
+ mtlr r5
+ blr
+_GLOBAL(__setup_cpu_604)
+ mflr r5
+ bl setup_common_caches
+ bl setup_604_hid0
+ mtlr r5
+ blr
+_GLOBAL(__setup_cpu_750)
+ mflr r5
+ bl __init_fpu_registers
+ bl setup_common_caches
+ bl setup_750_7400_hid0
+ mtlr r5
+ blr
+_GLOBAL(__setup_cpu_750cx)
+ mflr r5
+ bl __init_fpu_registers
+ bl setup_common_caches
+ bl setup_750_7400_hid0
+ bl setup_750cx
+ mtlr r5
+ blr
+_GLOBAL(__setup_cpu_750fx)
+ mflr r5
+ bl __init_fpu_registers
+ bl setup_common_caches
+ bl setup_750_7400_hid0
+ bl setup_750fx
+ mtlr r5
+ blr
+_GLOBAL(__setup_cpu_7400)
+ mflr r5
+ bl __init_fpu_registers
+ bl setup_7400_workarounds
+ bl setup_common_caches
+ bl setup_750_7400_hid0
+ mtlr r5
+ blr
+_GLOBAL(__setup_cpu_7410)
+ mflr r5
+ bl __init_fpu_registers
+ bl setup_7410_workarounds
+ bl setup_common_caches
+ bl setup_750_7400_hid0
+ li r3,0
+ mtspr SPRN_L2CR2,r3
+ mtlr r5
+ blr
+_GLOBAL(__setup_cpu_745x)
+ mflr r5
+ bl setup_common_caches
+ bl setup_745x_specifics
+ mtlr r5
+ blr
+
+/* Enable caches for 603's, 604, 750 & 7400 */
+setup_common_caches:
+ mfspr r11,SPRN_HID0
+ andi. r0,r11,HID0_DCE
+ ori r11,r11,HID0_ICE|HID0_DCE
+ ori r8,r11,HID0_ICFI
+ bne 1f /* don't invalidate the D-cache */
+ ori r8,r8,HID0_DCI /* unless it wasn't enabled */
+1: sync
+ mtspr SPRN_HID0,r8 /* enable and invalidate caches */
+ sync
+ mtspr SPRN_HID0,r11 /* enable caches */
+ sync
+ isync
+ blr
+
+/* 604, 604e, 604ev, ...
+ * Enable superscalar execution & branch history table
+ */
+setup_604_hid0:
+ mfspr r11,SPRN_HID0
+ ori r11,r11,HID0_SIED|HID0_BHTE
+ ori r8,r11,HID0_BTCD
+ sync
+ mtspr SPRN_HID0,r8 /* flush branch target address cache */
+ sync /* on 604e/604r */
+ mtspr SPRN_HID0,r11
+ sync
+ isync
+ blr
+
+/* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some
+ * erratas we work around here.
+ * Moto MPC710CE.pdf describes them, those are errata
+ * #3, #4 and #5
+ * Note that we assume the firmware didn't choose to
+ * apply other workarounds (there are other ones documented
+ * in the .pdf). It appear that Apple firmware only works
+ * around #3 and with the same fix we use. We may want to
+ * check if the CPU is using 60x bus mode in which case
+ * the workaround for errata #4 is useless. Also, we may
+ * want to explicitly clear HID0_NOPDST as this is not
+ * needed once we have applied workaround #5 (though it's
+ * not set by Apple's firmware at least).
+ */
+setup_7400_workarounds:
+ mfpvr r3
+ rlwinm r3,r3,0,20,31
+ cmpwi 0,r3,0x0207
+ ble 1f
+ blr
+setup_7410_workarounds:
+ mfpvr r3
+ rlwinm r3,r3,0,20,31
+ cmpwi 0,r3,0x0100
+ bnelr
+1:
+ mfspr r11,SPRN_MSSSR0
+ /* Errata #3: Set L1OPQ_SIZE to 0x10 */
+ rlwinm r11,r11,0,9,6
+ oris r11,r11,0x0100
+ /* Errata #4: Set L2MQ_SIZE to 1 (check for MPX mode first ?) */
+ oris r11,r11,0x0002
+ /* Errata #5: Set DRLT_SIZE to 0x01 */
+ rlwinm r11,r11,0,5,2
+ oris r11,r11,0x0800
+ sync
+ mtspr SPRN_MSSSR0,r11
+ sync
+ isync
+ blr
+
+/* 740/750/7400/7410
+ * Enable Store Gathering (SGE), Address Brodcast (ABE),
+ * Branch History Table (BHTE), Branch Target ICache (BTIC)
+ * Dynamic Power Management (DPM), Speculative (SPD)
+ * Clear Instruction cache throttling (ICTC)
+ */
+setup_750_7400_hid0:
+ mfspr r11,SPRN_HID0
+ ori r11,r11,HID0_SGE | HID0_ABE | HID0_BHTE | HID0_BTIC
+ oris r11,r11,HID0_DPM@h
+BEGIN_FTR_SECTION
+ xori r11,r11,HID0_BTIC
+END_FTR_SECTION_IFSET(CPU_FTR_NO_BTIC)
+BEGIN_FTR_SECTION
+ xoris r11,r11,HID0_DPM@h /* disable dynamic power mgmt */
+END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM)
+ li r3,HID0_SPD
+ andc r11,r11,r3 /* clear SPD: enable speculative */
+ li r3,0
+ mtspr SPRN_ICTC,r3 /* Instruction Cache Throttling off */
+ isync
+ mtspr SPRN_HID0,r11
+ sync
+ isync
+ blr
+
+/* 750cx specific
+ * Looks like we have to disable NAP feature for some PLL settings...
+ * (waiting for confirmation)
+ */
+setup_750cx:
+ mfspr r10, SPRN_HID1
+ rlwinm r10,r10,4,28,31
+ cmpwi cr0,r10,7
+ cmpwi cr1,r10,9
+ cmpwi cr2,r10,11
+ cror 4*cr0+eq,4*cr0+eq,4*cr1+eq
+ cror 4*cr0+eq,4*cr0+eq,4*cr2+eq
+ bnelr
+ lwz r6,CPU_SPEC_FEATURES(r4)
+ li r7,CPU_FTR_CAN_NAP
+ andc r6,r6,r7
+ stw r6,CPU_SPEC_FEATURES(r4)
+ blr
+
+/* 750fx specific
+ */
+setup_750fx:
+ blr
+
+/* MPC 745x
+ * Enable Store Gathering (SGE), Branch Folding (FOLD)
+ * Branch History Table (BHTE), Branch Target ICache (BTIC)
+ * Dynamic Power Management (DPM), Speculative (SPD)
+ * Ensure our data cache instructions really operate.
+ * Timebase has to be running or we wouldn't have made it here,
+ * just ensure we don't disable it.
+ * Clear Instruction cache throttling (ICTC)
+ * Enable L2 HW prefetch
+ */
+setup_745x_specifics:
+ /* We check for the presence of an L3 cache setup by
+ * the firmware. If any, we disable NAP capability as
+ * it's known to be bogus on rev 2.1 and earlier
+ */
+BEGIN_FTR_SECTION
+ mfspr r11,SPRN_L3CR
+ andis. r11,r11,L3CR_L3E@h
+ beq 1f
+END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
+ lwz r6,CPU_SPEC_FEATURES(r4)
+ andi. r0,r6,CPU_FTR_L3_DISABLE_NAP
+ beq 1f
+ li r7,CPU_FTR_CAN_NAP
+ andc r6,r6,r7
+ stw r6,CPU_SPEC_FEATURES(r4)
+1:
+ mfspr r11,SPRN_HID0
+
+ /* All of the bits we have to set.....
+ */
+ ori r11,r11,HID0_SGE | HID0_FOLD | HID0_BHTE
+ ori r11,r11,HID0_LRSTK | HID0_BTIC
+ oris r11,r11,HID0_DPM@h
+BEGIN_MMU_FTR_SECTION
+ oris r11,r11,HID0_HIGH_BAT@h
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+BEGIN_FTR_SECTION
+ xori r11,r11,HID0_BTIC
+END_FTR_SECTION_IFSET(CPU_FTR_NO_BTIC)
+BEGIN_FTR_SECTION
+ xoris r11,r11,HID0_DPM@h /* disable dynamic power mgmt */
+END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM)
+
+ /* All of the bits we have to clear....
+ */
+ li r3,HID0_SPD | HID0_NOPDST | HID0_NOPTI
+ andc r11,r11,r3 /* clear SPD: enable speculative */
+ li r3,0
+
+ mtspr SPRN_ICTC,r3 /* Instruction Cache Throttling off */
+ isync
+ mtspr SPRN_HID0,r11
+ sync
+ isync
+
+ /* Enable L2 HW prefetch, if L2 is enabled
+ */
+ mfspr r3,SPRN_L2CR
+ andis. r3,r3,L2CR_L2E@h
+ beqlr
+ mfspr r3,SPRN_MSSCR0
+ ori r3,r3,3
+ sync
+ mtspr SPRN_MSSCR0,r3
+ sync
+ isync
+ blr
+
+/*
+ * Initialize the FPU registers. This is needed to work around an errata
+ * in some 750 cpus where using a not yet initialized FPU register after
+ * power on reset may hang the CPU
+ */
+_GLOBAL(__init_fpu_registers)
+ mfmsr r10
+ ori r11,r10,MSR_FP
+ mtmsr r11
+ isync
+ addis r9,r3,empty_zero_page@ha
+ addi r9,r9,empty_zero_page@l
+ REST_32FPRS(0,r9)
+ sync
+ mtmsr r10
+ isync
+ blr
+
+
+/* Definitions for the table use to save CPU states */
+#define CS_HID0 0
+#define CS_HID1 4
+#define CS_HID2 8
+#define CS_MSSCR0 12
+#define CS_MSSSR0 16
+#define CS_ICTRL 20
+#define CS_LDSTCR 24
+#define CS_LDSTDB 28
+#define CS_SIZE 32
+
+ .data
+ .balign L1_CACHE_BYTES
+cpu_state_storage:
+ .space CS_SIZE
+ .balign L1_CACHE_BYTES,0
+ .text
+
+/* Called in normal context to backup CPU 0 state. This
+ * does not include cache settings. This function is also
+ * called for machine sleep. This does not include the MMU
+ * setup, BATs, etc... but rather the "special" registers
+ * like HID0, HID1, MSSCR0, etc...
+ */
+_GLOBAL(__save_cpu_setup)
+ /* Some CR fields are volatile, we back it up all */
+ mfcr r7
+
+ /* Get storage ptr */
+ lis r5,cpu_state_storage@h
+ ori r5,r5,cpu_state_storage@l
+
+ /* Save HID0 (common to all CONFIG_6xx cpus) */
+ mfspr r3,SPRN_HID0
+ stw r3,CS_HID0(r5)
+
+ /* Now deal with CPU type dependent registers */
+ mfspr r3,SPRN_PVR
+ srwi r3,r3,16
+ cmplwi cr0,r3,0x8000 /* 7450 */
+ cmplwi cr1,r3,0x000c /* 7400 */
+ cmplwi cr2,r3,0x800c /* 7410 */
+ cmplwi cr3,r3,0x8001 /* 7455 */
+ cmplwi cr4,r3,0x8002 /* 7457 */
+ cmplwi cr5,r3,0x8003 /* 7447A */
+ cmplwi cr6,r3,0x7000 /* 750FX */
+ cmplwi cr7,r3,0x8004 /* 7448 */
+ /* cr1 is 7400 || 7410 */
+ cror 4*cr1+eq,4*cr1+eq,4*cr2+eq
+ /* cr0 is 74xx */
+ cror 4*cr0+eq,4*cr0+eq,4*cr3+eq
+ cror 4*cr0+eq,4*cr0+eq,4*cr4+eq
+ cror 4*cr0+eq,4*cr0+eq,4*cr1+eq
+ cror 4*cr0+eq,4*cr0+eq,4*cr5+eq
+ cror 4*cr0+eq,4*cr0+eq,4*cr7+eq
+ bne 1f
+ /* Backup 74xx specific regs */
+ mfspr r4,SPRN_MSSCR0
+ stw r4,CS_MSSCR0(r5)
+ mfspr r4,SPRN_MSSSR0
+ stw r4,CS_MSSSR0(r5)
+ beq cr1,1f
+ /* Backup 745x specific registers */
+ mfspr r4,SPRN_HID1
+ stw r4,CS_HID1(r5)
+ mfspr r4,SPRN_ICTRL
+ stw r4,CS_ICTRL(r5)
+ mfspr r4,SPRN_LDSTCR
+ stw r4,CS_LDSTCR(r5)
+ mfspr r4,SPRN_LDSTDB
+ stw r4,CS_LDSTDB(r5)
+1:
+ bne cr6,1f
+ /* Backup 750FX specific registers */
+ mfspr r4,SPRN_HID1
+ stw r4,CS_HID1(r5)
+ /* If rev 2.x, backup HID2 */
+ mfspr r3,SPRN_PVR
+ andi. r3,r3,0xff00
+ cmpwi cr0,r3,0x0200
+ bne 1f
+ mfspr r4,SPRN_HID2
+ stw r4,CS_HID2(r5)
+1:
+ mtcr r7
+ blr
+
+/* Called with no MMU context (typically MSR:IR/DR off) to
+ * restore CPU state as backed up by the previous
+ * function. This does not include cache setting
+ */
+_GLOBAL(__restore_cpu_setup)
+ /* Some CR fields are volatile, we back it up all */
+ mfcr r7
+
+ /* Get storage ptr */
+ lis r5,(cpu_state_storage-KERNELBASE)@h
+ ori r5,r5,cpu_state_storage@l
+
+ /* Restore HID0 */
+ lwz r3,CS_HID0(r5)
+ sync
+ isync
+ mtspr SPRN_HID0,r3
+ sync
+ isync
+
+ /* Now deal with CPU type dependent registers */
+ mfspr r3,SPRN_PVR
+ srwi r3,r3,16
+ cmplwi cr0,r3,0x8000 /* 7450 */
+ cmplwi cr1,r3,0x000c /* 7400 */
+ cmplwi cr2,r3,0x800c /* 7410 */
+ cmplwi cr3,r3,0x8001 /* 7455 */
+ cmplwi cr4,r3,0x8002 /* 7457 */
+ cmplwi cr5,r3,0x8003 /* 7447A */
+ cmplwi cr6,r3,0x7000 /* 750FX */
+ cmplwi cr7,r3,0x8004 /* 7448 */
+ /* cr1 is 7400 || 7410 */
+ cror 4*cr1+eq,4*cr1+eq,4*cr2+eq
+ /* cr0 is 74xx */
+ cror 4*cr0+eq,4*cr0+eq,4*cr3+eq
+ cror 4*cr0+eq,4*cr0+eq,4*cr4+eq
+ cror 4*cr0+eq,4*cr0+eq,4*cr1+eq
+ cror 4*cr0+eq,4*cr0+eq,4*cr5+eq
+ cror 4*cr0+eq,4*cr0+eq,4*cr7+eq
+ bne 2f
+ /* Restore 74xx specific regs */
+ lwz r4,CS_MSSCR0(r5)
+ sync
+ mtspr SPRN_MSSCR0,r4
+ sync
+ isync
+ lwz r4,CS_MSSSR0(r5)
+ sync
+ mtspr SPRN_MSSSR0,r4
+ sync
+ isync
+ bne cr2,1f
+ /* Clear 7410 L2CR2 */
+ li r4,0
+ mtspr SPRN_L2CR2,r4
+1: beq cr1,2f
+ /* Restore 745x specific registers */
+ lwz r4,CS_HID1(r5)
+ sync
+ mtspr SPRN_HID1,r4
+ isync
+ sync
+ lwz r4,CS_ICTRL(r5)
+ sync
+ mtspr SPRN_ICTRL,r4
+ isync
+ sync
+ lwz r4,CS_LDSTCR(r5)
+ sync
+ mtspr SPRN_LDSTCR,r4
+ isync
+ sync
+ lwz r4,CS_LDSTDB(r5)
+ sync
+ mtspr SPRN_LDSTDB,r4
+ isync
+ sync
+2: bne cr6,1f
+ /* Restore 750FX specific registers
+ * that is restore HID2 on rev 2.x and PLL config & switch
+ * to PLL 0 on all
+ */
+ /* If rev 2.x, restore HID2 with low voltage bit cleared */
+ mfspr r3,SPRN_PVR
+ andi. r3,r3,0xff00
+ cmpwi cr0,r3,0x0200
+ bne 4f
+ lwz r4,CS_HID2(r5)
+ rlwinm r4,r4,0,19,17
+ mtspr SPRN_HID2,r4
+ sync
+4:
+ lwz r4,CS_HID1(r5)
+ rlwinm r5,r4,0,16,14
+ mtspr SPRN_HID1,r5
+ /* Wait for PLL to stabilize */
+ mftbl r5
+3: mftbl r6
+ sub r6,r6,r5
+ cmplwi cr0,r6,10000
+ ble 3b
+ /* Setup final PLL */
+ mtspr SPRN_HID1,r4
+1:
+ mtcr r7
+ blr
+
diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S
new file mode 100644
index 00000000..ebc62f42
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_a2.S
@@ -0,0 +1,120 @@
+/*
+ * A2 specific assembly support code
+ *
+ * Copyright 2009 Ben Herrenschmidt, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/asm-offsets.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+#include <asm/processor.h>
+#include <asm/reg_a2.h>
+#include <asm/reg.h>
+#include <asm/thread_info.h>
+
+/*
+ * Disable thdid and class fields in ERATs to bump PID to full 14 bits capacity.
+ * This also prevents external LPID accesses but that isn't a problem when not a
+ * guest. Under PV, this setting will be ignored and MMUCR will return the right
+ * number of PID bits we can use.
+ */
+#define MMUCR1_EXTEND_PID \
+ (MMUCR1_ICTID | MMUCR1_ITTID | MMUCR1_DCTID | \
+ MMUCR1_DTTID | MMUCR1_DCCD)
+
+/*
+ * Use extended PIDs if enabled.
+ * Don't clear the ERATs on context sync events and enable I & D LRU.
+ * Enable ERAT back invalidate when tlbwe overwrites an entry.
+ */
+#define INITIAL_MMUCR1 \
+ (MMUCR1_EXTEND_PID | MMUCR1_CSINV_NEVER | MMUCR1_IRRE | \
+ MMUCR1_DRRE | MMUCR1_TLBWE_BINV)
+
+_GLOBAL(__setup_cpu_a2)
+ /* Some of these are actually thread local and some are
+ * core local but doing it always won't hurt
+ */
+
+#ifdef CONFIG_PPC_ICSWX
+ /* Make sure ACOP starts out as zero */
+ li r3,0
+ mtspr SPRN_ACOP,r3
+
+ /* Skip the following if we are in Guest mode */
+ mfmsr r3
+ andis. r0,r3,MSR_GS@h
+ bne _icswx_skip_guest
+
+ /* Enable icswx instruction */
+ mfspr r3,SPRN_A2_CCR2
+ ori r3,r3,A2_CCR2_ENABLE_ICSWX
+ mtspr SPRN_A2_CCR2,r3
+
+ /* Unmask all CTs in HACOP */
+ li r3,-1
+ mtspr SPRN_HACOP,r3
+_icswx_skip_guest:
+#endif /* CONFIG_PPC_ICSWX */
+
+ /* Enable doorbell */
+ mfspr r3,SPRN_A2_CCR2
+ oris r3,r3,A2_CCR2_ENABLE_PC@h
+ mtspr SPRN_A2_CCR2,r3
+ isync
+
+ /* Setup CCR0 to disable power saving for now as it's busted
+ * in the current implementations. Setup CCR1 to wake on
+ * interrupts normally (we write the default value but who
+ * knows what FW may have clobbered...)
+ */
+ li r3,0
+ mtspr SPRN_A2_CCR0, r3
+ LOAD_REG_IMMEDIATE(r3,0x0f0f0f0f)
+ mtspr SPRN_A2_CCR1, r3
+
+ /* Initialise MMUCR1 */
+ lis r3,INITIAL_MMUCR1@h
+ ori r3,r3,INITIAL_MMUCR1@l
+ mtspr SPRN_MMUCR1,r3
+
+ /* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */
+ LOAD_REG_IMMEDIATE(r3, 0x000a7531)
+ mtspr SPRN_MMUCR2,r3
+
+ /* Set MMUCR3 to write all thids bit to the TLB */
+ LOAD_REG_IMMEDIATE(r3, 0x0000000f)
+ mtspr SPRN_MMUCR3,r3
+
+ /* Don't do ERAT stuff if running guest mode */
+ mfmsr r3
+ andis. r0,r3,MSR_GS@h
+ bne 1f
+
+ /* Now set the I-ERAT watermark to 15 */
+ lis r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h
+ mtspr SPRN_MMUCR0, r4
+ li r4,A2_IERAT_SIZE-1
+ PPC_ERATWE(r4,r4,3)
+
+ /* Now set the D-ERAT watermark to 31 */
+ lis r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h
+ mtspr SPRN_MMUCR0, r4
+ li r4,A2_DERAT_SIZE-1
+ PPC_ERATWE(r4,r4,3)
+
+ /* And invalidate the beast just in case. That won't get rid of
+ * a bolted entry though it will be in LRU and so will go away eventually
+ * but let's not bother for now
+ */
+ PPC_ERATILX(0,0,0)
+1:
+ blr
+
+_GLOBAL(__restore_cpu_a2)
+ b __setup_cpu_a2
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
new file mode 100644
index 00000000..8053db02
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -0,0 +1,98 @@
+/*
+ * This file contains low level CPU setup functions.
+ * Kumar Gala <galak@kernel.crashing.org>
+ * Copyright 2009 Freescale Semiconductor, Inc.
+ *
+ * Based on cpu_setup_6xx code by
+ * Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+
+_GLOBAL(__e500_icache_setup)
+ mfspr r0, SPRN_L1CSR1
+ andi. r3, r0, L1CSR1_ICE
+ bnelr /* Already enabled */
+ oris r0, r0, L1CSR1_CPE@h
+ ori r0, r0, (L1CSR1_ICFI | L1CSR1_ICLFR | L1CSR1_ICE)
+ mtspr SPRN_L1CSR1, r0 /* Enable I-Cache */
+ isync
+ blr
+
+_GLOBAL(__e500_dcache_setup)
+ mfspr r0, SPRN_L1CSR0
+ andi. r3, r0, L1CSR0_DCE
+ bnelr /* Already enabled */
+ msync
+ isync
+ li r0, 0
+ mtspr SPRN_L1CSR0, r0 /* Disable */
+ msync
+ isync
+ li r0, (L1CSR0_DCFI | L1CSR0_CLFC)
+ mtspr SPRN_L1CSR0, r0 /* Invalidate */
+ isync
+1: mfspr r0, SPRN_L1CSR0
+ andi. r3, r0, L1CSR0_CLFC
+ bne+ 1b /* Wait for lock bits reset */
+ oris r0, r0, L1CSR0_CPE@h
+ ori r0, r0, L1CSR0_DCE
+ msync
+ isync
+ mtspr SPRN_L1CSR0, r0 /* Enable */
+ isync
+ blr
+
+#ifdef CONFIG_PPC32
+_GLOBAL(__setup_cpu_e200)
+ /* enable dedicated debug exception handling resources (Debug APU) */
+ mfspr r3,SPRN_HID0
+ ori r3,r3,HID0_DAPUEN@l
+ mtspr SPRN_HID0,r3
+ b __setup_e200_ivors
+_GLOBAL(__setup_cpu_e500v1)
+_GLOBAL(__setup_cpu_e500v2)
+ mflr r4
+ bl __e500_icache_setup
+ bl __e500_dcache_setup
+ bl __setup_e500_ivors
+#ifdef CONFIG_FSL_RIO
+ /* Ensure that RFXE is set */
+ mfspr r3,SPRN_HID1
+ oris r3,r3,HID1_RFXE@h
+ mtspr SPRN_HID1,r3
+#endif
+ mtlr r4
+ blr
+_GLOBAL(__setup_cpu_e500mc)
+ mflr r4
+ bl __e500_icache_setup
+ bl __e500_dcache_setup
+ bl __setup_e500mc_ivors
+ mtlr r4
+ blr
+#endif
+/* Right now, restore and setup are the same thing */
+_GLOBAL(__restore_cpu_e5500)
+_GLOBAL(__setup_cpu_e5500)
+ mflr r4
+ bl __e500_icache_setup
+ bl __e500_dcache_setup
+#ifdef CONFIG_PPC_BOOK3E_64
+ bl .__setup_base_ivors
+ bl .setup_perfmon_ivor
+ bl .setup_doorbell_ivors
+ bl .setup_ehv_ivors
+#else
+ bl __setup_e500mc_ivors
+#endif
+ mtlr r4
+ blr
diff --git a/arch/powerpc/kernel/cpu_setup_pa6t.S b/arch/powerpc/kernel/cpu_setup_pa6t.S
new file mode 100644
index 00000000..d62cb9ca
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_pa6t.S
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2006-2007 PA Semi, Inc
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+
+/* Right now, restore and setup are the same thing */
+_GLOBAL(__restore_cpu_pa6t)
+_GLOBAL(__setup_cpu_pa6t)
+ /* Do nothing if not running in HV mode */
+ mfmsr r0
+ rldicl. r0,r0,4,63
+ beqlr
+
+ mfspr r0,SPRN_HID5
+ ori r0,r0,0x38
+ mtspr SPRN_HID5,r0
+
+ mfspr r0,SPRN_LPCR
+ ori r0,r0,0x7000
+ mtspr SPRN_LPCR,r0
+
+ blr
diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S
new file mode 100644
index 00000000..76797c51
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_power7.S
@@ -0,0 +1,95 @@
+/*
+ * This file contains low level CPU setup functions.
+ * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+
+/* Entry: r3 = crap, r4 = ptr to cputable entry
+ *
+ * Note that we can be called twice for pseudo-PVRs
+ */
+_GLOBAL(__setup_cpu_power7)
+ mflr r11
+ bl __init_hvmode_206
+ mtlr r11
+ beqlr
+ li r0,0
+ mtspr SPRN_LPID,r0
+ bl __init_LPCR
+ bl __init_TLB
+ mtlr r11
+ blr
+
+_GLOBAL(__restore_cpu_power7)
+ mflr r11
+ mfmsr r3
+ rldicl. r0,r3,4,63
+ beqlr
+ li r0,0
+ mtspr SPRN_LPID,r0
+ bl __init_LPCR
+ bl __init_TLB
+ mtlr r11
+ blr
+
+__init_hvmode_206:
+ /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */
+ mfmsr r3
+ rldicl. r0,r3,4,63
+ bnelr
+ ld r5,CPU_SPEC_FEATURES(r4)
+ LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE)
+ xor r5,r5,r6
+ std r5,CPU_SPEC_FEATURES(r4)
+ blr
+
+__init_LPCR:
+ /* Setup a sane LPCR:
+ *
+ * LPES = 0b01 (HSRR0/1 used for 0x500)
+ * PECE = 0b111
+ * DPFD = 4
+ * HDICE = 0
+ * VC = 0b100 (VPM0=1, VPM1=0, ISL=0)
+ * VRMASD = 0b10000 (L=1, LP=00)
+ *
+ * Other bits untouched for now
+ */
+ mfspr r3,SPRN_LPCR
+ li r5,1
+ rldimi r3,r5, LPCR_LPES_SH, 64-LPCR_LPES_SH-2
+ ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2)
+ li r5,4
+ rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3
+ clrrdi r3,r3,1 /* clear HDICE */
+ li r5,4
+ rldimi r3,r5, LPCR_VC_SH, 0
+ li r5,0x10
+ rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5
+ mtspr SPRN_LPCR,r3
+ isync
+ blr
+
+__init_TLB:
+ /* Clear the TLB */
+ li r6,128
+ mtctr r6
+ li r7,0xc00 /* IS field = 0b11 */
+ ptesync
+2: tlbiel r7
+ addi r7,r7,0x1000
+ bdnz 2b
+ ptesync
+1: blr
diff --git a/arch/powerpc/kernel/cpu_setup_ppc970.S b/arch/powerpc/kernel/cpu_setup_ppc970.S
new file mode 100644
index 00000000..12fac8df
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_ppc970.S
@@ -0,0 +1,210 @@
+/*
+ * This file contains low level CPU setup functions.
+ * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+
+_GLOBAL(__cpu_preinit_ppc970)
+ /* Do nothing if not running in HV mode */
+ mfmsr r0
+ rldicl. r0,r0,4,63
+ beqlr
+
+ /* Make sure HID4:rm_ci is off before MMU is turned off, that large
+ * pages are enabled with HID4:61 and clear HID5:DCBZ_size and
+ * HID5:DCBZ32_ill
+ */
+ li r0,0
+ mfspr r3,SPRN_HID4
+ rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */
+ rldimi r3,r0,2,61 /* clear bit 61 (lg_pg_en) */
+ sync
+ mtspr SPRN_HID4,r3
+ isync
+ sync
+ mfspr r3,SPRN_HID5
+ rldimi r3,r0,6,56 /* clear bits 56 & 57 (DCBZ*) */
+ sync
+ mtspr SPRN_HID5,r3
+ isync
+ sync
+
+ /* Setup some basic HID1 features */
+ mfspr r0,SPRN_HID1
+ li r3,0x1200 /* enable i-fetch cacheability */
+ sldi r3,r3,44 /* and prefetch */
+ or r0,r0,r3
+ mtspr SPRN_HID1,r0
+ mtspr SPRN_HID1,r0
+ isync
+
+ /* Clear HIOR */
+ li r0,0
+ sync
+ mtspr SPRN_HIOR,0 /* Clear interrupt prefix */
+ isync
+ blr
+
+/* Definitions for the table use to save CPU states */
+#define CS_HID0 0
+#define CS_HID1 8
+#define CS_HID4 16
+#define CS_HID5 24
+#define CS_SIZE 32
+
+ .data
+ .balign L1_CACHE_BYTES,0
+cpu_state_storage:
+ .space CS_SIZE
+ .balign L1_CACHE_BYTES,0
+ .text
+
+
+_GLOBAL(__setup_cpu_ppc970)
+ /* Do nothing if not running in HV mode */
+ mfmsr r0
+ rldicl. r0,r0,4,63
+ beq no_hv_mode
+
+ mfspr r0,SPRN_HID0
+ li r11,5 /* clear DOZE and SLEEP */
+ rldimi r0,r11,52,8 /* set NAP and DPM */
+ li r11,0
+ rldimi r0,r11,32,31 /* clear EN_ATTN */
+ b load_hids /* Jump to shared code */
+
+
+_GLOBAL(__setup_cpu_ppc970MP)
+ /* Do nothing if not running in HV mode */
+ mfmsr r0
+ rldicl. r0,r0,4,63
+ beq no_hv_mode
+
+ mfspr r0,SPRN_HID0
+ li r11,0x15 /* clear DOZE and SLEEP */
+ rldimi r0,r11,52,6 /* set DEEPNAP, NAP and DPM */
+ li r11,0
+ rldimi r0,r11,32,31 /* clear EN_ATTN */
+
+load_hids:
+ mtspr SPRN_HID0,r0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ sync
+ isync
+
+ /* Try to set LPES = 01 in HID4 */
+ mfspr r0,SPRN_HID4
+ clrldi r0,r0,1 /* clear LPES0 */
+ ori r0,r0,HID4_LPES1 /* set LPES1 */
+ sync
+ mtspr SPRN_HID4,r0
+ isync
+
+ /* Save away cpu state */
+ LOAD_REG_ADDR(r5,cpu_state_storage)
+
+ /* Save HID0,1,4 and 5 */
+ mfspr r3,SPRN_HID0
+ std r3,CS_HID0(r5)
+ mfspr r3,SPRN_HID1
+ std r3,CS_HID1(r5)
+ mfspr r4,SPRN_HID4
+ std r4,CS_HID4(r5)
+ mfspr r3,SPRN_HID5
+ std r3,CS_HID5(r5)
+
+ /* See if we successfully set LPES1 to 1; if not we are in Apple mode */
+ andi. r4,r4,HID4_LPES1
+ bnelr
+
+no_hv_mode:
+ /* Disable CPU_FTR_HVMODE and exit, since we don't have HV mode */
+ ld r5,CPU_SPEC_FEATURES(r4)
+ LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE)
+ andc r5,r5,r6
+ std r5,CPU_SPEC_FEATURES(r4)
+ blr
+
+/* Called with no MMU context (typically MSR:IR/DR off) to
+ * restore CPU state as backed up by the previous
+ * function. This does not include cache setting
+ */
+_GLOBAL(__restore_cpu_ppc970)
+ /* Do nothing if not running in HV mode */
+ mfmsr r0
+ rldicl. r0,r0,4,63
+ beqlr
+
+ LOAD_REG_ADDR(r5,cpu_state_storage)
+ /* Before accessing memory, we make sure rm_ci is clear */
+ li r0,0
+ mfspr r3,SPRN_HID4
+ rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */
+ sync
+ mtspr SPRN_HID4,r3
+ isync
+ sync
+
+ /* Clear interrupt prefix */
+ li r0,0
+ sync
+ mtspr SPRN_HIOR,0
+ isync
+
+ /* Restore HID0 */
+ ld r3,CS_HID0(r5)
+ sync
+ isync
+ mtspr SPRN_HID0,r3
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ sync
+ isync
+
+ /* Restore HID1 */
+ ld r3,CS_HID1(r5)
+ sync
+ isync
+ mtspr SPRN_HID1,r3
+ mtspr SPRN_HID1,r3
+ sync
+ isync
+
+ /* Restore HID4 */
+ ld r3,CS_HID4(r5)
+ sync
+ isync
+ mtspr SPRN_HID4,r3
+ sync
+ isync
+
+ /* Restore HID5 */
+ ld r3,CS_HID5(r5)
+ sync
+ isync
+ mtspr SPRN_HID5,r3
+ sync
+ isync
+ blr
+
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
new file mode 100644
index 00000000..455faa38
--- /dev/null
+++ b/arch/powerpc/kernel/cputable.c
@@ -0,0 +1,2183 @@
+/*
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * Modifications for ppc64:
+ * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/threads.h>
+#include <linux/init.h>
+#include <linux/export.h>
+
+#include <asm/oprofile_impl.h>
+#include <asm/cputable.h>
+#include <asm/prom.h> /* for PTRRELOC on ARCH=ppc */
+#include <asm/mmu.h>
+#include <asm/setup.h>
+
+struct cpu_spec* cur_cpu_spec = NULL;
+EXPORT_SYMBOL(cur_cpu_spec);
+
+/* The platform string corresponding to the real PVR */
+const char *powerpc_base_platform;
+
+/* NOTE:
+ * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's
+ * the responsibility of the appropriate CPU save/restore functions to
+ * eventually copy these settings over. Those save/restore aren't yet
+ * part of the cputable though. That has to be fixed for both ppc32
+ * and ppc64
+ */
+#ifdef CONFIG_PPC32
+extern void __setup_cpu_e200(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_e500v1(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_e500v2(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_e500mc(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_440ep(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_440epx(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_440gx(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_440grx(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_440spe(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_440x5(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_460gt(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_460sx(unsigned long offset, struct cpu_spec *spec);
+extern void __setup_cpu_apm821xx(unsigned long offset, struct cpu_spec *spec);
+extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_750(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_750cx(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_750fx(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_7400(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_7410(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec);
+#endif /* CONFIG_PPC32 */
+#ifdef CONFIG_PPC64
+extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_a2(unsigned long offset, struct cpu_spec* spec);
+extern void __restore_cpu_pa6t(void);
+extern void __restore_cpu_ppc970(void);
+extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec);
+extern void __restore_cpu_power7(void);
+extern void __restore_cpu_a2(void);
+#endif /* CONFIG_PPC64 */
+#if defined(CONFIG_E500)
+extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
+extern void __restore_cpu_e5500(void);
+#endif /* CONFIG_E500 */
+
+/* This table only contains "desktop" CPUs, it need to be filled with embedded
+ * ones as well...
+ */
+#define COMMON_USER (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \
+ PPC_FEATURE_HAS_MMU)
+#define COMMON_USER_PPC64 (COMMON_USER | PPC_FEATURE_64)
+#define COMMON_USER_POWER4 (COMMON_USER_PPC64 | PPC_FEATURE_POWER4)
+#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5 |\
+ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP)
+#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS|\
+ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP)
+#define COMMON_USER_POWER6 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\
+ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+ PPC_FEATURE_TRUE_LE | \
+ PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER_POWER7 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
+ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+ PPC_FEATURE_TRUE_LE | \
+ PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
+ PPC_FEATURE_TRUE_LE | \
+ PPC_FEATURE_HAS_ALTIVEC_COMP)
+#ifdef CONFIG_PPC_BOOK3E_64
+#define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE)
+#else
+#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+ PPC_FEATURE_BOOKE)
+#endif
+
+static struct cpu_spec __initdata cpu_specs[] = {
+#ifdef CONFIG_PPC_BOOK3S_64
+ { /* Power3 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00400000,
+ .cpu_name = "POWER3 (630)",
+ .cpu_features = CPU_FTRS_POWER3,
+ .cpu_user_features = COMMON_USER_PPC64|PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power3",
+ .oprofile_type = PPC_OPROFILE_RS64,
+ .platform = "power3",
+ },
+ { /* Power3+ */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00410000,
+ .cpu_name = "POWER3 (630+)",
+ .cpu_features = CPU_FTRS_POWER3,
+ .cpu_user_features = COMMON_USER_PPC64|PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power3",
+ .oprofile_type = PPC_OPROFILE_RS64,
+ .platform = "power3",
+ },
+ { /* Northstar */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00330000,
+ .cpu_name = "RS64-II (northstar)",
+ .cpu_features = CPU_FTRS_RS64,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/rs64",
+ .oprofile_type = PPC_OPROFILE_RS64,
+ .platform = "rs64",
+ },
+ { /* Pulsar */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00340000,
+ .cpu_name = "RS64-III (pulsar)",
+ .cpu_features = CPU_FTRS_RS64,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/rs64",
+ .oprofile_type = PPC_OPROFILE_RS64,
+ .platform = "rs64",
+ },
+ { /* I-star */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00360000,
+ .cpu_name = "RS64-III (icestar)",
+ .cpu_features = CPU_FTRS_RS64,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/rs64",
+ .oprofile_type = PPC_OPROFILE_RS64,
+ .platform = "rs64",
+ },
+ { /* S-star */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00370000,
+ .cpu_name = "RS64-IV (sstar)",
+ .cpu_features = CPU_FTRS_RS64,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/rs64",
+ .oprofile_type = PPC_OPROFILE_RS64,
+ .platform = "rs64",
+ },
+ { /* Power4 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00350000,
+ .cpu_name = "POWER4 (gp)",
+ .cpu_features = CPU_FTRS_POWER4,
+ .cpu_user_features = COMMON_USER_POWER4,
+ .mmu_features = MMU_FTRS_POWER4,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power4",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .platform = "power4",
+ },
+ { /* Power4+ */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00380000,
+ .cpu_name = "POWER4+ (gq)",
+ .cpu_features = CPU_FTRS_POWER4,
+ .cpu_user_features = COMMON_USER_POWER4,
+ .mmu_features = MMU_FTRS_POWER4,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power4",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .platform = "power4",
+ },
+ { /* PPC970 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00390000,
+ .cpu_name = "PPC970",
+ .cpu_features = CPU_FTRS_PPC970,
+ .cpu_user_features = COMMON_USER_POWER4 |
+ PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_ppc970,
+ .cpu_restore = __restore_cpu_ppc970,
+ .oprofile_cpu_type = "ppc64/970",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .platform = "ppc970",
+ },
+ { /* PPC970FX */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003c0000,
+ .cpu_name = "PPC970FX",
+ .cpu_features = CPU_FTRS_PPC970,
+ .cpu_user_features = COMMON_USER_POWER4 |
+ PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_ppc970,
+ .cpu_restore = __restore_cpu_ppc970,
+ .oprofile_cpu_type = "ppc64/970",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .platform = "ppc970",
+ },
+ { /* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x00440100,
+ .cpu_name = "PPC970MP",
+ .cpu_features = CPU_FTRS_PPC970,
+ .cpu_user_features = COMMON_USER_POWER4 |
+ PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_ppc970,
+ .cpu_restore = __restore_cpu_ppc970,
+ .oprofile_cpu_type = "ppc64/970MP",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .platform = "ppc970",
+ },
+ { /* PPC970MP */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00440000,
+ .cpu_name = "PPC970MP",
+ .cpu_features = CPU_FTRS_PPC970,
+ .cpu_user_features = COMMON_USER_POWER4 |
+ PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_ppc970MP,
+ .cpu_restore = __restore_cpu_ppc970,
+ .oprofile_cpu_type = "ppc64/970MP",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .platform = "ppc970",
+ },
+ { /* PPC970GX */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00450000,
+ .cpu_name = "PPC970GX",
+ .cpu_features = CPU_FTRS_PPC970,
+ .cpu_user_features = COMMON_USER_POWER4 |
+ PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_ppc970,
+ .oprofile_cpu_type = "ppc64/970",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .platform = "ppc970",
+ },
+ { /* Power5 GR */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003a0000,
+ .cpu_name = "POWER5 (gr)",
+ .cpu_features = CPU_FTRS_POWER5,
+ .cpu_user_features = COMMON_USER_POWER5,
+ .mmu_features = MMU_FTRS_POWER5,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power5",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ /* SIHV / SIPR bits are implemented on POWER4+ (GQ)
+ * and above but only works on POWER5 and above
+ */
+ .oprofile_mmcra_sihv = MMCRA_SIHV,
+ .oprofile_mmcra_sipr = MMCRA_SIPR,
+ .platform = "power5",
+ },
+ { /* Power5++ */
+ .pvr_mask = 0xffffff00,
+ .pvr_value = 0x003b0300,
+ .cpu_name = "POWER5+ (gs)",
+ .cpu_features = CPU_FTRS_POWER5,
+ .cpu_user_features = COMMON_USER_POWER5_PLUS,
+ .mmu_features = MMU_FTRS_POWER5,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .oprofile_cpu_type = "ppc64/power5++",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .oprofile_mmcra_sihv = MMCRA_SIHV,
+ .oprofile_mmcra_sipr = MMCRA_SIPR,
+ .platform = "power5+",
+ },
+ { /* Power5 GS */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003b0000,
+ .cpu_name = "POWER5+ (gs)",
+ .cpu_features = CPU_FTRS_POWER5,
+ .cpu_user_features = COMMON_USER_POWER5_PLUS,
+ .mmu_features = MMU_FTRS_POWER5,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power5+",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .oprofile_mmcra_sihv = MMCRA_SIHV,
+ .oprofile_mmcra_sipr = MMCRA_SIPR,
+ .platform = "power5+",
+ },
+ { /* POWER6 in P5+ mode; 2.04-compliant processor */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000001,
+ .cpu_name = "POWER5+",
+ .cpu_features = CPU_FTRS_POWER5,
+ .cpu_user_features = COMMON_USER_POWER5_PLUS,
+ .mmu_features = MMU_FTRS_POWER5,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .oprofile_cpu_type = "ppc64/ibm-compat-v1",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .platform = "power5+",
+ },
+ { /* Power6 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003e0000,
+ .cpu_name = "POWER6 (raw)",
+ .cpu_features = CPU_FTRS_POWER6,
+ .cpu_user_features = COMMON_USER_POWER6 |
+ PPC_FEATURE_POWER6_EXT,
+ .mmu_features = MMU_FTRS_POWER6,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power6",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .oprofile_mmcra_sihv = POWER6_MMCRA_SIHV,
+ .oprofile_mmcra_sipr = POWER6_MMCRA_SIPR,
+ .oprofile_mmcra_clear = POWER6_MMCRA_THRM |
+ POWER6_MMCRA_OTHER,
+ .platform = "power6x",
+ },
+ { /* 2.05-compliant processor, i.e. Power6 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000002,
+ .cpu_name = "POWER6 (architected)",
+ .cpu_features = CPU_FTRS_POWER6,
+ .cpu_user_features = COMMON_USER_POWER6,
+ .mmu_features = MMU_FTRS_POWER6,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .oprofile_cpu_type = "ppc64/ibm-compat-v1",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .platform = "power6",
+ },
+ { /* 2.06-compliant processor, i.e. Power7 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000003,
+ .cpu_name = "POWER7 (architected)",
+ .cpu_features = CPU_FTRS_POWER7,
+ .cpu_user_features = COMMON_USER_POWER7,
+ .mmu_features = MMU_FTRS_POWER7,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .oprofile_cpu_type = "ppc64/ibm-compat-v1",
+ .cpu_setup = __setup_cpu_power7,
+ .cpu_restore = __restore_cpu_power7,
+ .platform = "power7",
+ },
+ { /* Power7 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003f0000,
+ .cpu_name = "POWER7 (raw)",
+ .cpu_features = CPU_FTRS_POWER7,
+ .cpu_user_features = COMMON_USER_POWER7,
+ .mmu_features = MMU_FTRS_POWER7,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power7",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .cpu_setup = __setup_cpu_power7,
+ .cpu_restore = __restore_cpu_power7,
+ .platform = "power7",
+ },
+ { /* Power7+ */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004A0000,
+ .cpu_name = "POWER7+ (raw)",
+ .cpu_features = CPU_FTRS_POWER7,
+ .cpu_user_features = COMMON_USER_POWER7,
+ .mmu_features = MMU_FTRS_POWER7,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power7",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .cpu_setup = __setup_cpu_power7,
+ .cpu_restore = __restore_cpu_power7,
+ .platform = "power7+",
+ },
+ { /* Cell Broadband Engine */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00700000,
+ .cpu_name = "Cell Broadband Engine",
+ .cpu_features = CPU_FTRS_CELL,
+ .cpu_user_features = COMMON_USER_PPC64 |
+ PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_SMT,
+ .mmu_features = MMU_FTRS_CELL,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/cell-be",
+ .oprofile_type = PPC_OPROFILE_CELL,
+ .platform = "ppc-cell-be",
+ },
+ { /* PA Semi PA6T */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00900000,
+ .cpu_name = "PA6T",
+ .cpu_features = CPU_FTRS_PA6T,
+ .cpu_user_features = COMMON_USER_PA6T,
+ .mmu_features = MMU_FTRS_PA6T,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_PA6T,
+ .cpu_setup = __setup_cpu_pa6t,
+ .cpu_restore = __restore_cpu_pa6t,
+ .oprofile_cpu_type = "ppc64/pa6t",
+ .oprofile_type = PPC_OPROFILE_PA6T,
+ .platform = "pa6t",
+ },
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "POWER4 (compatible)",
+ .cpu_features = CPU_FTRS_COMPATIBLE,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .mmu_features = MMU_FTRS_DEFAULT_HPTE_ARCH_V2,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .platform = "power4",
+ }
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC32
+#if CLASSIC_PPC
+ { /* 601 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00010000,
+ .cpu_name = "601",
+ .cpu_features = CPU_FTRS_PPC601,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_601_INSTR |
+ PPC_FEATURE_UNIFIED_CACHE | PPC_FEATURE_NO_TB,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_generic,
+ .platform = "ppc601",
+ },
+ { /* 603 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00030000,
+ .cpu_name = "603",
+ .cpu_features = CPU_FTRS_603,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = 0,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* 603e */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00060000,
+ .cpu_name = "603e",
+ .cpu_features = CPU_FTRS_603,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = 0,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* 603ev */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00070000,
+ .cpu_name = "603ev",
+ .cpu_features = CPU_FTRS_603,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = 0,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* 604 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00040000,
+ .cpu_name = "604",
+ .cpu_features = CPU_FTRS_604,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 2,
+ .cpu_setup = __setup_cpu_604,
+ .machine_check = machine_check_generic,
+ .platform = "ppc604",
+ },
+ { /* 604e */
+ .pvr_mask = 0xfffff000,
+ .pvr_value = 0x00090000,
+ .cpu_name = "604e",
+ .cpu_features = CPU_FTRS_604,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_604,
+ .machine_check = machine_check_generic,
+ .platform = "ppc604",
+ },
+ { /* 604r */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00090000,
+ .cpu_name = "604r",
+ .cpu_features = CPU_FTRS_604,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_604,
+ .machine_check = machine_check_generic,
+ .platform = "ppc604",
+ },
+ { /* 604ev */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x000a0000,
+ .cpu_name = "604ev",
+ .cpu_features = CPU_FTRS_604,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_604,
+ .machine_check = machine_check_generic,
+ .platform = "ppc604",
+ },
+ { /* 740/750 (0x4202, don't support TAU ?) */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x00084202,
+ .cpu_name = "740/750",
+ .cpu_features = CPU_FTRS_740_NOTAU,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750CX (80100 and 8010x?) */
+ .pvr_mask = 0xfffffff0,
+ .pvr_value = 0x00080100,
+ .cpu_name = "750CX",
+ .cpu_features = CPU_FTRS_750,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_750cx,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750CX (82201 and 82202) */
+ .pvr_mask = 0xfffffff0,
+ .pvr_value = 0x00082200,
+ .cpu_name = "750CX",
+ .cpu_features = CPU_FTRS_750,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750cx,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750CXe (82214) */
+ .pvr_mask = 0xfffffff0,
+ .pvr_value = 0x00082210,
+ .cpu_name = "750CXe",
+ .cpu_features = CPU_FTRS_750,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750cx,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750CXe "Gekko" (83214) */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x00083214,
+ .cpu_name = "750CXe",
+ .cpu_features = CPU_FTRS_750,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750cx,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750CL (and "Broadway") */
+ .pvr_mask = 0xfffff0e0,
+ .pvr_value = 0x00087000,
+ .cpu_name = "750CL",
+ .cpu_features = CPU_FTRS_750CL,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ .oprofile_cpu_type = "ppc/750",
+ .oprofile_type = PPC_OPROFILE_G4,
+ },
+ { /* 745/755 */
+ .pvr_mask = 0xfffff000,
+ .pvr_value = 0x00083000,
+ .cpu_name = "745/755",
+ .cpu_features = CPU_FTRS_750,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750FX rev 1.x */
+ .pvr_mask = 0xffffff00,
+ .pvr_value = 0x70000100,
+ .cpu_name = "750FX",
+ .cpu_features = CPU_FTRS_750FX1,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ .oprofile_cpu_type = "ppc/750",
+ .oprofile_type = PPC_OPROFILE_G4,
+ },
+ { /* 750FX rev 2.0 must disable HID0[DPM] */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x70000200,
+ .cpu_name = "750FX",
+ .cpu_features = CPU_FTRS_750FX2,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ .oprofile_cpu_type = "ppc/750",
+ .oprofile_type = PPC_OPROFILE_G4,
+ },
+ { /* 750FX (All revs except 2.0) */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x70000000,
+ .cpu_name = "750FX",
+ .cpu_features = CPU_FTRS_750FX,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750fx,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ .oprofile_cpu_type = "ppc/750",
+ .oprofile_type = PPC_OPROFILE_G4,
+ },
+ { /* 750GX */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x70020000,
+ .cpu_name = "750GX",
+ .cpu_features = CPU_FTRS_750GX,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750fx,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ .oprofile_cpu_type = "ppc/750",
+ .oprofile_type = PPC_OPROFILE_G4,
+ },
+ { /* 740/750 (L2CR bit need fixup for 740) */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00080000,
+ .cpu_name = "740/750",
+ .cpu_features = CPU_FTRS_740,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 7400 rev 1.1 ? (no TAU) */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x000c1101,
+ .cpu_name = "7400 (1.1)",
+ .cpu_features = CPU_FTRS_7400_NOTAU,
+ .cpu_user_features = COMMON_USER |
+ PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_7400,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7400",
+ },
+ { /* 7400 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x000c0000,
+ .cpu_name = "7400",
+ .cpu_features = CPU_FTRS_7400,
+ .cpu_user_features = COMMON_USER |
+ PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_7400,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7400",
+ },
+ { /* 7410 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x800c0000,
+ .cpu_name = "7410",
+ .cpu_features = CPU_FTRS_7400,
+ .cpu_user_features = COMMON_USER |
+ PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_7410,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7400",
+ },
+ { /* 7450 2.0 - no doze/nap */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x80000200,
+ .cpu_name = "7450",
+ .cpu_features = CPU_FTRS_7450_20,
+ .cpu_user_features = COMMON_USER |
+ PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .oprofile_cpu_type = "ppc/7450",
+ .oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7450 2.1 */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x80000201,
+ .cpu_name = "7450",
+ .cpu_features = CPU_FTRS_7450_21,
+ .cpu_user_features = COMMON_USER |
+ PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .oprofile_cpu_type = "ppc/7450",
+ .oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7450 2.3 and newer */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80000000,
+ .cpu_name = "7450",
+ .cpu_features = CPU_FTRS_7450_23,
+ .cpu_user_features = COMMON_USER |
+ PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .oprofile_cpu_type = "ppc/7450",
+ .oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7455 rev 1.x */
+ .pvr_mask = 0xffffff00,
+ .pvr_value = 0x80010100,
+ .cpu_name = "7455",
+ .cpu_features = CPU_FTRS_7455_1,
+ .cpu_user_features = COMMON_USER |
+ PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .oprofile_cpu_type = "ppc/7450",
+ .oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7455 rev 2.0 */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x80010200,
+ .cpu_name = "7455",
+ .cpu_features = CPU_FTRS_7455_20,
+ .cpu_user_features = COMMON_USER |
+ PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .oprofile_cpu_type = "ppc/7450",
+ .oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7455 others */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80010000,
+ .cpu_name = "7455",
+ .cpu_features = CPU_FTRS_7455,
+ .cpu_user_features = COMMON_USER |
+ PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .oprofile_cpu_type = "ppc/7450",
+ .oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7447/7457 Rev 1.0 */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x80020100,
+ .cpu_name = "7447/7457",
+ .cpu_features = CPU_FTRS_7447_10,
+ .cpu_user_features = COMMON_USER |
+ PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .oprofile_cpu_type = "ppc/7450",
+ .oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7447/7457 Rev 1.1 */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x80020101,
+ .cpu_name = "7447/7457",
+ .cpu_features = CPU_FTRS_7447_10,
+ .cpu_user_features = COMMON_USER |
+ PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .oprofile_cpu_type = "ppc/7450",
+ .oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7447/7457 Rev 1.2 and later */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80020000,
+ .cpu_name = "7447/7457",
+ .cpu_features = CPU_FTRS_7447,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .oprofile_cpu_type = "ppc/7450",
+ .oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7447A */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80030000,
+ .cpu_name = "7447A",
+ .cpu_features = CPU_FTRS_7447A,
+ .cpu_user_features = COMMON_USER |
+ PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .oprofile_cpu_type = "ppc/7450",
+ .oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7448 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80040000,
+ .cpu_name = "7448",
+ .cpu_features = CPU_FTRS_7448,
+ .cpu_user_features = COMMON_USER |
+ PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .oprofile_cpu_type = "ppc/7450",
+ .oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 82xx (8240, 8245, 8260 are all 603e cores) */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00810000,
+ .cpu_name = "82xx",
+ .cpu_features = CPU_FTRS_82XX,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = 0,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* All G2_LE (603e core, plus some) have the same pvr */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00820000,
+ .cpu_name = "G2_LE",
+ .cpu_features = CPU_FTRS_G2_LE,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* e300c1 (a 603e core, plus some) on 83xx */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00830000,
+ .cpu_name = "e300c1",
+ .cpu_features = CPU_FTRS_E300,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* e300c2 (an e300c1 core, plus some, minus FPU) on 83xx */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00840000,
+ .cpu_name = "e300c2",
+ .cpu_features = CPU_FTRS_E300C2,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS |
+ MMU_FTR_NEED_DTLB_SW_LRU,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* e300c3 (e300c1, plus one IU, half cache size) on 83xx */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00850000,
+ .cpu_name = "e300c3",
+ .cpu_features = CPU_FTRS_E300,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS |
+ MMU_FTR_NEED_DTLB_SW_LRU,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .num_pmcs = 4,
+ .oprofile_cpu_type = "ppc/e300",
+ .oprofile_type = PPC_OPROFILE_FSL_EMB,
+ .platform = "ppc603",
+ },
+ { /* e300c4 (e300c1, plus one IU) */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00860000,
+ .cpu_name = "e300c4",
+ .cpu_features = CPU_FTRS_E300,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS |
+ MMU_FTR_NEED_DTLB_SW_LRU,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .num_pmcs = 4,
+ .oprofile_cpu_type = "ppc/e300",
+ .oprofile_type = PPC_OPROFILE_FSL_EMB,
+ .platform = "ppc603",
+ },
+ { /* default match, we assume split I/D cache & TB (non-601)... */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "(generic PPC)",
+ .cpu_features = CPU_FTRS_CLASSIC32,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+#endif /* CLASSIC_PPC */
+#ifdef CONFIG_8xx
+ { /* 8xx */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00500000,
+ .cpu_name = "8xx",
+ /* CPU_FTR_MAYBE_CAN_DOZE is possible,
+ * if the 8xx code is there.... */
+ .cpu_features = CPU_FTRS_8XX,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+ .mmu_features = MMU_FTR_TYPE_8xx,
+ .icache_bsize = 16,
+ .dcache_bsize = 16,
+ .platform = "ppc823",
+ },
+#endif /* CONFIG_8xx */
+#ifdef CONFIG_40x
+ { /* 403GC */
+ .pvr_mask = 0xffffff00,
+ .pvr_value = 0x00200200,
+ .cpu_name = "403GC",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 16,
+ .dcache_bsize = 16,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc403",
+ },
+ { /* 403GCX */
+ .pvr_mask = 0xffffff00,
+ .pvr_value = 0x00201400,
+ .cpu_name = "403GCX",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_NO_TB,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 16,
+ .dcache_bsize = 16,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc403",
+ },
+ { /* 403G ?? */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00200000,
+ .cpu_name = "403G ??",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 16,
+ .dcache_bsize = 16,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc403",
+ },
+ { /* 405GP */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x40110000,
+ .cpu_name = "405GP",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* STB 03xxx */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x40130000,
+ .cpu_name = "STB03xxx",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* STB 04xxx */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x41810000,
+ .cpu_name = "STB04xxx",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* NP405L */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x41610000,
+ .cpu_name = "NP405L",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* NP4GS3 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x40B10000,
+ .cpu_name = "NP4GS3",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* NP405H */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x41410000,
+ .cpu_name = "NP405H",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405GPr */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x50910000,
+ .cpu_name = "405GPr",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* STBx25xx */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x51510000,
+ .cpu_name = "STBx25xx",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405LP */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x41F10000,
+ .cpu_name = "405LP",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* Xilinx Virtex-II Pro */
+ .pvr_mask = 0xfffff000,
+ .pvr_value = 0x20010000,
+ .cpu_name = "Virtex-II Pro",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* Xilinx Virtex-4 FX */
+ .pvr_mask = 0xfffff000,
+ .pvr_value = 0x20011000,
+ .cpu_name = "Virtex-4 FX",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EP */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x51210000,
+ .cpu_name = "405EP",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EX Rev. A/B with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910007,
+ .cpu_name = "405EX Rev. A/B",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EX Rev. C without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x1291000d,
+ .cpu_name = "405EX Rev. C",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EX Rev. C with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x1291000f,
+ .cpu_name = "405EX Rev. C",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EX Rev. D without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910003,
+ .cpu_name = "405EX Rev. D",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EX Rev. D with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910005,
+ .cpu_name = "405EX Rev. D",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. A/B without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910001,
+ .cpu_name = "405EXr Rev. A/B",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. C without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910009,
+ .cpu_name = "405EXr Rev. C",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. C with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x1291000b,
+ .cpu_name = "405EXr Rev. C",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. D without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910000,
+ .cpu_name = "405EXr Rev. D",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. D with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910002,
+ .cpu_name = "405EXr Rev. D",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ {
+ /* 405EZ */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x41510000,
+ .cpu_name = "405EZ",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* APM8018X */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x7ff11432,
+ .cpu_name = "APM8018X",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "(generic 40x PPC)",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ }
+
+#endif /* CONFIG_40x */
+#ifdef CONFIG_44x
+ {
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x40000850,
+ .cpu_name = "440GR Rev. A",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x40000858,
+ .cpu_name = "440EP Rev. A",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440ep,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ {
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x400008d3,
+ .cpu_name = "440GR Rev. B",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* Matches both physical and logical PVR for 440EP (logical pvr = pvr | 0x8) */
+ .pvr_mask = 0xf0000ff7,
+ .pvr_value = 0x400008d4,
+ .cpu_name = "440EP Rev. C",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440ep,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x400008db,
+ .cpu_name = "440EP Rev. B",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440ep,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* 440GRX */
+ .pvr_mask = 0xf0000ffb,
+ .pvr_value = 0x200008D0,
+ .cpu_name = "440GRX",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440grx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* Use logical PVR for 440EPx (logical pvr = pvr | 0x8) */
+ .pvr_mask = 0xf0000ffb,
+ .pvr_value = 0x200008D8,
+ .cpu_name = "440EPX",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440epx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440GP Rev. B */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x40000440,
+ .cpu_name = "440GP Rev. B",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440gp",
+ },
+ { /* 440GP Rev. C */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x40000481,
+ .cpu_name = "440GP Rev. C",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440gp",
+ },
+ { /* 440GX Rev. A */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x50000850,
+ .cpu_name = "440GX Rev. A",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440gx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440GX Rev. B */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x50000851,
+ .cpu_name = "440GX Rev. B",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440gx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440GX Rev. C */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x50000892,
+ .cpu_name = "440GX Rev. C",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440gx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440GX Rev. F */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x50000894,
+ .cpu_name = "440GX Rev. F",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440gx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440SP Rev. A */
+ .pvr_mask = 0xfff00fff,
+ .pvr_value = 0x53200891,
+ .cpu_name = "440SP Rev. A",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* 440SPe Rev. A */
+ .pvr_mask = 0xfff00fff,
+ .pvr_value = 0x53400890,
+ .cpu_name = "440SPe Rev. A",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440spe,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440SPe Rev. B */
+ .pvr_mask = 0xfff00fff,
+ .pvr_value = 0x53400891,
+ .cpu_name = "440SPe Rev. B",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440spe,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440 in Xilinx Virtex-5 FXT */
+ .pvr_mask = 0xfffffff0,
+ .pvr_value = 0x7ff21910,
+ .cpu_name = "440 in Virtex-5 FXT",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440x5,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460EX */
+ .pvr_mask = 0xffff0006,
+ .pvr_value = 0x13020002,
+ .cpu_name = "460EX",
+ .cpu_features = CPU_FTRS_440x6,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460ex,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460EX Rev B */
+ .pvr_mask = 0xffff0007,
+ .pvr_value = 0x13020004,
+ .cpu_name = "460EX Rev. B",
+ .cpu_features = CPU_FTRS_440x6,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460ex,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460GT */
+ .pvr_mask = 0xffff0006,
+ .pvr_value = 0x13020000,
+ .cpu_name = "460GT",
+ .cpu_features = CPU_FTRS_440x6,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460gt,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460GT Rev B */
+ .pvr_mask = 0xffff0007,
+ .pvr_value = 0x13020005,
+ .cpu_name = "460GT Rev. B",
+ .cpu_features = CPU_FTRS_440x6,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460gt,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460SX */
+ .pvr_mask = 0xffffff00,
+ .pvr_value = 0x13541800,
+ .cpu_name = "460SX",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460sx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 464 in APM821xx */
+ .pvr_mask = 0xfffffff0,
+ .pvr_value = 0x12C41C80,
+ .cpu_name = "APM821XX",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_apm821xx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 476 DD2 core */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x11a52080,
+ .cpu_name = "476",
+ .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_47x |
+ MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ },
+ { /* 476fpe */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x7ff50000,
+ .cpu_name = "476fpe",
+ .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_47x |
+ MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ },
+ { /* 476 iss */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00050000,
+ .cpu_name = "476",
+ .cpu_features = CPU_FTRS_47X,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_47x |
+ MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ },
+ { /* 476 others */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x11a50000,
+ .cpu_name = "476",
+ .cpu_features = CPU_FTRS_47X,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_47x |
+ MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ },
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "(generic 44x PPC)",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ }
+#endif /* CONFIG_44x */
+#ifdef CONFIG_E200
+ { /* e200z5 */
+ .pvr_mask = 0xfff00000,
+ .pvr_value = 0x81000000,
+ .cpu_name = "e200z5",
+ /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */
+ .cpu_features = CPU_FTRS_E200,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_EFP_SINGLE |
+ PPC_FEATURE_UNIFIED_CACHE,
+ .mmu_features = MMU_FTR_TYPE_FSL_E,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_e200,
+ .platform = "ppc5554",
+ },
+ { /* e200z6 */
+ .pvr_mask = 0xfff00000,
+ .pvr_value = 0x81100000,
+ .cpu_name = "e200z6",
+ /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */
+ .cpu_features = CPU_FTRS_E200,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_SPE_COMP |
+ PPC_FEATURE_HAS_EFP_SINGLE_COMP |
+ PPC_FEATURE_UNIFIED_CACHE,
+ .mmu_features = MMU_FTR_TYPE_FSL_E,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_e200,
+ .platform = "ppc5554",
+ },
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "(generic E200 PPC)",
+ .cpu_features = CPU_FTRS_E200,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_EFP_SINGLE |
+ PPC_FEATURE_UNIFIED_CACHE,
+ .mmu_features = MMU_FTR_TYPE_FSL_E,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_e200,
+ .machine_check = machine_check_e200,
+ .platform = "ppc5554",
+ }
+#endif /* CONFIG_E200 */
+#endif /* CONFIG_PPC32 */
+#ifdef CONFIG_E500
+#ifdef CONFIG_PPC32
+ { /* e500 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80200000,
+ .cpu_name = "e500",
+ .cpu_features = CPU_FTRS_E500,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_SPE_COMP |
+ PPC_FEATURE_HAS_EFP_SINGLE_COMP,
+ .mmu_features = MMU_FTR_TYPE_FSL_E,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .oprofile_cpu_type = "ppc/e500",
+ .oprofile_type = PPC_OPROFILE_FSL_EMB,
+ .cpu_setup = __setup_cpu_e500v1,
+ .machine_check = machine_check_e500,
+ .platform = "ppc8540",
+ },
+ { /* e500v2 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80210000,
+ .cpu_name = "e500v2",
+ .cpu_features = CPU_FTRS_E500_2,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_SPE_COMP |
+ PPC_FEATURE_HAS_EFP_SINGLE_COMP |
+ PPC_FEATURE_HAS_EFP_DOUBLE_COMP,
+ .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .oprofile_cpu_type = "ppc/e500",
+ .oprofile_type = PPC_OPROFILE_FSL_EMB,
+ .cpu_setup = __setup_cpu_e500v2,
+ .machine_check = machine_check_e500,
+ .platform = "ppc8548",
+ },
+ { /* e500mc */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80230000,
+ .cpu_name = "e500mc",
+ .cpu_features = CPU_FTRS_E500MC,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
+ MMU_FTR_USE_TLBILX,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 4,
+ .oprofile_cpu_type = "ppc/e500mc",
+ .oprofile_type = PPC_OPROFILE_FSL_EMB,
+ .cpu_setup = __setup_cpu_e500mc,
+ .machine_check = machine_check_e500mc,
+ .platform = "ppce500mc",
+ },
+#endif /* CONFIG_PPC32 */
+ { /* e5500 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80240000,
+ .cpu_name = "e5500",
+ .cpu_features = CPU_FTRS_E5500,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
+ MMU_FTR_USE_TLBILX,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 4,
+ .oprofile_cpu_type = "ppc/e500mc",
+ .oprofile_type = PPC_OPROFILE_FSL_EMB,
+ .cpu_setup = __setup_cpu_e5500,
+ .cpu_restore = __restore_cpu_e5500,
+ .machine_check = machine_check_e500mc,
+ .platform = "ppce5500",
+ },
+ { /* e6500 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80400000,
+ .cpu_name = "e6500",
+ .cpu_features = CPU_FTRS_E6500,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
+ MMU_FTR_USE_TLBILX,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 4,
+ .oprofile_cpu_type = "ppc/e6500",
+ .oprofile_type = PPC_OPROFILE_FSL_EMB,
+ .cpu_setup = __setup_cpu_e5500,
+ .cpu_restore = __restore_cpu_e5500,
+ .machine_check = machine_check_e500mc,
+ .platform = "ppce6500",
+ },
+#ifdef CONFIG_PPC32
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "(generic E500 PPC)",
+ .cpu_features = CPU_FTRS_E500,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_SPE_COMP |
+ PPC_FEATURE_HAS_EFP_SINGLE_COMP,
+ .mmu_features = MMU_FTR_TYPE_FSL_E,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_e500,
+ .platform = "powerpc",
+ }
+#endif /* CONFIG_PPC32 */
+#endif /* CONFIG_E500 */
+
+#ifdef CONFIG_PPC_A2
+ { /* Standard A2 (>= DD2) + FPU core */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00480000,
+ .cpu_name = "A2 (>= DD2)",
+ .cpu_features = CPU_FTRS_A2,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .mmu_features = MMU_FTRS_A2,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 0,
+ .cpu_setup = __setup_cpu_a2,
+ .cpu_restore = __restore_cpu_a2,
+ .machine_check = machine_check_generic,
+ .platform = "ppca2",
+ },
+ { /* This is a default entry to get going, to be replaced by
+ * a real one at some stage
+ */
+#define CPU_FTRS_BASE_BOOK3E (CPU_FTR_USE_TB | \
+ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_SMT | \
+ CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "Book3E",
+ .cpu_features = CPU_FTRS_BASE_BOOK3E,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .mmu_features = MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX |
+ MMU_FTR_USE_TLBIVAX_BCAST |
+ MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 0,
+ .machine_check = machine_check_generic,
+ .platform = "power6",
+ },
+#endif /* CONFIG_PPC_A2 */
+};
+
+static struct cpu_spec the_cpu_spec;
+
+static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
+ struct cpu_spec *s)
+{
+ struct cpu_spec *t = &the_cpu_spec;
+ struct cpu_spec old;
+
+ t = PTRRELOC(t);
+ old = *t;
+
+ /* Copy everything, then do fixups */
+ *t = *s;
+
+ /*
+ * If we are overriding a previous value derived from the real
+ * PVR with a new value obtained using a logical PVR value,
+ * don't modify the performance monitor fields.
+ */
+ if (old.num_pmcs && !s->num_pmcs) {
+ t->num_pmcs = old.num_pmcs;
+ t->pmc_type = old.pmc_type;
+ t->oprofile_type = old.oprofile_type;
+ t->oprofile_mmcra_sihv = old.oprofile_mmcra_sihv;
+ t->oprofile_mmcra_sipr = old.oprofile_mmcra_sipr;
+ t->oprofile_mmcra_clear = old.oprofile_mmcra_clear;
+
+ /*
+ * If we have passed through this logic once before and
+ * have pulled the default case because the real PVR was
+ * not found inside cpu_specs[], then we are possibly
+ * running in compatibility mode. In that case, let the
+ * oprofiler know which set of compatibility counters to
+ * pull from by making sure the oprofile_cpu_type string
+ * is set to that of compatibility mode. If the
+ * oprofile_cpu_type already has a value, then we are
+ * possibly overriding a real PVR with a logical one,
+ * and, in that case, keep the current value for
+ * oprofile_cpu_type.
+ */
+ if (old.oprofile_cpu_type != NULL) {
+ t->oprofile_cpu_type = old.oprofile_cpu_type;
+ t->oprofile_type = old.oprofile_type;
+ }
+ }
+
+ *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
+
+ /*
+ * Set the base platform string once; assumes
+ * we're called with real pvr first.
+ */
+ if (*PTRRELOC(&powerpc_base_platform) == NULL)
+ *PTRRELOC(&powerpc_base_platform) = t->platform;
+
+#if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE)
+ /* ppc64 and booke expect identify_cpu to also call setup_cpu for
+ * that processor. I will consolidate that at a later time, for now,
+ * just use #ifdef. We also don't need to PTRRELOC the function
+ * pointer on ppc64 and booke as we are running at 0 in real mode
+ * on ppc64 and reloc_offset is always 0 on booke.
+ */
+ if (t->cpu_setup) {
+ t->cpu_setup(offset, t);
+ }
+#endif /* CONFIG_PPC64 || CONFIG_BOOKE */
+
+ return t;
+}
+
+struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
+{
+ struct cpu_spec *s = cpu_specs;
+ int i;
+
+ s = PTRRELOC(s);
+
+ for (i = 0; i < ARRAY_SIZE(cpu_specs); i++,s++) {
+ if ((pvr & s->pvr_mask) == s->pvr_value)
+ return setup_cpu_spec(offset, s);
+ }
+
+ BUG();
+
+ return NULL;
+}
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
new file mode 100644
index 00000000..fdcd8f55
--- /dev/null
+++ b/arch/powerpc/kernel/crash.c
@@ -0,0 +1,368 @@
+/*
+ * Architecture specific (PPC64) functions for kexec based crash dumps.
+ *
+ * Copyright (C) 2005, IBM Corp.
+ *
+ * Created by: Haren Myneni
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#include <linux/export.h>
+#include <linux/crash_dump.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/types.h>
+
+#include <asm/processor.h>
+#include <asm/machdep.h>
+#include <asm/kexec.h>
+#include <asm/kdump.h>
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/setjmp.h>
+#include <asm/debug.h>
+
+/*
+ * The primary CPU waits a while for all secondary CPUs to enter. This is to
+ * avoid sending an IPI if the secondary CPUs are entering
+ * crash_kexec_secondary on their own (eg via a system reset).
+ *
+ * The secondary timeout has to be longer than the primary. Both timeouts are
+ * in milliseconds.
+ */
+#define PRIMARY_TIMEOUT 500
+#define SECONDARY_TIMEOUT 1000
+
+#define IPI_TIMEOUT 10000
+#define REAL_MODE_TIMEOUT 10000
+
+/* This keeps a track of which one is the crashing cpu. */
+int crashing_cpu = -1;
+static int time_to_dump;
+
+#define CRASH_HANDLER_MAX 3
+/* NULL terminated list of shutdown handles */
+static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1];
+static DEFINE_SPINLOCK(crash_handlers_lock);
+
+static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
+static int crash_shutdown_cpu = -1;
+
+static int handle_fault(struct pt_regs *regs)
+{
+ if (crash_shutdown_cpu == smp_processor_id())
+ longjmp(crash_shutdown_buf, 1);
+ return 0;
+}
+
+#ifdef CONFIG_SMP
+
+static atomic_t cpus_in_crash;
+void crash_ipi_callback(struct pt_regs *regs)
+{
+ static cpumask_t cpus_state_saved = CPU_MASK_NONE;
+
+ int cpu = smp_processor_id();
+
+ if (!cpu_online(cpu))
+ return;
+
+ hard_irq_disable();
+ if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
+ crash_save_cpu(regs, cpu);
+ cpumask_set_cpu(cpu, &cpus_state_saved);
+ }
+
+ atomic_inc(&cpus_in_crash);
+ smp_mb__after_atomic_inc();
+
+ /*
+ * Starting the kdump boot.
+ * This barrier is needed to make sure that all CPUs are stopped.
+ */
+ while (!time_to_dump)
+ cpu_relax();
+
+ if (ppc_md.kexec_cpu_down)
+ ppc_md.kexec_cpu_down(1, 1);
+
+#ifdef CONFIG_PPC64
+ kexec_smp_wait();
+#else
+ for (;;); /* FIXME */
+#endif
+
+ /* NOTREACHED */
+}
+
+static void crash_kexec_prepare_cpus(int cpu)
+{
+ unsigned int msecs;
+ unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
+ int tries = 0;
+ int (*old_handler)(struct pt_regs *regs);
+
+ printk(KERN_EMERG "Sending IPI to other CPUs\n");
+
+ crash_send_ipi(crash_ipi_callback);
+ smp_wmb();
+
+again:
+ /*
+ * FIXME: Until we will have the way to stop other CPUs reliably,
+ * the crash CPU will send an IPI and wait for other CPUs to
+ * respond.
+ */
+ msecs = IPI_TIMEOUT;
+ while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0))
+ mdelay(1);
+
+ /* Would it be better to replace the trap vector here? */
+
+ if (atomic_read(&cpus_in_crash) >= ncpus) {
+ printk(KERN_EMERG "IPI complete\n");
+ return;
+ }
+
+ printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n",
+ ncpus - atomic_read(&cpus_in_crash));
+
+ /*
+ * If we have a panic timeout set then we can't wait indefinitely
+ * for someone to activate system reset. We also give up on the
+ * second time through if system reset fail to work.
+ */
+ if ((panic_timeout > 0) || (tries > 0))
+ return;
+
+ /*
+ * A system reset will cause all CPUs to take an 0x100 exception.
+ * The primary CPU returns here via setjmp, and the secondary
+ * CPUs reexecute the crash_kexec_secondary path.
+ */
+ old_handler = __debugger;
+ __debugger = handle_fault;
+ crash_shutdown_cpu = smp_processor_id();
+
+ if (setjmp(crash_shutdown_buf) == 0) {
+ printk(KERN_EMERG "Activate system reset (dumprestart) "
+ "to stop other cpu(s)\n");
+
+ /*
+ * A system reset will force all CPUs to execute the
+ * crash code again. We need to reset cpus_in_crash so we
+ * wait for everyone to do this.
+ */
+ atomic_set(&cpus_in_crash, 0);
+ smp_mb();
+
+ while (atomic_read(&cpus_in_crash) < ncpus)
+ cpu_relax();
+ }
+
+ crash_shutdown_cpu = -1;
+ __debugger = old_handler;
+
+ tries++;
+ goto again;
+}
+
+/*
+ * This function will be called by secondary cpus.
+ */
+void crash_kexec_secondary(struct pt_regs *regs)
+{
+ unsigned long flags;
+ int msecs = SECONDARY_TIMEOUT;
+
+ local_irq_save(flags);
+
+ /* Wait for the primary crash CPU to signal its progress */
+ while (crashing_cpu < 0) {
+ if (--msecs < 0) {
+ /* No response, kdump image may not have been loaded */
+ local_irq_restore(flags);
+ return;
+ }
+
+ mdelay(1);
+ }
+
+ crash_ipi_callback(regs);
+}
+
+#else /* ! CONFIG_SMP */
+
+static void crash_kexec_prepare_cpus(int cpu)
+{
+ /*
+ * move the secondaries to us so that we can copy
+ * the new kernel 0-0x100 safely
+ *
+ * do this if kexec in setup.c ?
+ */
+#ifdef CONFIG_PPC64
+ smp_release_cpus();
+#else
+ /* FIXME */
+#endif
+}
+
+void crash_kexec_secondary(struct pt_regs *regs)
+{
+}
+#endif /* CONFIG_SMP */
+
+/* wait for all the CPUs to hit real mode but timeout if they don't come in */
+#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64)
+static void crash_kexec_wait_realmode(int cpu)
+{
+ unsigned int msecs;
+ int i;
+
+ msecs = REAL_MODE_TIMEOUT;
+ for (i=0; i < nr_cpu_ids && msecs > 0; i++) {
+ if (i == cpu)
+ continue;
+
+ while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
+ barrier();
+ if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
+ break;
+ msecs--;
+ mdelay(1);
+ }
+ }
+ mb();
+}
+#else
+static inline void crash_kexec_wait_realmode(int cpu) {}
+#endif /* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */
+
+/*
+ * Register a function to be called on shutdown. Only use this if you
+ * can't reset your device in the second kernel.
+ */
+int crash_shutdown_register(crash_shutdown_t handler)
+{
+ unsigned int i, rc;
+
+ spin_lock(&crash_handlers_lock);
+ for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
+ if (!crash_shutdown_handles[i]) {
+ /* Insert handle at first empty entry */
+ crash_shutdown_handles[i] = handler;
+ rc = 0;
+ break;
+ }
+
+ if (i == CRASH_HANDLER_MAX) {
+ printk(KERN_ERR "Crash shutdown handles full, "
+ "not registered.\n");
+ rc = 1;
+ }
+
+ spin_unlock(&crash_handlers_lock);
+ return rc;
+}
+EXPORT_SYMBOL(crash_shutdown_register);
+
+int crash_shutdown_unregister(crash_shutdown_t handler)
+{
+ unsigned int i, rc;
+
+ spin_lock(&crash_handlers_lock);
+ for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
+ if (crash_shutdown_handles[i] == handler)
+ break;
+
+ if (i == CRASH_HANDLER_MAX) {
+ printk(KERN_ERR "Crash shutdown handle not found\n");
+ rc = 1;
+ } else {
+ /* Shift handles down */
+ for (; crash_shutdown_handles[i]; i++)
+ crash_shutdown_handles[i] =
+ crash_shutdown_handles[i+1];
+ rc = 0;
+ }
+
+ spin_unlock(&crash_handlers_lock);
+ return rc;
+}
+EXPORT_SYMBOL(crash_shutdown_unregister);
+
+void default_machine_crash_shutdown(struct pt_regs *regs)
+{
+ unsigned int i;
+ int (*old_handler)(struct pt_regs *regs);
+
+ /*
+ * This function is only called after the system
+ * has panicked or is otherwise in a critical state.
+ * The minimum amount of code to allow a kexec'd kernel
+ * to run successfully needs to happen here.
+ *
+ * In practice this means stopping other cpus in
+ * an SMP system.
+ * The kernel is broken so disable interrupts.
+ */
+ hard_irq_disable();
+
+ /*
+ * Make a note of crashing cpu. Will be used in machine_kexec
+ * such that another IPI will not be sent.
+ */
+ crashing_cpu = smp_processor_id();
+
+ /*
+ * If we came in via system reset, wait a while for the secondary
+ * CPUs to enter.
+ */
+ if (TRAP(regs) == 0x100)
+ mdelay(PRIMARY_TIMEOUT);
+
+ crash_kexec_prepare_cpus(crashing_cpu);
+
+ crash_save_cpu(regs, crashing_cpu);
+
+ time_to_dump = 1;
+
+ crash_kexec_wait_realmode(crashing_cpu);
+
+ machine_kexec_mask_interrupts();
+
+ /*
+ * Call registered shutdown routines safely. Swap out
+ * __debugger_fault_handler, and replace on exit.
+ */
+ old_handler = __debugger_fault_handler;
+ __debugger_fault_handler = handle_fault;
+ crash_shutdown_cpu = smp_processor_id();
+ for (i = 0; crash_shutdown_handles[i]; i++) {
+ if (setjmp(crash_shutdown_buf) == 0) {
+ /*
+ * Insert syncs and delay to ensure
+ * instructions in the dangerous region don't
+ * leak away from this protected region.
+ */
+ asm volatile("sync; isync");
+ /* dangerous region */
+ crash_shutdown_handles[i]();
+ asm volatile("sync; isync");
+ }
+ }
+ crash_shutdown_cpu = -1;
+ __debugger_fault_handler = old_handler;
+
+ if (ppc_md.kexec_cpu_down)
+ ppc_md.kexec_cpu_down(1, 0);
+}
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
new file mode 100644
index 00000000..b3ba5163
--- /dev/null
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -0,0 +1,159 @@
+/*
+ * Routines for doing kexec-based kdump.
+ *
+ * Copyright (C) 2005, IBM Corp.
+ *
+ * Created by: Michael Ellerman
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#undef DEBUG
+
+#include <linux/crash_dump.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <asm/code-patching.h>
+#include <asm/kdump.h>
+#include <asm/prom.h>
+#include <asm/firmware.h>
+#include <asm/uaccess.h>
+#include <asm/rtas.h>
+
+#ifdef DEBUG
+#include <asm/udbg.h>
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+#ifndef CONFIG_NONSTATIC_KERNEL
+void __init reserve_kdump_trampoline(void)
+{
+ memblock_reserve(0, KDUMP_RESERVE_LIMIT);
+}
+
+static void __init create_trampoline(unsigned long addr)
+{
+ unsigned int *p = (unsigned int *)addr;
+
+ /* The maximum range of a single instruction branch, is the current
+ * instruction's address + (32 MB - 4) bytes. For the trampoline we
+ * need to branch to current address + 32 MB. So we insert a nop at
+ * the trampoline address, then the next instruction (+ 4 bytes)
+ * does a branch to (32 MB - 4). The net effect is that when we
+ * branch to "addr" we jump to ("addr" + 32 MB). Although it requires
+ * two instructions it doesn't require any registers.
+ */
+ patch_instruction(p, PPC_INST_NOP);
+ patch_branch(++p, addr + PHYSICAL_START, 0);
+}
+
+void __init setup_kdump_trampoline(void)
+{
+ unsigned long i;
+
+ DBG(" -> setup_kdump_trampoline()\n");
+
+ for (i = KDUMP_TRAMPOLINE_START; i < KDUMP_TRAMPOLINE_END; i += 8) {
+ create_trampoline(i);
+ }
+
+#ifdef CONFIG_PPC_PSERIES
+ create_trampoline(__pa(system_reset_fwnmi) - PHYSICAL_START);
+ create_trampoline(__pa(machine_check_fwnmi) - PHYSICAL_START);
+#endif /* CONFIG_PPC_PSERIES */
+
+ DBG(" <- setup_kdump_trampoline()\n");
+}
+#endif /* CONFIG_NONSTATIC_KERNEL */
+
+static int __init parse_savemaxmem(char *p)
+{
+ if (p)
+ saved_max_pfn = (memparse(p, &p) >> PAGE_SHIFT) - 1;
+
+ return 1;
+}
+__setup("savemaxmem=", parse_savemaxmem);
+
+
+static size_t copy_oldmem_vaddr(void *vaddr, char *buf, size_t csize,
+ unsigned long offset, int userbuf)
+{
+ if (userbuf) {
+ if (copy_to_user((char __user *)buf, (vaddr + offset), csize))
+ return -EFAULT;
+ } else
+ memcpy(buf, (vaddr + offset), csize);
+
+ return csize;
+}
+
+/**
+ * copy_oldmem_page - copy one page from "oldmem"
+ * @pfn: page frame number to be copied
+ * @buf: target memory address for the copy; this can be in kernel address
+ * space or user address space (see @userbuf)
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page (based on pfn) to begin the copy
+ * @userbuf: if set, @buf is in user address space, use copy_to_user(),
+ * otherwise @buf is in kernel address space, use memcpy().
+ *
+ * Copy a page from "oldmem". For this page, there is no pte mapped
+ * in the current kernel. We stitch up a pte, similar to kmap_atomic.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+ size_t csize, unsigned long offset, int userbuf)
+{
+ void *vaddr;
+
+ if (!csize)
+ return 0;
+
+ csize = min_t(size_t, csize, PAGE_SIZE);
+
+ if ((min_low_pfn < pfn) && (pfn < max_pfn)) {
+ vaddr = __va(pfn << PAGE_SHIFT);
+ csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
+ } else {
+ vaddr = __ioremap(pfn << PAGE_SHIFT, PAGE_SIZE, 0);
+ csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
+ iounmap(vaddr);
+ }
+
+ return csize;
+}
+
+#ifdef CONFIG_PPC_RTAS
+/*
+ * The crashkernel region will almost always overlap the RTAS region, so
+ * we have to be careful when shrinking the crashkernel region.
+ */
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
+{
+ unsigned long addr;
+ const u32 *basep, *sizep;
+ unsigned int rtas_start = 0, rtas_end = 0;
+
+ basep = of_get_property(rtas.dev, "linux,rtas-base", NULL);
+ sizep = of_get_property(rtas.dev, "rtas-size", NULL);
+
+ if (basep && sizep) {
+ rtas_start = *basep;
+ rtas_end = *basep + *sizep;
+ }
+
+ for (addr = begin; addr < end; addr += PAGE_SIZE) {
+ /* Does this page overlap with the RTAS region? */
+ if (addr <= rtas_end && ((addr + PAGE_SIZE) > rtas_start))
+ continue;
+
+ ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT));
+ init_page_count(pfn_to_page(addr >> PAGE_SHIFT));
+ free_page((unsigned long)__va(addr));
+ totalram_pages++;
+ }
+}
+#endif
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
new file mode 100644
index 00000000..5b25c806
--- /dev/null
+++ b/arch/powerpc/kernel/dbell.c
@@ -0,0 +1,53 @@
+/*
+ * Author: Kumar Gala <galak@kernel.crashing.org>
+ *
+ * Copyright 2009 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/threads.h>
+#include <linux/hardirq.h>
+
+#include <asm/dbell.h>
+#include <asm/irq_regs.h>
+
+#ifdef CONFIG_SMP
+void doorbell_setup_this_cpu(void)
+{
+ unsigned long tag = mfspr(SPRN_PIR) & 0x3fff;
+
+ smp_muxed_ipi_set_data(smp_processor_id(), tag);
+}
+
+void doorbell_cause_ipi(int cpu, unsigned long data)
+{
+ ppc_msgsnd(PPC_DBELL, 0, data);
+}
+
+void doorbell_exception(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ irq_enter();
+
+ may_hard_irq_enable();
+
+ smp_ipi_demux();
+
+ irq_exit();
+ set_irq_regs(old_regs);
+}
+#else /* CONFIG_SMP */
+void doorbell_exception(struct pt_regs *regs)
+{
+ printk(KERN_WARNING "Received doorbell on non-smp system\n");
+}
+#endif /* CONFIG_SMP */
+
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
new file mode 100644
index 00000000..bcfdcd22
--- /dev/null
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * Provide default implementations of the DMA mapping callbacks for
+ * busses using the iommu infrastructure
+ */
+
+#include <linux/export.h>
+#include <asm/iommu.h>
+
+/*
+ * Generic iommu implementation
+ */
+
+/* Allocates a contiguous real buffer and creates mappings over it.
+ * Returns the virtual address of the buffer and sets dma_handle
+ * to the dma address (mapping) of the first page.
+ */
+static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag,
+ struct dma_attrs *attrs)
+{
+ return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
+ dma_handle, dev->coherent_dma_mask, flag,
+ dev_to_node(dev));
+}
+
+static void dma_iommu_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+ iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
+}
+
+/* Creates TCEs for a user provided buffer. The user buffer must be
+ * contiguous real kernel storage (not vmalloc). The address passed here
+ * comprises a page address and offset into that page. The dma_addr_t
+ * returned will point to the same byte within the page as was passed in.
+ */
+static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ return iommu_map_page(dev, get_iommu_table_base(dev), page, offset,
+ size, device_to_mask(dev), direction, attrs);
+}
+
+
+static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction,
+ attrs);
+}
+
+
+static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ return iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
+ device_to_mask(dev), direction, attrs);
+}
+
+static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, direction,
+ attrs);
+}
+
+/* We support DMA to/from any memory page via the iommu */
+static int dma_iommu_dma_supported(struct device *dev, u64 mask)
+{
+ struct iommu_table *tbl = get_iommu_table_base(dev);
+
+ if (!tbl) {
+ dev_info(dev, "Warning: IOMMU dma not supported: mask 0x%08llx"
+ ", table unavailable\n", mask);
+ return 0;
+ }
+
+ if ((tbl->it_offset + tbl->it_size) > (mask >> IOMMU_PAGE_SHIFT)) {
+ dev_info(dev, "Warning: IOMMU window too big for device mask\n");
+ dev_info(dev, "mask: 0x%08llx, table end: 0x%08lx\n",
+ mask, (tbl->it_offset + tbl->it_size) <<
+ IOMMU_PAGE_SHIFT);
+ return 0;
+ } else
+ return 1;
+}
+
+static u64 dma_iommu_get_required_mask(struct device *dev)
+{
+ struct iommu_table *tbl = get_iommu_table_base(dev);
+ u64 mask;
+ if (!tbl)
+ return 0;
+
+ mask = 1ULL < (fls_long(tbl->it_offset + tbl->it_size) - 1);
+ mask += mask - 1;
+
+ return mask;
+}
+
+struct dma_map_ops dma_iommu_ops = {
+ .alloc = dma_iommu_alloc_coherent,
+ .free = dma_iommu_free_coherent,
+ .map_sg = dma_iommu_map_sg,
+ .unmap_sg = dma_iommu_unmap_sg,
+ .dma_supported = dma_iommu_dma_supported,
+ .map_page = dma_iommu_map_page,
+ .unmap_page = dma_iommu_unmap_page,
+ .get_required_mask = dma_iommu_get_required_mask,
+};
+EXPORT_SYMBOL(dma_iommu_ops);
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
new file mode 100644
index 00000000..4ab88daf
--- /dev/null
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -0,0 +1,106 @@
+/*
+ * Contains routines needed to support swiotlb for ppc.
+ *
+ * Copyright (C) 2009-2010 Freescale Semiconductor, Inc.
+ * Author: Becky Bruce
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/pfn.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pci.h>
+
+#include <asm/machdep.h>
+#include <asm/swiotlb.h>
+#include <asm/dma.h>
+#include <asm/abs_addr.h>
+
+unsigned int ppc_swiotlb_enable;
+
+static u64 swiotlb_powerpc_get_required(struct device *dev)
+{
+ u64 end, mask, max_direct_dma_addr = dev->archdata.max_direct_dma_addr;
+
+ end = memblock_end_of_DRAM();
+ if (max_direct_dma_addr && end > max_direct_dma_addr)
+ end = max_direct_dma_addr;
+ end += get_dma_offset(dev);
+
+ mask = 1ULL << (fls64(end) - 1);
+ mask += mask - 1;
+
+ return mask;
+}
+
+/*
+ * At the moment, all platforms that use this code only require
+ * swiotlb to be used if we're operating on HIGHMEM. Since
+ * we don't ever call anything other than map_sg, unmap_sg,
+ * map_page, and unmap_page on highmem, use normal dma_ops
+ * for everything else.
+ */
+struct dma_map_ops swiotlb_dma_ops = {
+ .alloc = dma_direct_alloc_coherent,
+ .free = dma_direct_free_coherent,
+ .map_sg = swiotlb_map_sg_attrs,
+ .unmap_sg = swiotlb_unmap_sg_attrs,
+ .dma_supported = swiotlb_dma_supported,
+ .map_page = swiotlb_map_page,
+ .unmap_page = swiotlb_unmap_page,
+ .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+ .sync_single_for_device = swiotlb_sync_single_for_device,
+ .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+ .sync_sg_for_device = swiotlb_sync_sg_for_device,
+ .mapping_error = swiotlb_dma_mapping_error,
+ .get_required_mask = swiotlb_powerpc_get_required,
+};
+
+void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev)
+{
+ struct pci_controller *hose;
+ struct dev_archdata *sd;
+
+ hose = pci_bus_to_host(pdev->bus);
+ sd = &pdev->dev.archdata;
+ sd->max_direct_dma_addr =
+ hose->dma_window_base_cur + hose->dma_window_size;
+}
+
+static int ppc_swiotlb_bus_notify(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+ struct dev_archdata *sd;
+
+ /* We are only intereted in device addition */
+ if (action != BUS_NOTIFY_ADD_DEVICE)
+ return 0;
+
+ sd = &dev->archdata;
+ sd->max_direct_dma_addr = 0;
+
+ /* May need to bounce if the device can't address all of DRAM */
+ if ((dma_get_mask(dev) + 1) < memblock_end_of_DRAM())
+ set_dma_ops(dev, &swiotlb_dma_ops);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ppc_swiotlb_plat_bus_notifier = {
+ .notifier_call = ppc_swiotlb_bus_notify,
+ .priority = 0,
+};
+
+int __init swiotlb_setup_bus_notifier(void)
+{
+ bus_register_notifier(&platform_bus_type,
+ &ppc_swiotlb_plat_bus_notifier);
+ return 0;
+}
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
new file mode 100644
index 00000000..b1ec983d
--- /dev/null
+++ b/arch/powerpc/kernel/dma.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * Provide default implementations of the DMA mapping callbacks for
+ * directly mapped busses.
+ */
+
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-debug.h>
+#include <linux/gfp.h>
+#include <linux/memblock.h>
+#include <linux/export.h>
+#include <asm/bug.h>
+#include <asm/abs_addr.h>
+#include <asm/machdep.h>
+
+/*
+ * Generic direct DMA implementation
+ *
+ * This implementation supports a per-device offset that can be applied if
+ * the address at which memory is visible to devices is not 0. Platform code
+ * can set archdata.dma_data to an unsigned long holding the offset. By
+ * default the offset is PCI_DRAM_OFFSET.
+ */
+
+
+void *dma_direct_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag,
+ struct dma_attrs *attrs)
+{
+ void *ret;
+#ifdef CONFIG_NOT_COHERENT_CACHE
+ ret = __dma_alloc_coherent(dev, size, dma_handle, flag);
+ if (ret == NULL)
+ return NULL;
+ *dma_handle += get_dma_offset(dev);
+ return ret;
+#else
+ struct page *page;
+ int node = dev_to_node(dev);
+
+ /* ignore region specifiers */
+ flag &= ~(__GFP_HIGHMEM);
+
+ page = alloc_pages_node(node, flag, get_order(size));
+ if (page == NULL)
+ return NULL;
+ ret = page_address(page);
+ memset(ret, 0, size);
+ *dma_handle = virt_to_abs(ret) + get_dma_offset(dev);
+
+ return ret;
+#endif
+}
+
+void dma_direct_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+#ifdef CONFIG_NOT_COHERENT_CACHE
+ __dma_free_coherent(size, vaddr);
+#else
+ free_pages((unsigned long)vaddr, get_order(size));
+#endif
+}
+
+static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nents, i) {
+ sg->dma_address = sg_phys(sg) + get_dma_offset(dev);
+ sg->dma_length = sg->length;
+ __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
+ }
+
+ return nents;
+}
+
+static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+}
+
+static int dma_direct_dma_supported(struct device *dev, u64 mask)
+{
+#ifdef CONFIG_PPC64
+ /* Could be improved so platforms can set the limit in case
+ * they have limited DMA windows
+ */
+ return mask >= get_dma_offset(dev) + (memblock_end_of_DRAM() - 1);
+#else
+ return 1;
+#endif
+}
+
+static u64 dma_direct_get_required_mask(struct device *dev)
+{
+ u64 end, mask;
+
+ end = memblock_end_of_DRAM() + get_dma_offset(dev);
+
+ mask = 1ULL << (fls64(end) - 1);
+ mask += mask - 1;
+
+ return mask;
+}
+
+static inline dma_addr_t dma_direct_map_page(struct device *dev,
+ struct page *page,
+ unsigned long offset,
+ size_t size,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ BUG_ON(dir == DMA_NONE);
+ __dma_sync_page(page, offset, size, dir);
+ return page_to_phys(page) + offset + get_dma_offset(dev);
+}
+
+static inline void dma_direct_unmap_page(struct device *dev,
+ dma_addr_t dma_address,
+ size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+}
+
+#ifdef CONFIG_NOT_COHERENT_CACHE
+static inline void dma_direct_sync_sg(struct device *dev,
+ struct scatterlist *sgl, int nents,
+ enum dma_data_direction direction)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nents, i)
+ __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
+}
+
+static inline void dma_direct_sync_single(struct device *dev,
+ dma_addr_t dma_handle, size_t size,
+ enum dma_data_direction direction)
+{
+ __dma_sync(bus_to_virt(dma_handle), size, direction);
+}
+#endif
+
+struct dma_map_ops dma_direct_ops = {
+ .alloc = dma_direct_alloc_coherent,
+ .free = dma_direct_free_coherent,
+ .map_sg = dma_direct_map_sg,
+ .unmap_sg = dma_direct_unmap_sg,
+ .dma_supported = dma_direct_dma_supported,
+ .map_page = dma_direct_map_page,
+ .unmap_page = dma_direct_unmap_page,
+ .get_required_mask = dma_direct_get_required_mask,
+#ifdef CONFIG_NOT_COHERENT_CACHE
+ .sync_single_for_cpu = dma_direct_sync_single,
+ .sync_single_for_device = dma_direct_sync_single,
+ .sync_sg_for_cpu = dma_direct_sync_sg,
+ .sync_sg_for_device = dma_direct_sync_sg,
+#endif
+};
+EXPORT_SYMBOL(dma_direct_ops);
+
+#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
+
+int dma_set_mask(struct device *dev, u64 dma_mask)
+{
+ struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+ if (ppc_md.dma_set_mask)
+ return ppc_md.dma_set_mask(dev, dma_mask);
+ if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL))
+ return dma_ops->set_dma_mask(dev, dma_mask);
+ if (!dev->dma_mask || !dma_supported(dev, dma_mask))
+ return -EIO;
+ *dev->dma_mask = dma_mask;
+ return 0;
+}
+EXPORT_SYMBOL(dma_set_mask);
+
+u64 dma_get_required_mask(struct device *dev)
+{
+ struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+ if (ppc_md.dma_get_required_mask)
+ return ppc_md.dma_get_required_mask(dev);
+
+ if (unlikely(dma_ops == NULL))
+ return 0;
+
+ if (dma_ops->get_required_mask)
+ return dma_ops->get_required_mask(dev);
+
+ return DMA_BIT_MASK(8 * sizeof(dma_addr_t));
+}
+EXPORT_SYMBOL_GPL(dma_get_required_mask);
+
+static int __init dma_init(void)
+{
+ dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+
+ return 0;
+}
+fs_initcall(dma_init);
+
+int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t handle, size_t size)
+{
+ unsigned long pfn;
+
+#ifdef CONFIG_NOT_COHERENT_CACHE
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr);
+#else
+ pfn = page_to_pfn(virt_to_page(cpu_addr));
+#endif
+ return remap_pfn_range(vma, vma->vm_start,
+ pfn + vma->vm_pgoff,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+}
+EXPORT_SYMBOL_GPL(dma_mmap_coherent);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
new file mode 100644
index 00000000..ba3aeb4b
--- /dev/null
+++ b/arch/powerpc/kernel/entry_32.S
@@ -0,0 +1,1412 @@
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ * Rewritten by Cort Dougan (cort@fsmlabs.com) for PReP
+ * Copyright (C) 1996 Cort Dougan <cort@fsmlabs.com>
+ * Adapted for Power Macintosh by Paul Mackerras.
+ * Low-level exception handlers and MMU support
+ * rewritten by Paul Mackerras.
+ * Copyright (C) 1996 Paul Mackerras.
+ * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+ *
+ * This file contains the system call entry code, context switch
+ * code, and exception/interrupt return code for PowerPC.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/sys.h>
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/ftrace.h>
+#include <asm/ptrace.h>
+
+#undef SHOW_SYSCALLS
+#undef SHOW_SYSCALLS_TASK
+
+/*
+ * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE.
+ */
+#if MSR_KERNEL >= 0x10000
+#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l
+#else
+#define LOAD_MSR_KERNEL(r, x) li r,(x)
+#endif
+
+#ifdef CONFIG_BOOKE
+ .globl mcheck_transfer_to_handler
+mcheck_transfer_to_handler:
+ mfspr r0,SPRN_DSRR0
+ stw r0,_DSRR0(r11)
+ mfspr r0,SPRN_DSRR1
+ stw r0,_DSRR1(r11)
+ /* fall through */
+
+ .globl debug_transfer_to_handler
+debug_transfer_to_handler:
+ mfspr r0,SPRN_CSRR0
+ stw r0,_CSRR0(r11)
+ mfspr r0,SPRN_CSRR1
+ stw r0,_CSRR1(r11)
+ /* fall through */
+
+ .globl crit_transfer_to_handler
+crit_transfer_to_handler:
+#ifdef CONFIG_PPC_BOOK3E_MMU
+ mfspr r0,SPRN_MAS0
+ stw r0,MAS0(r11)
+ mfspr r0,SPRN_MAS1
+ stw r0,MAS1(r11)
+ mfspr r0,SPRN_MAS2
+ stw r0,MAS2(r11)
+ mfspr r0,SPRN_MAS3
+ stw r0,MAS3(r11)
+ mfspr r0,SPRN_MAS6
+ stw r0,MAS6(r11)
+#ifdef CONFIG_PHYS_64BIT
+ mfspr r0,SPRN_MAS7
+ stw r0,MAS7(r11)
+#endif /* CONFIG_PHYS_64BIT */
+#endif /* CONFIG_PPC_BOOK3E_MMU */
+#ifdef CONFIG_44x
+ mfspr r0,SPRN_MMUCR
+ stw r0,MMUCR(r11)
+#endif
+ mfspr r0,SPRN_SRR0
+ stw r0,_SRR0(r11)
+ mfspr r0,SPRN_SRR1
+ stw r0,_SRR1(r11)
+
+ mfspr r8,SPRN_SPRG_THREAD
+ lwz r0,KSP_LIMIT(r8)
+ stw r0,SAVED_KSP_LIMIT(r11)
+ rlwimi r0,r1,0,0,(31-THREAD_SHIFT)
+ stw r0,KSP_LIMIT(r8)
+ /* fall through */
+#endif
+
+#ifdef CONFIG_40x
+ .globl crit_transfer_to_handler
+crit_transfer_to_handler:
+ lwz r0,crit_r10@l(0)
+ stw r0,GPR10(r11)
+ lwz r0,crit_r11@l(0)
+ stw r0,GPR11(r11)
+ mfspr r0,SPRN_SRR0
+ stw r0,crit_srr0@l(0)
+ mfspr r0,SPRN_SRR1
+ stw r0,crit_srr1@l(0)
+
+ mfspr r8,SPRN_SPRG_THREAD
+ lwz r0,KSP_LIMIT(r8)
+ stw r0,saved_ksp_limit@l(0)
+ rlwimi r0,r1,0,0,(31-THREAD_SHIFT)
+ stw r0,KSP_LIMIT(r8)
+ /* fall through */
+#endif
+
+/*
+ * This code finishes saving the registers to the exception frame
+ * and jumps to the appropriate handler for the exception, turning
+ * on address translation.
+ * Note that we rely on the caller having set cr0.eq iff the exception
+ * occurred in kernel mode (i.e. MSR:PR = 0).
+ */
+ .globl transfer_to_handler_full
+transfer_to_handler_full:
+ SAVE_NVGPRS(r11)
+ /* fall through */
+
+ .globl transfer_to_handler
+transfer_to_handler:
+ stw r2,GPR2(r11)
+ stw r12,_NIP(r11)
+ stw r9,_MSR(r11)
+ andi. r2,r9,MSR_PR
+ mfctr r12
+ mfspr r2,SPRN_XER
+ stw r12,_CTR(r11)
+ stw r2,_XER(r11)
+ mfspr r12,SPRN_SPRG_THREAD
+ addi r2,r12,-THREAD
+ tovirt(r2,r2) /* set r2 to current */
+ beq 2f /* if from user, fix up THREAD.regs */
+ addi r11,r1,STACK_FRAME_OVERHEAD
+ stw r11,PT_REGS(r12)
+#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+ /* Check to see if the dbcr0 register is set up to debug. Use the
+ internal debug mode bit to do this. */
+ lwz r12,THREAD_DBCR0(r12)
+ andis. r12,r12,DBCR0_IDM@h
+ beq+ 3f
+ /* From user and task is ptraced - load up global dbcr0 */
+ li r12,-1 /* clear all pending debug events */
+ mtspr SPRN_DBSR,r12
+ lis r11,global_dbcr0@ha
+ tophys(r11,r11)
+ addi r11,r11,global_dbcr0@l
+#ifdef CONFIG_SMP
+ rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
+ lwz r9,TI_CPU(r9)
+ slwi r9,r9,3
+ add r11,r11,r9
+#endif
+ lwz r12,0(r11)
+ mtspr SPRN_DBCR0,r12
+ lwz r12,4(r11)
+ addi r12,r12,-1
+ stw r12,4(r11)
+#endif
+ b 3f
+
+2: /* if from kernel, check interrupted DOZE/NAP mode and
+ * check for stack overflow
+ */
+ lwz r9,KSP_LIMIT(r12)
+ cmplw r1,r9 /* if r1 <= ksp_limit */
+ ble- stack_ovf /* then the kernel stack overflowed */
+5:
+#if defined(CONFIG_6xx) || defined(CONFIG_E500)
+ rlwinm r9,r1,0,0,31-THREAD_SHIFT
+ tophys(r9,r9) /* check local flags */
+ lwz r12,TI_LOCAL_FLAGS(r9)
+ mtcrf 0x01,r12
+ bt- 31-TLF_NAPPING,4f
+ bt- 31-TLF_SLEEPING,7f
+#endif /* CONFIG_6xx || CONFIG_E500 */
+ .globl transfer_to_handler_cont
+transfer_to_handler_cont:
+3:
+ mflr r9
+ lwz r11,0(r9) /* virtual address of handler */
+ lwz r9,4(r9) /* where to go when done */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ lis r12,reenable_mmu@h
+ ori r12,r12,reenable_mmu@l
+ mtspr SPRN_SRR0,r12
+ mtspr SPRN_SRR1,r10
+ SYNC
+ RFI
+reenable_mmu: /* re-enable mmu so we can */
+ mfmsr r10
+ lwz r12,_MSR(r1)
+ xor r10,r10,r12
+ andi. r10,r10,MSR_EE /* Did EE change? */
+ beq 1f
+
+ /*
+ * The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1.
+ * If from user mode there is only one stack frame on the stack, and
+ * accessing CALLER_ADDR1 will cause oops. So we need create a dummy
+ * stack frame to make trace_hardirqs_off happy.
+ *
+ * This is handy because we also need to save a bunch of GPRs,
+ * r3 can be different from GPR3(r1) at this point, r9 and r11
+ * contains the old MSR and handler address respectively,
+ * r4 & r5 can contain page fault arguments that need to be passed
+ * along as well. r12, CCR, CTR, XER etc... are left clobbered as
+ * they aren't useful past this point (aren't syscall arguments),
+ * the rest is restored from the exception frame.
+ */
+ stwu r1,-32(r1)
+ stw r9,8(r1)
+ stw r11,12(r1)
+ stw r3,16(r1)
+ stw r4,20(r1)
+ stw r5,24(r1)
+ andi. r12,r12,MSR_PR
+ b 11f
+ bl trace_hardirqs_off
+ b 12f
+11:
+ bl trace_hardirqs_off
+12:
+ lwz r5,24(r1)
+ lwz r4,20(r1)
+ lwz r3,16(r1)
+ lwz r11,12(r1)
+ lwz r9,8(r1)
+ addi r1,r1,32
+ lwz r0,GPR0(r1)
+ lwz r6,GPR6(r1)
+ lwz r7,GPR7(r1)
+ lwz r8,GPR8(r1)
+1: mtctr r11
+ mtlr r9
+ bctr /* jump to handler */
+#else /* CONFIG_TRACE_IRQFLAGS */
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r10
+ mtlr r9
+ SYNC
+ RFI /* jump to handler, enable MMU */
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+#if defined (CONFIG_6xx) || defined(CONFIG_E500)
+4: rlwinm r12,r12,0,~_TLF_NAPPING
+ stw r12,TI_LOCAL_FLAGS(r9)
+ b power_save_ppc32_restore
+
+7: rlwinm r12,r12,0,~_TLF_SLEEPING
+ stw r12,TI_LOCAL_FLAGS(r9)
+ lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */
+ rlwinm r9,r9,0,~MSR_EE
+ lwz r12,_LINK(r11) /* and return to address in LR */
+ b fast_exception_return
+#endif
+
+/*
+ * On kernel stack overflow, load up an initial stack pointer
+ * and call StackOverflow(regs), which should not return.
+ */
+stack_ovf:
+ /* sometimes we use a statically-allocated stack, which is OK. */
+ lis r12,_end@h
+ ori r12,r12,_end@l
+ cmplw r1,r12
+ ble 5b /* r1 <= &_end is OK */
+ SAVE_NVGPRS(r11)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ lis r1,init_thread_union@ha
+ addi r1,r1,init_thread_union@l
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ lis r9,StackOverflow@ha
+ addi r9,r9,StackOverflow@l
+ LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ FIX_SRR1(r10,r12)
+ mtspr SPRN_SRR0,r9
+ mtspr SPRN_SRR1,r10
+ SYNC
+ RFI
+
+/*
+ * Handle a system call.
+ */
+ .stabs "arch/powerpc/kernel/",N_SO,0,0,0f
+ .stabs "entry_32.S",N_SO,0,0,0f
+0:
+
+_GLOBAL(DoSyscall)
+ stw r3,ORIG_GPR3(r1)
+ li r12,0
+ stw r12,RESULT(r1)
+ lwz r11,_CCR(r1) /* Clear SO bit in CR */
+ rlwinm r11,r11,0,4,2
+ stw r11,_CCR(r1)
+#ifdef SHOW_SYSCALLS
+ bl do_show_syscall
+#endif /* SHOW_SYSCALLS */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* Return from syscalls can (and generally will) hard enable
+ * interrupts. You aren't supposed to call a syscall with
+ * interrupts disabled in the first place. However, to ensure
+ * that we get it right vs. lockdep if it happens, we force
+ * that hard enable here with appropriate tracing if we see
+ * that we have been called with interrupts off
+ */
+ mfmsr r11
+ andi. r12,r11,MSR_EE
+ bne+ 1f
+ /* We came in with interrupts disabled, we enable them now */
+ bl trace_hardirqs_on
+ mfmsr r11
+ lwz r0,GPR0(r1)
+ lwz r3,GPR3(r1)
+ lwz r4,GPR4(r1)
+ ori r11,r11,MSR_EE
+ lwz r5,GPR5(r1)
+ lwz r6,GPR6(r1)
+ lwz r7,GPR7(r1)
+ lwz r8,GPR8(r1)
+ mtmsr r11
+1:
+#endif /* CONFIG_TRACE_IRQFLAGS */
+ rlwinm r10,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */
+ lwz r11,TI_FLAGS(r10)
+ andi. r11,r11,_TIF_SYSCALL_T_OR_A
+ bne- syscall_dotrace
+syscall_dotrace_cont:
+ cmplwi 0,r0,NR_syscalls
+ lis r10,sys_call_table@h
+ ori r10,r10,sys_call_table@l
+ slwi r0,r0,2
+ bge- 66f
+ lwzx r10,r10,r0 /* Fetch system call handler [ptr] */
+ mtlr r10
+ addi r9,r1,STACK_FRAME_OVERHEAD
+ PPC440EP_ERR42
+ blrl /* Call handler */
+ .globl ret_from_syscall
+ret_from_syscall:
+#ifdef SHOW_SYSCALLS
+ bl do_show_syscall_exit
+#endif
+ mr r6,r3
+ rlwinm r12,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */
+ /* disable interrupts so current_thread_info()->flags can't change */
+ LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */
+ /* Note: We don't bother telling lockdep about it */
+ SYNC
+ MTMSRD(r10)
+ lwz r9,TI_FLAGS(r12)
+ li r8,-_LAST_ERRNO
+ andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
+ bne- syscall_exit_work
+ cmplw 0,r3,r8
+ blt+ syscall_exit_cont
+ lwz r11,_CCR(r1) /* Load CR */
+ neg r3,r3
+ oris r11,r11,0x1000 /* Set SO bit in CR */
+ stw r11,_CCR(r1)
+syscall_exit_cont:
+ lwz r8,_MSR(r1)
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* If we are going to return from the syscall with interrupts
+ * off, we trace that here. It shouldn't happen though but we
+ * want to catch the bugger if it does right ?
+ */
+ andi. r10,r8,MSR_EE
+ bne+ 1f
+ stw r3,GPR3(r1)
+ bl trace_hardirqs_off
+ lwz r3,GPR3(r1)
+1:
+#endif /* CONFIG_TRACE_IRQFLAGS */
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+ /* If the process has its own DBCR0 value, load it up. The internal
+ debug mode bit tells us that dbcr0 should be loaded. */
+ lwz r0,THREAD+THREAD_DBCR0(r2)
+ andis. r10,r0,DBCR0_IDM@h
+ bnel- load_dbcr0
+#endif
+#ifdef CONFIG_44x
+BEGIN_MMU_FTR_SECTION
+ lis r4,icache_44x_need_flush@ha
+ lwz r5,icache_44x_need_flush@l(r4)
+ cmplwi cr0,r5,0
+ bne- 2f
+1:
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x)
+#endif /* CONFIG_44x */
+BEGIN_FTR_SECTION
+ lwarx r7,0,r1
+END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
+ stwcx. r0,0,r1 /* to clear the reservation */
+ lwz r4,_LINK(r1)
+ lwz r5,_CCR(r1)
+ mtlr r4
+ mtcr r5
+ lwz r7,_NIP(r1)
+ FIX_SRR1(r8, r0)
+ lwz r2,GPR2(r1)
+ lwz r1,GPR1(r1)
+ mtspr SPRN_SRR0,r7
+ mtspr SPRN_SRR1,r8
+ SYNC
+ RFI
+#ifdef CONFIG_44x
+2: li r7,0
+ iccci r0,r0
+ stw r7,icache_44x_need_flush@l(r4)
+ b 1b
+#endif /* CONFIG_44x */
+
+66: li r3,-ENOSYS
+ b ret_from_syscall
+
+ .globl ret_from_fork
+ret_from_fork:
+ REST_NVGPRS(r1)
+ bl schedule_tail
+ li r3,0
+ b ret_from_syscall
+
+/* Traced system call support */
+syscall_dotrace:
+ SAVE_NVGPRS(r1)
+ li r0,0xc00
+ stw r0,_TRAP(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_syscall_trace_enter
+ /*
+ * Restore argument registers possibly just changed.
+ * We use the return value of do_syscall_trace_enter
+ * for call number to look up in the table (r0).
+ */
+ mr r0,r3
+ lwz r3,GPR3(r1)
+ lwz r4,GPR4(r1)
+ lwz r5,GPR5(r1)
+ lwz r6,GPR6(r1)
+ lwz r7,GPR7(r1)
+ lwz r8,GPR8(r1)
+ REST_NVGPRS(r1)
+ b syscall_dotrace_cont
+
+syscall_exit_work:
+ andi. r0,r9,_TIF_RESTOREALL
+ beq+ 0f
+ REST_NVGPRS(r1)
+ b 2f
+0: cmplw 0,r3,r8
+ blt+ 1f
+ andi. r0,r9,_TIF_NOERROR
+ bne- 1f
+ lwz r11,_CCR(r1) /* Load CR */
+ neg r3,r3
+ oris r11,r11,0x1000 /* Set SO bit in CR */
+ stw r11,_CCR(r1)
+
+1: stw r6,RESULT(r1) /* Save result */
+ stw r3,GPR3(r1) /* Update return value */
+2: andi. r0,r9,(_TIF_PERSYSCALL_MASK)
+ beq 4f
+
+ /* Clear per-syscall TIF flags if any are set. */
+
+ li r11,_TIF_PERSYSCALL_MASK
+ addi r12,r12,TI_FLAGS
+3: lwarx r8,0,r12
+ andc r8,r8,r11
+#ifdef CONFIG_IBM405_ERR77
+ dcbt 0,r12
+#endif
+ stwcx. r8,0,r12
+ bne- 3b
+ subi r12,r12,TI_FLAGS
+
+4: /* Anything which requires enabling interrupts? */
+ andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP)
+ beq ret_from_except
+
+ /* Re-enable interrupts. There is no need to trace that with
+ * lockdep as we are supposed to have IRQs on at this point
+ */
+ ori r10,r10,MSR_EE
+ SYNC
+ MTMSRD(r10)
+
+ /* Save NVGPRS if they're not saved already */
+ lwz r4,_TRAP(r1)
+ andi. r4,r4,1
+ beq 5f
+ SAVE_NVGPRS(r1)
+ li r4,0xc00
+ stw r4,_TRAP(r1)
+5:
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_syscall_trace_leave
+ b ret_from_except_full
+
+#ifdef SHOW_SYSCALLS
+do_show_syscall:
+#ifdef SHOW_SYSCALLS_TASK
+ lis r11,show_syscalls_task@ha
+ lwz r11,show_syscalls_task@l(r11)
+ cmp 0,r2,r11
+ bnelr
+#endif
+ stw r31,GPR31(r1)
+ mflr r31
+ lis r3,7f@ha
+ addi r3,r3,7f@l
+ lwz r4,GPR0(r1)
+ lwz r5,GPR3(r1)
+ lwz r6,GPR4(r1)
+ lwz r7,GPR5(r1)
+ lwz r8,GPR6(r1)
+ lwz r9,GPR7(r1)
+ bl printk
+ lis r3,77f@ha
+ addi r3,r3,77f@l
+ lwz r4,GPR8(r1)
+ mr r5,r2
+ bl printk
+ lwz r0,GPR0(r1)
+ lwz r3,GPR3(r1)
+ lwz r4,GPR4(r1)
+ lwz r5,GPR5(r1)
+ lwz r6,GPR6(r1)
+ lwz r7,GPR7(r1)
+ lwz r8,GPR8(r1)
+ mtlr r31
+ lwz r31,GPR31(r1)
+ blr
+
+do_show_syscall_exit:
+#ifdef SHOW_SYSCALLS_TASK
+ lis r11,show_syscalls_task@ha
+ lwz r11,show_syscalls_task@l(r11)
+ cmp 0,r2,r11
+ bnelr
+#endif
+ stw r31,GPR31(r1)
+ mflr r31
+ stw r3,RESULT(r1) /* Save result */
+ mr r4,r3
+ lis r3,79f@ha
+ addi r3,r3,79f@l
+ bl printk
+ lwz r3,RESULT(r1)
+ mtlr r31
+ lwz r31,GPR31(r1)
+ blr
+
+7: .string "syscall %d(%x, %x, %x, %x, %x, "
+77: .string "%x), current=%p\n"
+79: .string " -> %x\n"
+ .align 2,0
+
+#ifdef SHOW_SYSCALLS_TASK
+ .data
+ .globl show_syscalls_task
+show_syscalls_task:
+ .long -1
+ .text
+#endif
+#endif /* SHOW_SYSCALLS */
+
+/*
+ * The fork/clone functions need to copy the full register set into
+ * the child process. Therefore we need to save all the nonvolatile
+ * registers (r13 - r31) before calling the C code.
+ */
+ .globl ppc_fork
+ppc_fork:
+ SAVE_NVGPRS(r1)
+ lwz r0,_TRAP(r1)
+ rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
+ stw r0,_TRAP(r1) /* register set saved */
+ b sys_fork
+
+ .globl ppc_vfork
+ppc_vfork:
+ SAVE_NVGPRS(r1)
+ lwz r0,_TRAP(r1)
+ rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
+ stw r0,_TRAP(r1) /* register set saved */
+ b sys_vfork
+
+ .globl ppc_clone
+ppc_clone:
+ SAVE_NVGPRS(r1)
+ lwz r0,_TRAP(r1)
+ rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
+ stw r0,_TRAP(r1) /* register set saved */
+ b sys_clone
+
+ .globl ppc_swapcontext
+ppc_swapcontext:
+ SAVE_NVGPRS(r1)
+ lwz r0,_TRAP(r1)
+ rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
+ stw r0,_TRAP(r1) /* register set saved */
+ b sys_swapcontext
+
+/*
+ * Top-level page fault handling.
+ * This is in assembler because if do_page_fault tells us that
+ * it is a bad kernel page fault, we want to save the non-volatile
+ * registers before calling bad_page_fault.
+ */
+ .globl handle_page_fault
+handle_page_fault:
+ stw r4,_DAR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_page_fault
+ cmpwi r3,0
+ beq+ ret_from_except
+ SAVE_NVGPRS(r1)
+ lwz r0,_TRAP(r1)
+ clrrwi r0,r0,1
+ stw r0,_TRAP(r1)
+ mr r5,r3
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ lwz r4,_DAR(r1)
+ bl bad_page_fault
+ b ret_from_except_full
+
+/*
+ * This routine switches between two different tasks. The process
+ * state of one is saved on its kernel stack. Then the state
+ * of the other is restored from its kernel stack. The memory
+ * management hardware is updated to the second process's state.
+ * Finally, we can return to the second process.
+ * On entry, r3 points to the THREAD for the current task, r4
+ * points to the THREAD for the new task.
+ *
+ * This routine is always called with interrupts disabled.
+ *
+ * Note: there are two ways to get to the "going out" portion
+ * of this code; either by coming in via the entry (_switch)
+ * or via "fork" which must set up an environment equivalent
+ * to the "_switch" path. If you change this , you'll have to
+ * change the fork code also.
+ *
+ * The code which creates the new task context is in 'copy_thread'
+ * in arch/ppc/kernel/process.c
+ */
+_GLOBAL(_switch)
+ stwu r1,-INT_FRAME_SIZE(r1)
+ mflr r0
+ stw r0,INT_FRAME_SIZE+4(r1)
+ /* r3-r12 are caller saved -- Cort */
+ SAVE_NVGPRS(r1)
+ stw r0,_NIP(r1) /* Return to switch caller */
+ mfmsr r11
+ li r0,MSR_FP /* Disable floating-point */
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ oris r0,r0,MSR_VEC@h /* Disable altivec */
+ mfspr r12,SPRN_VRSAVE /* save vrsave register value */
+ stw r12,THREAD+THREAD_VRSAVE(r2)
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_SPE
+BEGIN_FTR_SECTION
+ oris r0,r0,MSR_SPE@h /* Disable SPE */
+ mfspr r12,SPRN_SPEFSCR /* save spefscr register value */
+ stw r12,THREAD+THREAD_SPEFSCR(r2)
+END_FTR_SECTION_IFSET(CPU_FTR_SPE)
+#endif /* CONFIG_SPE */
+ and. r0,r0,r11 /* FP or altivec or SPE enabled? */
+ beq+ 1f
+ andc r11,r11,r0
+ MTMSRD(r11)
+ isync
+1: stw r11,_MSR(r1)
+ mfcr r10
+ stw r10,_CCR(r1)
+ stw r1,KSP(r3) /* Set old stack pointer */
+
+#ifdef CONFIG_SMP
+ /* We need a sync somewhere here to make sure that if the
+ * previous task gets rescheduled on another CPU, it sees all
+ * stores it has performed on this one.
+ */
+ sync
+#endif /* CONFIG_SMP */
+
+ tophys(r0,r4)
+ CLR_TOP32(r0)
+ mtspr SPRN_SPRG_THREAD,r0 /* Update current THREAD phys addr */
+ lwz r1,KSP(r4) /* Load new stack pointer */
+
+ /* save the old current 'last' for return value */
+ mr r3,r2
+ addi r2,r4,-THREAD /* Update current */
+
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ lwz r0,THREAD+THREAD_VRSAVE(r2)
+ mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_SPE
+BEGIN_FTR_SECTION
+ lwz r0,THREAD+THREAD_SPEFSCR(r2)
+ mtspr SPRN_SPEFSCR,r0 /* restore SPEFSCR reg */
+END_FTR_SECTION_IFSET(CPU_FTR_SPE)
+#endif /* CONFIG_SPE */
+
+ lwz r0,_CCR(r1)
+ mtcrf 0xFF,r0
+ /* r3-r12 are destroyed -- Cort */
+ REST_NVGPRS(r1)
+
+ lwz r4,_NIP(r1) /* Return to _switch caller in new task */
+ mtlr r4
+ addi r1,r1,INT_FRAME_SIZE
+ blr
+
+ .globl fast_exception_return
+fast_exception_return:
+#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+ andi. r10,r9,MSR_RI /* check for recoverable interrupt */
+ beq 1f /* if not, we've got problems */
+#endif
+
+2: REST_4GPRS(3, r11)
+ lwz r10,_CCR(r11)
+ REST_GPR(1, r11)
+ mtcr r10
+ lwz r10,_LINK(r11)
+ mtlr r10
+ REST_GPR(10, r11)
+ mtspr SPRN_SRR1,r9
+ mtspr SPRN_SRR0,r12
+ REST_GPR(9, r11)
+ REST_GPR(12, r11)
+ lwz r11,GPR11(r11)
+ SYNC
+ RFI
+
+#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+/* check if the exception happened in a restartable section */
+1: lis r3,exc_exit_restart_end@ha
+ addi r3,r3,exc_exit_restart_end@l
+ cmplw r12,r3
+ bge 3f
+ lis r4,exc_exit_restart@ha
+ addi r4,r4,exc_exit_restart@l
+ cmplw r12,r4
+ blt 3f
+ lis r3,fee_restarts@ha
+ tophys(r3,r3)
+ lwz r5,fee_restarts@l(r3)
+ addi r5,r5,1
+ stw r5,fee_restarts@l(r3)
+ mr r12,r4 /* restart at exc_exit_restart */
+ b 2b
+
+ .section .bss
+ .align 2
+fee_restarts:
+ .space 4
+ .previous
+
+/* aargh, a nonrecoverable interrupt, panic */
+/* aargh, we don't know which trap this is */
+/* but the 601 doesn't implement the RI bit, so assume it's OK */
+3:
+BEGIN_FTR_SECTION
+ b 2b
+END_FTR_SECTION_IFSET(CPU_FTR_601)
+ li r10,-1
+ stw r10,_TRAP(r11)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ lis r10,MSR_KERNEL@h
+ ori r10,r10,MSR_KERNEL@l
+ bl transfer_to_handler_full
+ .long nonrecoverable_exception
+ .long ret_from_except
+#endif
+
+ .globl ret_from_except_full
+ret_from_except_full:
+ REST_NVGPRS(r1)
+ /* fall through */
+
+ .globl ret_from_except
+ret_from_except:
+ /* Hard-disable interrupts so that current_thread_info()->flags
+ * can't change between when we test it and when we return
+ * from the interrupt. */
+ /* Note: We don't bother telling lockdep about it */
+ LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ SYNC /* Some chip revs have problems here... */
+ MTMSRD(r10) /* disable interrupts */
+
+ lwz r3,_MSR(r1) /* Returning to user mode? */
+ andi. r0,r3,MSR_PR
+ beq resume_kernel
+
+user_exc_return: /* r10 contains MSR_KERNEL here */
+ /* Check current_thread_info()->flags */
+ rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
+ lwz r9,TI_FLAGS(r9)
+ andi. r0,r9,_TIF_USER_WORK_MASK
+ bne do_work
+
+restore_user:
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+ /* Check whether this process has its own DBCR0 value. The internal
+ debug mode bit tells us that dbcr0 should be loaded. */
+ lwz r0,THREAD+THREAD_DBCR0(r2)
+ andis. r10,r0,DBCR0_IDM@h
+ bnel- load_dbcr0
+#endif
+
+#ifdef CONFIG_PREEMPT
+ b restore
+
+/* N.B. the only way to get here is from the beq following ret_from_except. */
+resume_kernel:
+ /* check current_thread_info->preempt_count */
+ rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
+ lwz r0,TI_PREEMPT(r9)
+ cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
+ bne restore
+ lwz r0,TI_FLAGS(r9)
+ andi. r0,r0,_TIF_NEED_RESCHED
+ beq+ restore
+ andi. r0,r3,MSR_EE /* interrupts off? */
+ beq restore /* don't schedule if so */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* Lockdep thinks irqs are enabled, we need to call
+ * preempt_schedule_irq with IRQs off, so we inform lockdep
+ * now that we -did- turn them off already
+ */
+ bl trace_hardirqs_off
+#endif
+1: bl preempt_schedule_irq
+ rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
+ lwz r3,TI_FLAGS(r9)
+ andi. r0,r3,_TIF_NEED_RESCHED
+ bne- 1b
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* And now, to properly rebalance the above, we tell lockdep they
+ * are being turned back on, which will happen when we return
+ */
+ bl trace_hardirqs_on
+#endif
+#else
+resume_kernel:
+#endif /* CONFIG_PREEMPT */
+
+ /* interrupts are hard-disabled at this point */
+restore:
+#ifdef CONFIG_44x
+BEGIN_MMU_FTR_SECTION
+ b 1f
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
+ lis r4,icache_44x_need_flush@ha
+ lwz r5,icache_44x_need_flush@l(r4)
+ cmplwi cr0,r5,0
+ beq+ 1f
+ li r6,0
+ iccci r0,r0
+ stw r6,icache_44x_need_flush@l(r4)
+1:
+#endif /* CONFIG_44x */
+
+ lwz r9,_MSR(r1)
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* Lockdep doesn't know about the fact that IRQs are temporarily turned
+ * off in this assembly code while peeking at TI_FLAGS() and such. However
+ * we need to inform it if the exception turned interrupts off, and we
+ * are about to trun them back on.
+ *
+ * The problem here sadly is that we don't know whether the exceptions was
+ * one that turned interrupts off or not. So we always tell lockdep about
+ * turning them on here when we go back to wherever we came from with EE
+ * on, even if that may meen some redudant calls being tracked. Maybe later
+ * we could encode what the exception did somewhere or test the exception
+ * type in the pt_regs but that sounds overkill
+ */
+ andi. r10,r9,MSR_EE
+ beq 1f
+ /*
+ * Since the ftrace irqsoff latency trace checks CALLER_ADDR1,
+ * which is the stack frame here, we need to force a stack frame
+ * in case we came from user space.
+ */
+ stwu r1,-32(r1)
+ mflr r0
+ stw r0,4(r1)
+ stwu r1,-32(r1)
+ bl trace_hardirqs_on
+ lwz r1,0(r1)
+ lwz r1,0(r1)
+ lwz r9,_MSR(r1)
+1:
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+ lwz r0,GPR0(r1)
+ lwz r2,GPR2(r1)
+ REST_4GPRS(3, r1)
+ REST_2GPRS(7, r1)
+
+ lwz r10,_XER(r1)
+ lwz r11,_CTR(r1)
+ mtspr SPRN_XER,r10
+ mtctr r11
+
+ PPC405_ERR77(0,r1)
+BEGIN_FTR_SECTION
+ lwarx r11,0,r1
+END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
+ stwcx. r0,0,r1 /* to clear the reservation */
+
+#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+ andi. r10,r9,MSR_RI /* check if this exception occurred */
+ beql nonrecoverable /* at a bad place (MSR:RI = 0) */
+
+ lwz r10,_CCR(r1)
+ lwz r11,_LINK(r1)
+ mtcrf 0xFF,r10
+ mtlr r11
+
+ /*
+ * Once we put values in SRR0 and SRR1, we are in a state
+ * where exceptions are not recoverable, since taking an
+ * exception will trash SRR0 and SRR1. Therefore we clear the
+ * MSR:RI bit to indicate this. If we do take an exception,
+ * we can't return to the point of the exception but we
+ * can restart the exception exit path at the label
+ * exc_exit_restart below. -- paulus
+ */
+ LOAD_MSR_KERNEL(r10,MSR_KERNEL & ~MSR_RI)
+ SYNC
+ MTMSRD(r10) /* clear the RI bit */
+ .globl exc_exit_restart
+exc_exit_restart:
+ lwz r12,_NIP(r1)
+ FIX_SRR1(r9,r10)
+ mtspr SPRN_SRR0,r12
+ mtspr SPRN_SRR1,r9
+ REST_4GPRS(9, r1)
+ lwz r1,GPR1(r1)
+ .globl exc_exit_restart_end
+exc_exit_restart_end:
+ SYNC
+ RFI
+
+#else /* !(CONFIG_4xx || CONFIG_BOOKE) */
+ /*
+ * This is a bit different on 4xx/Book-E because it doesn't have
+ * the RI bit in the MSR.
+ * The TLB miss handler checks if we have interrupted
+ * the exception exit path and restarts it if so
+ * (well maybe one day it will... :).
+ */
+ lwz r11,_LINK(r1)
+ mtlr r11
+ lwz r10,_CCR(r1)
+ mtcrf 0xff,r10
+ REST_2GPRS(9, r1)
+ .globl exc_exit_restart
+exc_exit_restart:
+ lwz r11,_NIP(r1)
+ lwz r12,_MSR(r1)
+exc_exit_start:
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
+ REST_2GPRS(11, r1)
+ lwz r1,GPR1(r1)
+ .globl exc_exit_restart_end
+exc_exit_restart_end:
+ PPC405_ERR77_SYNC
+ rfi
+ b . /* prevent prefetch past rfi */
+
+/*
+ * Returning from a critical interrupt in user mode doesn't need
+ * to be any different from a normal exception. For a critical
+ * interrupt in the kernel, we just return (without checking for
+ * preemption) since the interrupt may have happened at some crucial
+ * place (e.g. inside the TLB miss handler), and because we will be
+ * running with r1 pointing into critical_stack, not the current
+ * process's kernel stack (and therefore current_thread_info() will
+ * give the wrong answer).
+ * We have to restore various SPRs that may have been in use at the
+ * time of the critical interrupt.
+ *
+ */
+#ifdef CONFIG_40x
+#define PPC_40x_TURN_OFF_MSR_DR \
+ /* avoid any possible TLB misses here by turning off MSR.DR, we \
+ * assume the instructions here are mapped by a pinned TLB entry */ \
+ li r10,MSR_IR; \
+ mtmsr r10; \
+ isync; \
+ tophys(r1, r1);
+#else
+#define PPC_40x_TURN_OFF_MSR_DR
+#endif
+
+#define RET_FROM_EXC_LEVEL(exc_lvl_srr0, exc_lvl_srr1, exc_lvl_rfi) \
+ REST_NVGPRS(r1); \
+ lwz r3,_MSR(r1); \
+ andi. r3,r3,MSR_PR; \
+ LOAD_MSR_KERNEL(r10,MSR_KERNEL); \
+ bne user_exc_return; \
+ lwz r0,GPR0(r1); \
+ lwz r2,GPR2(r1); \
+ REST_4GPRS(3, r1); \
+ REST_2GPRS(7, r1); \
+ lwz r10,_XER(r1); \
+ lwz r11,_CTR(r1); \
+ mtspr SPRN_XER,r10; \
+ mtctr r11; \
+ PPC405_ERR77(0,r1); \
+ stwcx. r0,0,r1; /* to clear the reservation */ \
+ lwz r11,_LINK(r1); \
+ mtlr r11; \
+ lwz r10,_CCR(r1); \
+ mtcrf 0xff,r10; \
+ PPC_40x_TURN_OFF_MSR_DR; \
+ lwz r9,_DEAR(r1); \
+ lwz r10,_ESR(r1); \
+ mtspr SPRN_DEAR,r9; \
+ mtspr SPRN_ESR,r10; \
+ lwz r11,_NIP(r1); \
+ lwz r12,_MSR(r1); \
+ mtspr exc_lvl_srr0,r11; \
+ mtspr exc_lvl_srr1,r12; \
+ lwz r9,GPR9(r1); \
+ lwz r12,GPR12(r1); \
+ lwz r10,GPR10(r1); \
+ lwz r11,GPR11(r1); \
+ lwz r1,GPR1(r1); \
+ PPC405_ERR77_SYNC; \
+ exc_lvl_rfi; \
+ b .; /* prevent prefetch past exc_lvl_rfi */
+
+#define RESTORE_xSRR(exc_lvl_srr0, exc_lvl_srr1) \
+ lwz r9,_##exc_lvl_srr0(r1); \
+ lwz r10,_##exc_lvl_srr1(r1); \
+ mtspr SPRN_##exc_lvl_srr0,r9; \
+ mtspr SPRN_##exc_lvl_srr1,r10;
+
+#if defined(CONFIG_PPC_BOOK3E_MMU)
+#ifdef CONFIG_PHYS_64BIT
+#define RESTORE_MAS7 \
+ lwz r11,MAS7(r1); \
+ mtspr SPRN_MAS7,r11;
+#else
+#define RESTORE_MAS7
+#endif /* CONFIG_PHYS_64BIT */
+#define RESTORE_MMU_REGS \
+ lwz r9,MAS0(r1); \
+ lwz r10,MAS1(r1); \
+ lwz r11,MAS2(r1); \
+ mtspr SPRN_MAS0,r9; \
+ lwz r9,MAS3(r1); \
+ mtspr SPRN_MAS1,r10; \
+ lwz r10,MAS6(r1); \
+ mtspr SPRN_MAS2,r11; \
+ mtspr SPRN_MAS3,r9; \
+ mtspr SPRN_MAS6,r10; \
+ RESTORE_MAS7;
+#elif defined(CONFIG_44x)
+#define RESTORE_MMU_REGS \
+ lwz r9,MMUCR(r1); \
+ mtspr SPRN_MMUCR,r9;
+#else
+#define RESTORE_MMU_REGS
+#endif
+
+#ifdef CONFIG_40x
+ .globl ret_from_crit_exc
+ret_from_crit_exc:
+ mfspr r9,SPRN_SPRG_THREAD
+ lis r10,saved_ksp_limit@ha;
+ lwz r10,saved_ksp_limit@l(r10);
+ tovirt(r9,r9);
+ stw r10,KSP_LIMIT(r9)
+ lis r9,crit_srr0@ha;
+ lwz r9,crit_srr0@l(r9);
+ lis r10,crit_srr1@ha;
+ lwz r10,crit_srr1@l(r10);
+ mtspr SPRN_SRR0,r9;
+ mtspr SPRN_SRR1,r10;
+ RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI)
+#endif /* CONFIG_40x */
+
+#ifdef CONFIG_BOOKE
+ .globl ret_from_crit_exc
+ret_from_crit_exc:
+ mfspr r9,SPRN_SPRG_THREAD
+ lwz r10,SAVED_KSP_LIMIT(r1)
+ stw r10,KSP_LIMIT(r9)
+ RESTORE_xSRR(SRR0,SRR1);
+ RESTORE_MMU_REGS;
+ RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI)
+
+ .globl ret_from_debug_exc
+ret_from_debug_exc:
+ mfspr r9,SPRN_SPRG_THREAD
+ lwz r10,SAVED_KSP_LIMIT(r1)
+ stw r10,KSP_LIMIT(r9)
+ lwz r9,THREAD_INFO-THREAD(r9)
+ rlwinm r10,r1,0,0,(31-THREAD_SHIFT)
+ lwz r10,TI_PREEMPT(r10)
+ stw r10,TI_PREEMPT(r9)
+ RESTORE_xSRR(SRR0,SRR1);
+ RESTORE_xSRR(CSRR0,CSRR1);
+ RESTORE_MMU_REGS;
+ RET_FROM_EXC_LEVEL(SPRN_DSRR0, SPRN_DSRR1, PPC_RFDI)
+
+ .globl ret_from_mcheck_exc
+ret_from_mcheck_exc:
+ mfspr r9,SPRN_SPRG_THREAD
+ lwz r10,SAVED_KSP_LIMIT(r1)
+ stw r10,KSP_LIMIT(r9)
+ RESTORE_xSRR(SRR0,SRR1);
+ RESTORE_xSRR(CSRR0,CSRR1);
+ RESTORE_xSRR(DSRR0,DSRR1);
+ RESTORE_MMU_REGS;
+ RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI)
+#endif /* CONFIG_BOOKE */
+
+/*
+ * Load the DBCR0 value for a task that is being ptraced,
+ * having first saved away the global DBCR0. Note that r0
+ * has the dbcr0 value to set upon entry to this.
+ */
+load_dbcr0:
+ mfmsr r10 /* first disable debug exceptions */
+ rlwinm r10,r10,0,~MSR_DE
+ mtmsr r10
+ isync
+ mfspr r10,SPRN_DBCR0
+ lis r11,global_dbcr0@ha
+ addi r11,r11,global_dbcr0@l
+#ifdef CONFIG_SMP
+ rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
+ lwz r9,TI_CPU(r9)
+ slwi r9,r9,3
+ add r11,r11,r9
+#endif
+ stw r10,0(r11)
+ mtspr SPRN_DBCR0,r0
+ lwz r10,4(r11)
+ addi r10,r10,1
+ stw r10,4(r11)
+ li r11,-1
+ mtspr SPRN_DBSR,r11 /* clear all pending debug events */
+ blr
+
+ .section .bss
+ .align 4
+global_dbcr0:
+ .space 8*NR_CPUS
+ .previous
+#endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
+
+do_work: /* r10 contains MSR_KERNEL here */
+ andi. r0,r9,_TIF_NEED_RESCHED
+ beq do_user_signal
+
+do_resched: /* r10 contains MSR_KERNEL here */
+ /* Note: We don't need to inform lockdep that we are enabling
+ * interrupts here. As far as it knows, they are already enabled
+ */
+ ori r10,r10,MSR_EE
+ SYNC
+ MTMSRD(r10) /* hard-enable interrupts */
+ bl schedule
+recheck:
+ /* Note: And we don't tell it we are disabling them again
+ * neither. Those disable/enable cycles used to peek at
+ * TI_FLAGS aren't advertised.
+ */
+ LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ SYNC
+ MTMSRD(r10) /* disable interrupts */
+ rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
+ lwz r9,TI_FLAGS(r9)
+ andi. r0,r9,_TIF_NEED_RESCHED
+ bne- do_resched
+ andi. r0,r9,_TIF_USER_WORK_MASK
+ beq restore_user
+do_user_signal: /* r10 contains MSR_KERNEL here */
+ ori r10,r10,MSR_EE
+ SYNC
+ MTMSRD(r10) /* hard-enable interrupts */
+ /* save r13-r31 in the exception frame, if not already done */
+ lwz r3,_TRAP(r1)
+ andi. r0,r3,1
+ beq 2f
+ SAVE_NVGPRS(r1)
+ rlwinm r3,r3,0,0,30
+ stw r3,_TRAP(r1)
+2: addi r3,r1,STACK_FRAME_OVERHEAD
+ mr r4,r9
+ bl do_notify_resume
+ REST_NVGPRS(r1)
+ b recheck
+
+/*
+ * We come here when we are at the end of handling an exception
+ * that occurred at a place where taking an exception will lose
+ * state information, such as the contents of SRR0 and SRR1.
+ */
+nonrecoverable:
+ lis r10,exc_exit_restart_end@ha
+ addi r10,r10,exc_exit_restart_end@l
+ cmplw r12,r10
+ bge 3f
+ lis r11,exc_exit_restart@ha
+ addi r11,r11,exc_exit_restart@l
+ cmplw r12,r11
+ blt 3f
+ lis r10,ee_restarts@ha
+ lwz r12,ee_restarts@l(r10)
+ addi r12,r12,1
+ stw r12,ee_restarts@l(r10)
+ mr r12,r11 /* restart at exc_exit_restart */
+ blr
+3: /* OK, we can't recover, kill this process */
+ /* but the 601 doesn't implement the RI bit, so assume it's OK */
+BEGIN_FTR_SECTION
+ blr
+END_FTR_SECTION_IFSET(CPU_FTR_601)
+ lwz r3,_TRAP(r1)
+ andi. r0,r3,1
+ beq 4f
+ SAVE_NVGPRS(r1)
+ rlwinm r3,r3,0,0,30
+ stw r3,_TRAP(r1)
+4: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl nonrecoverable_exception
+ /* shouldn't return */
+ b 4b
+
+ .section .bss
+ .align 2
+ee_restarts:
+ .space 4
+ .previous
+
+/*
+ * PROM code for specific machines follows. Put it
+ * here so it's easy to add arch-specific sections later.
+ * -- Cort
+ */
+#ifdef CONFIG_PPC_RTAS
+/*
+ * On CHRP, the Run-Time Abstraction Services (RTAS) have to be
+ * called with the MMU off.
+ */
+_GLOBAL(enter_rtas)
+ stwu r1,-INT_FRAME_SIZE(r1)
+ mflr r0
+ stw r0,INT_FRAME_SIZE+4(r1)
+ LOAD_REG_ADDR(r4, rtas)
+ lis r6,1f@ha /* physical return address for rtas */
+ addi r6,r6,1f@l
+ tophys(r6,r6)
+ tophys(r7,r1)
+ lwz r8,RTASENTRY(r4)
+ lwz r4,RTASBASE(r4)
+ mfmsr r9
+ stw r9,8(r1)
+ LOAD_MSR_KERNEL(r0,MSR_KERNEL)
+ SYNC /* disable interrupts so SRR0/1 */
+ MTMSRD(r0) /* don't get trashed */
+ li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
+ mtlr r6
+ mtspr SPRN_SPRG_RTAS,r7
+ mtspr SPRN_SRR0,r8
+ mtspr SPRN_SRR1,r9
+ RFI
+1: tophys(r9,r1)
+ lwz r8,INT_FRAME_SIZE+4(r9) /* get return address */
+ lwz r9,8(r9) /* original msr value */
+ FIX_SRR1(r9,r0)
+ addi r1,r1,INT_FRAME_SIZE
+ li r0,0
+ mtspr SPRN_SPRG_RTAS,r0
+ mtspr SPRN_SRR0,r8
+ mtspr SPRN_SRR1,r9
+ RFI /* return to caller */
+
+ .globl machine_check_in_rtas
+machine_check_in_rtas:
+ twi 31,0,0
+ /* XXX load up BATs and panic */
+
+#endif /* CONFIG_PPC_RTAS */
+
+#ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+ /*
+ * It is required that _mcount on PPC32 must preserve the
+ * link register. But we have r0 to play with. We use r0
+ * to push the return address back to the caller of mcount
+ * into the ctr register, restore the link register and
+ * then jump back using the ctr register.
+ */
+ mflr r0
+ mtctr r0
+ lwz r0, 4(r1)
+ mtlr r0
+ bctr
+
+_GLOBAL(ftrace_caller)
+ MCOUNT_SAVE_FRAME
+ /* r3 ends up with link register */
+ subi r3, r3, MCOUNT_INSN_SIZE
+.globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ nop
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ b ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+#endif
+ MCOUNT_RESTORE_FRAME
+ /* old link register ends up in ctr reg */
+ bctr
+#else
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+
+ MCOUNT_SAVE_FRAME
+
+ subi r3, r3, MCOUNT_INSN_SIZE
+ LOAD_REG_ADDR(r5, ftrace_trace_function)
+ lwz r5,0(r5)
+
+ mtctr r5
+ bctrl
+ nop
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ b ftrace_graph_caller
+#endif
+ MCOUNT_RESTORE_FRAME
+ bctr
+#endif
+
+_GLOBAL(ftrace_stub)
+ blr
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(ftrace_graph_caller)
+ /* load r4 with local address */
+ lwz r4, 44(r1)
+ subi r4, r4, MCOUNT_INSN_SIZE
+
+ /* get the parent address */
+ addi r3, r1, 52
+
+ bl prepare_ftrace_return
+ nop
+
+ MCOUNT_RESTORE_FRAME
+ /* old link register ends up in ctr reg */
+ bctr
+
+_GLOBAL(return_to_handler)
+ /* need to save return values */
+ stwu r1, -32(r1)
+ stw r3, 20(r1)
+ stw r4, 16(r1)
+ stw r31, 12(r1)
+ mr r31, r1
+
+ bl ftrace_return_to_handler
+ nop
+
+ /* return value has real return address */
+ mtlr r3
+
+ lwz r3, 20(r1)
+ lwz r4, 16(r1)
+ lwz r31,12(r1)
+ lwz r1, 0(r1)
+
+ /* Jump back to real return address */
+ blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#endif /* CONFIG_MCOUNT */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
new file mode 100644
index 00000000..ef2074c3
--- /dev/null
+++ b/arch/powerpc/kernel/entry_64.S
@@ -0,0 +1,1152 @@
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ * Adapted for Power Macintosh by Paul Mackerras.
+ * Low-level exception handlers and MMU support
+ * rewritten by Paul Mackerras.
+ * Copyright (C) 1996 Paul Mackerras.
+ * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+ *
+ * This file contains the system call entry code, context switch
+ * code, and exception/interrupt return code for PowerPC.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <asm/unistd.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/bug.h>
+#include <asm/ptrace.h>
+#include <asm/irqflags.h>
+#include <asm/ftrace.h>
+#include <asm/hw_irq.h>
+
+/*
+ * System calls.
+ */
+ .section ".toc","aw"
+.SYS_CALL_TABLE:
+ .tc .sys_call_table[TC],.sys_call_table
+
+/* This value is used to mark exception frames on the stack. */
+exception_marker:
+ .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
+
+ .section ".text"
+ .align 7
+
+#undef SHOW_SYSCALLS
+
+ .globl system_call_common
+system_call_common:
+ andi. r10,r12,MSR_PR
+ mr r10,r1
+ addi r1,r1,-INT_FRAME_SIZE
+ beq- 1f
+ ld r1,PACAKSAVE(r13)
+1: std r10,0(r1)
+ std r11,_NIP(r1)
+ std r12,_MSR(r1)
+ std r0,GPR0(r1)
+ std r10,GPR1(r1)
+ ACCOUNT_CPU_USER_ENTRY(r10, r11)
+ /*
+ * This "crclr so" clears CR0.SO, which is the error indication on
+ * return from this system call. There must be no cmp instruction
+ * between it and the "mfcr r9" below, otherwise if XER.SO is set,
+ * CR0.SO will get set, causing all system calls to appear to fail.
+ */
+ crclr so
+ std r2,GPR2(r1)
+ std r3,GPR3(r1)
+ std r4,GPR4(r1)
+ std r5,GPR5(r1)
+ std r6,GPR6(r1)
+ std r7,GPR7(r1)
+ std r8,GPR8(r1)
+ li r11,0
+ std r11,GPR9(r1)
+ std r11,GPR10(r1)
+ std r11,GPR11(r1)
+ std r11,GPR12(r1)
+ std r9,GPR13(r1)
+ mfcr r9
+ mflr r10
+ li r11,0xc01
+ std r9,_CCR(r1)
+ std r10,_LINK(r1)
+ std r11,_TRAP(r1)
+ mfxer r9
+ mfctr r10
+ std r9,_XER(r1)
+ std r10,_CTR(r1)
+ std r3,ORIG_GPR3(r1)
+ ld r2,PACATOC(r13)
+ addi r9,r1,STACK_FRAME_OVERHEAD
+ ld r11,exception_marker@toc(r2)
+ std r11,-16(r9) /* "regshere" marker */
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)
+BEGIN_FW_FTR_SECTION
+ beq 33f
+ /* if from user, see if there are any DTL entries to process */
+ ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
+ ld r11,PACA_DTL_RIDX(r13) /* get log read index */
+ ld r10,LPPACA_DTLIDX(r10) /* get log write index */
+ cmpd cr1,r11,r10
+ beq+ cr1,33f
+ bl .accumulate_stolen_time
+ REST_GPR(0,r1)
+ REST_4GPRS(3,r1)
+ REST_2GPRS(7,r1)
+ addi r9,r1,STACK_FRAME_OVERHEAD
+33:
+END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */
+
+ /*
+ * A syscall should always be called with interrupts enabled
+ * so we just unconditionally hard-enable here. When some kind
+ * of irq tracing is used, we additionally check that condition
+ * is correct
+ */
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG)
+ lbz r10,PACASOFTIRQEN(r13)
+ xori r10,r10,1
+1: tdnei r10,0
+ EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
+#endif
+
+#ifdef CONFIG_PPC_BOOK3E
+ wrteei 1
+#else
+ ld r11,PACAKMSR(r13)
+ ori r11,r11,MSR_EE
+ mtmsrd r11,1
+#endif /* CONFIG_PPC_BOOK3E */
+
+ /* We do need to set SOFTE in the stack frame or the return
+ * from interrupt will be painful
+ */
+ li r10,1
+ std r10,SOFTE(r1)
+
+#ifdef SHOW_SYSCALLS
+ bl .do_show_syscall
+ REST_GPR(0,r1)
+ REST_4GPRS(3,r1)
+ REST_2GPRS(7,r1)
+ addi r9,r1,STACK_FRAME_OVERHEAD
+#endif
+ clrrdi r11,r1,THREAD_SHIFT
+ ld r10,TI_FLAGS(r11)
+ andi. r11,r10,_TIF_SYSCALL_T_OR_A
+ bne- syscall_dotrace
+syscall_dotrace_cont:
+ cmpldi 0,r0,NR_syscalls
+ bge- syscall_enosys
+
+system_call: /* label this so stack traces look sane */
+/*
+ * Need to vector to 32 Bit or default sys_call_table here,
+ * based on caller's run-mode / personality.
+ */
+ ld r11,.SYS_CALL_TABLE@toc(2)
+ andi. r10,r10,_TIF_32BIT
+ beq 15f
+ addi r11,r11,8 /* use 32-bit syscall entries */
+ clrldi r3,r3,32
+ clrldi r4,r4,32
+ clrldi r5,r5,32
+ clrldi r6,r6,32
+ clrldi r7,r7,32
+ clrldi r8,r8,32
+15:
+ slwi r0,r0,4
+ ldx r10,r11,r0 /* Fetch system call handler [ptr] */
+ mtctr r10
+ bctrl /* Call handler */
+
+syscall_exit:
+ std r3,RESULT(r1)
+#ifdef SHOW_SYSCALLS
+ bl .do_show_syscall_exit
+ ld r3,RESULT(r1)
+#endif
+ clrrdi r12,r1,THREAD_SHIFT
+
+ ld r8,_MSR(r1)
+#ifdef CONFIG_PPC_BOOK3S
+ /* No MSR:RI on BookE */
+ andi. r10,r8,MSR_RI
+ beq- unrecov_restore
+#endif
+ /*
+ * Disable interrupts so current_thread_info()->flags can't change,
+ * and so that we don't get interrupted after loading SRR0/1.
+ */
+#ifdef CONFIG_PPC_BOOK3E
+ wrteei 0
+#else
+ ld r10,PACAKMSR(r13)
+ mtmsrd r10,1
+#endif /* CONFIG_PPC_BOOK3E */
+
+ ld r9,TI_FLAGS(r12)
+ li r11,-_LAST_ERRNO
+ andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
+ bne- syscall_exit_work
+ cmpld r3,r11
+ ld r5,_CCR(r1)
+ bge- syscall_error
+syscall_error_cont:
+ ld r7,_NIP(r1)
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+ andi. r6,r8,MSR_PR
+ ld r4,_LINK(r1)
+ /*
+ * Clear RI before restoring r13. If we are returning to
+ * userspace and we take an exception after restoring r13,
+ * we end up corrupting the userspace r13 value.
+ */
+#ifdef CONFIG_PPC_BOOK3S
+ /* No MSR:RI on BookE */
+ li r12,MSR_RI
+ andc r11,r10,r12
+ mtmsrd r11,1 /* clear MSR.RI */
+#endif /* CONFIG_PPC_BOOK3S */
+
+ beq- 1f
+ ACCOUNT_CPU_USER_EXIT(r11, r12)
+ ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
+1: ld r2,GPR2(r1)
+ ld r1,GPR1(r1)
+ mtlr r4
+ mtcr r5
+ mtspr SPRN_SRR0,r7
+ mtspr SPRN_SRR1,r8
+ RFI
+ b . /* prevent speculative execution */
+
+syscall_error:
+ oris r5,r5,0x1000 /* Set SO bit in CR */
+ neg r3,r3
+ std r5,_CCR(r1)
+ b syscall_error_cont
+
+/* Traced system call support */
+syscall_dotrace:
+ bl .save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .do_syscall_trace_enter
+ /*
+ * Restore argument registers possibly just changed.
+ * We use the return value of do_syscall_trace_enter
+ * for the call number to look up in the table (r0).
+ */
+ mr r0,r3
+ ld r3,GPR3(r1)
+ ld r4,GPR4(r1)
+ ld r5,GPR5(r1)
+ ld r6,GPR6(r1)
+ ld r7,GPR7(r1)
+ ld r8,GPR8(r1)
+ addi r9,r1,STACK_FRAME_OVERHEAD
+ clrrdi r10,r1,THREAD_SHIFT
+ ld r10,TI_FLAGS(r10)
+ b syscall_dotrace_cont
+
+syscall_enosys:
+ li r3,-ENOSYS
+ b syscall_exit
+
+syscall_exit_work:
+ /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
+ If TIF_NOERROR is set, just save r3 as it is. */
+
+ andi. r0,r9,_TIF_RESTOREALL
+ beq+ 0f
+ REST_NVGPRS(r1)
+ b 2f
+0: cmpld r3,r11 /* r10 is -LAST_ERRNO */
+ blt+ 1f
+ andi. r0,r9,_TIF_NOERROR
+ bne- 1f
+ ld r5,_CCR(r1)
+ neg r3,r3
+ oris r5,r5,0x1000 /* Set SO bit in CR */
+ std r5,_CCR(r1)
+1: std r3,GPR3(r1)
+2: andi. r0,r9,(_TIF_PERSYSCALL_MASK)
+ beq 4f
+
+ /* Clear per-syscall TIF flags if any are set. */
+
+ li r11,_TIF_PERSYSCALL_MASK
+ addi r12,r12,TI_FLAGS
+3: ldarx r10,0,r12
+ andc r10,r10,r11
+ stdcx. r10,0,r12
+ bne- 3b
+ subi r12,r12,TI_FLAGS
+
+4: /* Anything else left to do? */
+ andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP)
+ beq .ret_from_except_lite
+
+ /* Re-enable interrupts */
+#ifdef CONFIG_PPC_BOOK3E
+ wrteei 1
+#else
+ ld r10,PACAKMSR(r13)
+ ori r10,r10,MSR_EE
+ mtmsrd r10,1
+#endif /* CONFIG_PPC_BOOK3E */
+
+ bl .save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .do_syscall_trace_leave
+ b .ret_from_except
+
+/* Save non-volatile GPRs, if not already saved. */
+_GLOBAL(save_nvgprs)
+ ld r11,_TRAP(r1)
+ andi. r0,r11,1
+ beqlr-
+ SAVE_NVGPRS(r1)
+ clrrdi r0,r11,1
+ std r0,_TRAP(r1)
+ blr
+
+
+/*
+ * The sigsuspend and rt_sigsuspend system calls can call do_signal
+ * and thus put the process into the stopped state where we might
+ * want to examine its user state with ptrace. Therefore we need
+ * to save all the nonvolatile registers (r14 - r31) before calling
+ * the C code. Similarly, fork, vfork and clone need the full
+ * register state on the stack so that it can be copied to the child.
+ */
+
+_GLOBAL(ppc_fork)
+ bl .save_nvgprs
+ bl .sys_fork
+ b syscall_exit
+
+_GLOBAL(ppc_vfork)
+ bl .save_nvgprs
+ bl .sys_vfork
+ b syscall_exit
+
+_GLOBAL(ppc_clone)
+ bl .save_nvgprs
+ bl .sys_clone
+ b syscall_exit
+
+_GLOBAL(ppc32_swapcontext)
+ bl .save_nvgprs
+ bl .compat_sys_swapcontext
+ b syscall_exit
+
+_GLOBAL(ppc64_swapcontext)
+ bl .save_nvgprs
+ bl .sys_swapcontext
+ b syscall_exit
+
+_GLOBAL(ret_from_fork)
+ bl .schedule_tail
+ REST_NVGPRS(r1)
+ li r3,0
+ b syscall_exit
+
+/*
+ * This routine switches between two different tasks. The process
+ * state of one is saved on its kernel stack. Then the state
+ * of the other is restored from its kernel stack. The memory
+ * management hardware is updated to the second process's state.
+ * Finally, we can return to the second process, via ret_from_except.
+ * On entry, r3 points to the THREAD for the current task, r4
+ * points to the THREAD for the new task.
+ *
+ * Note: there are two ways to get to the "going out" portion
+ * of this code; either by coming in via the entry (_switch)
+ * or via "fork" which must set up an environment equivalent
+ * to the "_switch" path. If you change this you'll have to change
+ * the fork code also.
+ *
+ * The code which creates the new task context is in 'copy_thread'
+ * in arch/powerpc/kernel/process.c
+ */
+ .align 7
+_GLOBAL(_switch)
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-SWITCH_FRAME_SIZE(r1)
+ /* r3-r13 are caller saved -- Cort */
+ SAVE_8GPRS(14, r1)
+ SAVE_10GPRS(22, r1)
+ mflr r20 /* Return to switch caller */
+ mfmsr r22
+ li r0, MSR_FP
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r0,r0,MSR_VSX@h /* Disable VSX */
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif /* CONFIG_VSX */
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ oris r0,r0,MSR_VEC@h /* Disable altivec */
+ mfspr r24,SPRN_VRSAVE /* save vrsave register value */
+ std r24,THREAD_VRSAVE(r3)
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+ mfspr r25,SPRN_DSCR
+ std r25,THREAD_DSCR(r3)
+END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
+#endif
+ and. r0,r0,r22
+ beq+ 1f
+ andc r22,r22,r0
+ MTMSRD(r22)
+ isync
+1: std r20,_NIP(r1)
+ mfcr r23
+ std r23,_CCR(r1)
+ std r1,KSP(r3) /* Set old stack pointer */
+
+#ifdef CONFIG_SMP
+ /* We need a sync somewhere here to make sure that if the
+ * previous task gets rescheduled on another CPU, it sees all
+ * stores it has performed on this one.
+ */
+ sync
+#endif /* CONFIG_SMP */
+
+ /*
+ * If we optimise away the clear of the reservation in system
+ * calls because we know the CPU tracks the address of the
+ * reservation, then we need to clear it here to cover the
+ * case that the kernel context switch path has no larx
+ * instructions.
+ */
+BEGIN_FTR_SECTION
+ ldarx r6,0,r1
+END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+ addi r6,r4,-THREAD /* Convert THREAD to 'current' */
+ std r6,PACACURRENT(r13) /* Set new 'current' */
+
+ ld r8,KSP(r4) /* new stack pointer */
+#ifdef CONFIG_PPC_BOOK3S
+BEGIN_FTR_SECTION
+ BEGIN_FTR_SECTION_NESTED(95)
+ clrrdi r6,r8,28 /* get its ESID */
+ clrrdi r9,r1,28 /* get current sp ESID */
+ FTR_SECTION_ELSE_NESTED(95)
+ clrrdi r6,r8,40 /* get its 1T ESID */
+ clrrdi r9,r1,40 /* get current sp 1T ESID */
+ ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_1T_SEGMENT, 95)
+FTR_SECTION_ELSE
+ b 2f
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_SLB)
+ clrldi. r0,r6,2 /* is new ESID c00000000? */
+ cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */
+ cror eq,4*cr1+eq,eq
+ beq 2f /* if yes, don't slbie it */
+
+ /* Bolt in the new stack SLB entry */
+ ld r7,KSP_VSID(r4) /* Get new stack's VSID */
+ oris r0,r6,(SLB_ESID_V)@h
+ ori r0,r0,(SLB_NUM_BOLTED-1)@l
+BEGIN_FTR_SECTION
+ li r9,MMU_SEGSIZE_1T /* insert B field */
+ oris r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h
+ rldimi r7,r9,SLB_VSID_SSIZE_SHIFT,0
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
+
+ /* Update the last bolted SLB. No write barriers are needed
+ * here, provided we only update the current CPU's SLB shadow
+ * buffer.
+ */
+ ld r9,PACA_SLBSHADOWPTR(r13)
+ li r12,0
+ std r12,SLBSHADOW_STACKESID(r9) /* Clear ESID */
+ std r7,SLBSHADOW_STACKVSID(r9) /* Save VSID */
+ std r0,SLBSHADOW_STACKESID(r9) /* Save ESID */
+
+ /* No need to check for MMU_FTR_NO_SLBIE_B here, since when
+ * we have 1TB segments, the only CPUs known to have the errata
+ * only support less than 1TB of system memory and we'll never
+ * actually hit this code path.
+ */
+
+ slbie r6
+ slbie r6 /* Workaround POWER5 < DD2.1 issue */
+ slbmte r7,r0
+ isync
+2:
+#endif /* !CONFIG_PPC_BOOK3S */
+
+ clrrdi r7,r8,THREAD_SHIFT /* base of new stack */
+ /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
+ because we don't need to leave the 288-byte ABI gap at the
+ top of the kernel stack. */
+ addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE
+
+ mr r1,r8 /* start using new stack pointer */
+ std r7,PACAKSAVE(r13)
+
+ ld r6,_CCR(r1)
+ mtcrf 0xFF,r6
+
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ ld r0,THREAD_VRSAVE(r4)
+ mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+ ld r0,THREAD_DSCR(r4)
+ cmpd r0,r25
+ beq 1f
+ mtspr SPRN_DSCR,r0
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
+#endif
+
+ /* r3-r13 are destroyed -- Cort */
+ REST_8GPRS(14, r1)
+ REST_10GPRS(22, r1)
+
+ /* convert old thread to its task_struct for return value */
+ addi r3,r3,-THREAD
+ ld r7,_NIP(r1) /* Return to _switch caller in new task */
+ mtlr r7
+ addi r1,r1,SWITCH_FRAME_SIZE
+ blr
+
+ .align 7
+_GLOBAL(ret_from_except)
+ ld r11,_TRAP(r1)
+ andi. r0,r11,1
+ bne .ret_from_except_lite
+ REST_NVGPRS(r1)
+
+_GLOBAL(ret_from_except_lite)
+ /*
+ * Disable interrupts so that current_thread_info()->flags
+ * can't change between when we test it and when we return
+ * from the interrupt.
+ */
+#ifdef CONFIG_PPC_BOOK3E
+ wrteei 0
+#else
+ ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */
+ mtmsrd r10,1 /* Update machine state */
+#endif /* CONFIG_PPC_BOOK3E */
+
+#ifdef CONFIG_PREEMPT
+ clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */
+ li r0,_TIF_NEED_RESCHED /* bits to check */
+ ld r3,_MSR(r1)
+ ld r4,TI_FLAGS(r9)
+ /* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */
+ rlwimi r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING
+ and. r0,r4,r0 /* check NEED_RESCHED and maybe SIGPENDING */
+ bne do_work
+
+#else /* !CONFIG_PREEMPT */
+ ld r3,_MSR(r1) /* Returning to user mode? */
+ andi. r3,r3,MSR_PR
+ beq restore /* if not, just restore regs and return */
+
+ /* Check current_thread_info()->flags */
+ clrrdi r9,r1,THREAD_SHIFT
+ ld r4,TI_FLAGS(r9)
+ andi. r0,r4,_TIF_USER_WORK_MASK
+ bne do_work
+#endif /* !CONFIG_PREEMPT */
+
+ .globl fast_exc_return_irq
+fast_exc_return_irq:
+restore:
+ /*
+ * This is the main kernel exit path. First we check if we
+ * are about to re-enable interrupts
+ */
+ ld r5,SOFTE(r1)
+ lbz r6,PACASOFTIRQEN(r13)
+ cmpwi cr0,r5,0
+ beq restore_irq_off
+
+ /* We are enabling, were we already enabled ? Yes, just return */
+ cmpwi cr0,r6,1
+ beq cr0,do_restore
+
+ /*
+ * We are about to soft-enable interrupts (we are hard disabled
+ * at this point). We check if there's anything that needs to
+ * be replayed first.
+ */
+ lbz r0,PACAIRQHAPPENED(r13)
+ cmpwi cr0,r0,0
+ bne- restore_check_irq_replay
+
+ /*
+ * Get here when nothing happened while soft-disabled, just
+ * soft-enable and move-on. We will hard-enable as a side
+ * effect of rfi
+ */
+restore_no_replay:
+ TRACE_ENABLE_INTS
+ li r0,1
+ stb r0,PACASOFTIRQEN(r13);
+
+ /*
+ * Final return path. BookE is handled in a different file
+ */
+do_restore:
+#ifdef CONFIG_PPC_BOOK3E
+ b .exception_return_book3e
+#else
+ /*
+ * Clear the reservation. If we know the CPU tracks the address of
+ * the reservation then we can potentially save some cycles and use
+ * a larx. On POWER6 and POWER7 this is significantly faster.
+ */
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+FTR_SECTION_ELSE
+ ldarx r4,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+ /*
+ * Some code path such as load_up_fpu or altivec return directly
+ * here. They run entirely hard disabled and do not alter the
+ * interrupt state. They also don't use lwarx/stwcx. and thus
+ * are known not to leave dangling reservations.
+ */
+ .globl fast_exception_return
+fast_exception_return:
+ ld r3,_MSR(r1)
+ ld r4,_CTR(r1)
+ ld r0,_LINK(r1)
+ mtctr r4
+ mtlr r0
+ ld r4,_XER(r1)
+ mtspr SPRN_XER,r4
+
+ REST_8GPRS(5, r1)
+
+ andi. r0,r3,MSR_RI
+ beq- unrecov_restore
+
+ /*
+ * Clear RI before restoring r13. If we are returning to
+ * userspace and we take an exception after restoring r13,
+ * we end up corrupting the userspace r13 value.
+ */
+ ld r4,PACAKMSR(r13) /* Get kernel MSR without EE */
+ andc r4,r4,r0 /* r0 contains MSR_RI here */
+ mtmsrd r4,1
+
+ /*
+ * r13 is our per cpu area, only restore it if we are returning to
+ * userspace the value stored in the stack frame may belong to
+ * another CPU.
+ */
+ andi. r0,r3,MSR_PR
+ beq 1f
+ ACCOUNT_CPU_USER_EXIT(r2, r4)
+ REST_GPR(13, r1)
+1:
+ mtspr SPRN_SRR1,r3
+
+ ld r2,_CCR(r1)
+ mtcrf 0xFF,r2
+ ld r2,_NIP(r1)
+ mtspr SPRN_SRR0,r2
+
+ ld r0,GPR0(r1)
+ ld r2,GPR2(r1)
+ ld r3,GPR3(r1)
+ ld r4,GPR4(r1)
+ ld r1,GPR1(r1)
+
+ rfid
+ b . /* prevent speculative execution */
+
+#endif /* CONFIG_PPC_BOOK3E */
+
+ /*
+ * We are returning to a context with interrupts soft disabled.
+ *
+ * However, we may also about to hard enable, so we need to
+ * make sure that in this case, we also clear PACA_IRQ_HARD_DIS
+ * or that bit can get out of sync and bad things will happen
+ */
+restore_irq_off:
+ ld r3,_MSR(r1)
+ lbz r7,PACAIRQHAPPENED(r13)
+ andi. r0,r3,MSR_EE
+ beq 1f
+ rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS
+ stb r7,PACAIRQHAPPENED(r13)
+1: li r0,0
+ stb r0,PACASOFTIRQEN(r13);
+ TRACE_DISABLE_INTS
+ b do_restore
+
+ /*
+ * Something did happen, check if a re-emit is needed
+ * (this also clears paca->irq_happened)
+ */
+restore_check_irq_replay:
+ /* XXX: We could implement a fast path here where we check
+ * for irq_happened being just 0x01, in which case we can
+ * clear it and return. That means that we would potentially
+ * miss a decrementer having wrapped all the way around.
+ *
+ * Still, this might be useful for things like hash_page
+ */
+ bl .__check_irq_replay
+ cmpwi cr0,r3,0
+ beq restore_no_replay
+
+ /*
+ * We need to re-emit an interrupt. We do so by re-using our
+ * existing exception frame. We first change the trap value,
+ * but we need to ensure we preserve the low nibble of it
+ */
+ ld r4,_TRAP(r1)
+ clrldi r4,r4,60
+ or r4,r4,r3
+ std r4,_TRAP(r1)
+
+ /*
+ * Then find the right handler and call it. Interrupts are
+ * still soft-disabled and we keep them that way.
+ */
+ cmpwi cr0,r3,0x500
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl .do_IRQ
+ b .ret_from_except
+1: cmpwi cr0,r3,0x900
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl .timer_interrupt
+ b .ret_from_except
+#ifdef CONFIG_PPC_BOOK3E
+1: cmpwi cr0,r3,0x280
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl .doorbell_exception
+ b .ret_from_except
+#endif /* CONFIG_PPC_BOOK3E */
+1: b .ret_from_except /* What else to do here ? */
+
+
+
+3:
+do_work:
+#ifdef CONFIG_PREEMPT
+ andi. r0,r3,MSR_PR /* Returning to user mode? */
+ bne user_work
+ /* Check that preempt_count() == 0 and interrupts are enabled */
+ lwz r8,TI_PREEMPT(r9)
+ cmpwi cr1,r8,0
+ ld r0,SOFTE(r1)
+ cmpdi r0,0
+ crandc eq,cr1*4+eq,eq
+ bne restore
+
+ /*
+ * Here we are preempting the current task. We want to make
+ * sure we are soft-disabled first
+ */
+ SOFT_DISABLE_INTS(r3,r4)
+1: bl .preempt_schedule_irq
+
+ /* Re-test flags and eventually loop */
+ clrrdi r9,r1,THREAD_SHIFT
+ ld r4,TI_FLAGS(r9)
+ andi. r0,r4,_TIF_NEED_RESCHED
+ bne 1b
+ b restore
+
+user_work:
+#endif /* CONFIG_PREEMPT */
+
+ andi. r0,r4,_TIF_NEED_RESCHED
+ beq 1f
+ bl .restore_interrupts
+ bl .schedule
+ b .ret_from_except_lite
+
+1: bl .save_nvgprs
+ bl .restore_interrupts
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .do_notify_resume
+ b .ret_from_except
+
+unrecov_restore:
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unrecoverable_exception
+ b unrecov_restore
+
+#ifdef CONFIG_PPC_RTAS
+/*
+ * On CHRP, the Run-Time Abstraction Services (RTAS) have to be
+ * called with the MMU off.
+ *
+ * In addition, we need to be in 32b mode, at least for now.
+ *
+ * Note: r3 is an input parameter to rtas, so don't trash it...
+ */
+_GLOBAL(enter_rtas)
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-RTAS_FRAME_SIZE(r1) /* Save SP and create stack space. */
+
+ /* Because RTAS is running in 32b mode, it clobbers the high order half
+ * of all registers that it saves. We therefore save those registers
+ * RTAS might touch to the stack. (r0, r3-r13 are caller saved)
+ */
+ SAVE_GPR(2, r1) /* Save the TOC */
+ SAVE_GPR(13, r1) /* Save paca */
+ SAVE_8GPRS(14, r1) /* Save the non-volatiles */
+ SAVE_10GPRS(22, r1) /* ditto */
+
+ mfcr r4
+ std r4,_CCR(r1)
+ mfctr r5
+ std r5,_CTR(r1)
+ mfspr r6,SPRN_XER
+ std r6,_XER(r1)
+ mfdar r7
+ std r7,_DAR(r1)
+ mfdsisr r8
+ std r8,_DSISR(r1)
+
+ /* Temporary workaround to clear CR until RTAS can be modified to
+ * ignore all bits.
+ */
+ li r0,0
+ mtcr r0
+
+#ifdef CONFIG_BUG
+ /* There is no way it is acceptable to get here with interrupts enabled,
+ * check it with the asm equivalent of WARN_ON
+ */
+ lbz r0,PACASOFTIRQEN(r13)
+1: tdnei r0,0
+ EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
+#endif
+
+ /* Hard-disable interrupts */
+ mfmsr r6
+ rldicl r7,r6,48,1
+ rotldi r7,r7,16
+ mtmsrd r7,1
+
+ /* Unfortunately, the stack pointer and the MSR are also clobbered,
+ * so they are saved in the PACA which allows us to restore
+ * our original state after RTAS returns.
+ */
+ std r1,PACAR1(r13)
+ std r6,PACASAVEDMSR(r13)
+
+ /* Setup our real return addr */
+ LOAD_REG_ADDR(r4,.rtas_return_loc)
+ clrldi r4,r4,2 /* convert to realmode address */
+ mtlr r4
+
+ li r0,0
+ ori r0,r0,MSR_EE|MSR_SE|MSR_BE|MSR_RI
+ andc r0,r6,r0
+
+ li r9,1
+ rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG)
+ ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI
+ andc r6,r0,r9
+ sync /* disable interrupts so SRR0/1 */
+ mtmsrd r0 /* don't get trashed */
+
+ LOAD_REG_ADDR(r4, rtas)
+ ld r5,RTASENTRY(r4) /* get the rtas->entry value */
+ ld r4,RTASBASE(r4) /* get the rtas->base value */
+
+ mtspr SPRN_SRR0,r5
+ mtspr SPRN_SRR1,r6
+ rfid
+ b . /* prevent speculative execution */
+
+_STATIC(rtas_return_loc)
+ /* relocation is off at this point */
+ GET_PACA(r4)
+ clrldi r4,r4,2 /* convert to realmode address */
+
+ bcl 20,31,$+4
+0: mflr r3
+ ld r3,(1f-0b)(r3) /* get &.rtas_restore_regs */
+
+ mfmsr r6
+ li r0,MSR_RI
+ andc r6,r6,r0
+ sync
+ mtmsrd r6
+
+ ld r1,PACAR1(r4) /* Restore our SP */
+ ld r4,PACASAVEDMSR(r4) /* Restore our MSR */
+
+ mtspr SPRN_SRR0,r3
+ mtspr SPRN_SRR1,r4
+ rfid
+ b . /* prevent speculative execution */
+
+ .align 3
+1: .llong .rtas_restore_regs
+
+_STATIC(rtas_restore_regs)
+ /* relocation is on at this point */
+ REST_GPR(2, r1) /* Restore the TOC */
+ REST_GPR(13, r1) /* Restore paca */
+ REST_8GPRS(14, r1) /* Restore the non-volatiles */
+ REST_10GPRS(22, r1) /* ditto */
+
+ GET_PACA(r13)
+
+ ld r4,_CCR(r1)
+ mtcr r4
+ ld r5,_CTR(r1)
+ mtctr r5
+ ld r6,_XER(r1)
+ mtspr SPRN_XER,r6
+ ld r7,_DAR(r1)
+ mtdar r7
+ ld r8,_DSISR(r1)
+ mtdsisr r8
+
+ addi r1,r1,RTAS_FRAME_SIZE /* Unstack our frame */
+ ld r0,16(r1) /* get return address */
+
+ mtlr r0
+ blr /* return to caller */
+
+#endif /* CONFIG_PPC_RTAS */
+
+_GLOBAL(enter_prom)
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */
+
+ /* Because PROM is running in 32b mode, it clobbers the high order half
+ * of all registers that it saves. We therefore save those registers
+ * PROM might touch to the stack. (r0, r3-r13 are caller saved)
+ */
+ SAVE_GPR(2, r1)
+ SAVE_GPR(13, r1)
+ SAVE_8GPRS(14, r1)
+ SAVE_10GPRS(22, r1)
+ mfcr r10
+ mfmsr r11
+ std r10,_CCR(r1)
+ std r11,_MSR(r1)
+
+ /* Get the PROM entrypoint */
+ mtlr r4
+
+ /* Switch MSR to 32 bits mode
+ */
+#ifdef CONFIG_PPC_BOOK3E
+ rlwinm r11,r11,0,1,31
+ mtmsr r11
+#else /* CONFIG_PPC_BOOK3E */
+ mfmsr r11
+ li r12,1
+ rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
+ andc r11,r11,r12
+ li r12,1
+ rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
+ andc r11,r11,r12
+ mtmsrd r11
+#endif /* CONFIG_PPC_BOOK3E */
+ isync
+
+ /* Enter PROM here... */
+ blrl
+
+ /* Just make sure that r1 top 32 bits didn't get
+ * corrupt by OF
+ */
+ rldicl r1,r1,0,32
+
+ /* Restore the MSR (back to 64 bits) */
+ ld r0,_MSR(r1)
+ MTMSRD(r0)
+ isync
+
+ /* Restore other registers */
+ REST_GPR(2, r1)
+ REST_GPR(13, r1)
+ REST_8GPRS(14, r1)
+ REST_10GPRS(22, r1)
+ ld r4,_CCR(r1)
+ mtcr r4
+
+ addi r1,r1,PROM_FRAME_SIZE
+ ld r0,16(r1)
+ mtlr r0
+ blr
+
+#ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+ blr
+
+_GLOBAL(ftrace_caller)
+ /* Taken from output of objdump from lib64/glibc */
+ mflr r3
+ ld r11, 0(r1)
+ stdu r1, -112(r1)
+ std r3, 128(r1)
+ ld r4, 16(r11)
+ subi r3, r3, MCOUNT_INSN_SIZE
+.globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ nop
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ b ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+#endif
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+_GLOBAL(ftrace_stub)
+ blr
+#else
+_GLOBAL(mcount)
+ blr
+
+_GLOBAL(_mcount)
+ /* Taken from output of objdump from lib64/glibc */
+ mflr r3
+ ld r11, 0(r1)
+ stdu r1, -112(r1)
+ std r3, 128(r1)
+ ld r4, 16(r11)
+
+ subi r3, r3, MCOUNT_INSN_SIZE
+ LOAD_REG_ADDR(r5,ftrace_trace_function)
+ ld r5,0(r5)
+ ld r5,0(r5)
+ mtctr r5
+ bctrl
+ nop
+
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ b ftrace_graph_caller
+#endif
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+_GLOBAL(ftrace_stub)
+ blr
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(ftrace_graph_caller)
+ /* load r4 with local address */
+ ld r4, 128(r1)
+ subi r4, r4, MCOUNT_INSN_SIZE
+
+ /* get the parent address */
+ ld r11, 112(r1)
+ addi r3, r11, 16
+
+ bl .prepare_ftrace_return
+ nop
+
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+ blr
+
+_GLOBAL(return_to_handler)
+ /* need to save return values */
+ std r4, -24(r1)
+ std r3, -16(r1)
+ std r31, -8(r1)
+ mr r31, r1
+ stdu r1, -112(r1)
+
+ bl .ftrace_return_to_handler
+ nop
+
+ /* return value has real return address */
+ mtlr r3
+
+ ld r1, 0(r1)
+ ld r4, -24(r1)
+ ld r3, -16(r1)
+ ld r31, -8(r1)
+
+ /* Jump back to real return address */
+ blr
+
+_GLOBAL(mod_return_to_handler)
+ /* need to save return values */
+ std r4, -32(r1)
+ std r3, -24(r1)
+ /* save TOC */
+ std r2, -16(r1)
+ std r31, -8(r1)
+ mr r31, r1
+ stdu r1, -112(r1)
+
+ /*
+ * We are in a module using the module's TOC.
+ * Switch to our TOC to run inside the core kernel.
+ */
+ ld r2, PACATOC(r13)
+
+ bl .ftrace_return_to_handler
+ nop
+
+ /* return value has real return address */
+ mtlr r3
+
+ ld r1, 0(r1)
+ ld r4, -32(r1)
+ ld r3, -24(r1)
+ ld r2, -16(r1)
+ ld r31, -8(r1)
+
+ /* Jump back to real return address */
+ blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
new file mode 100644
index 00000000..7215cc24
--- /dev/null
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -0,0 +1,1326 @@
+/*
+ * Boot code and exception vectors for Book3E processors
+ *
+ * Copyright (C) 2007 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cputable.h>
+#include <asm/setup.h>
+#include <asm/thread_info.h>
+#include <asm/reg_a2.h>
+#include <asm/exception-64e.h>
+#include <asm/bug.h>
+#include <asm/irqflags.h>
+#include <asm/ptrace.h>
+#include <asm/ppc-opcode.h>
+#include <asm/mmu.h>
+#include <asm/hw_irq.h>
+
+/* XXX This will ultimately add space for a special exception save
+ * structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...
+ * when taking special interrupts. For now we don't support that,
+ * special interrupts from within a non-standard level will probably
+ * blow you up
+ */
+#define SPECIAL_EXC_FRAME_SIZE INT_FRAME_SIZE
+
+/* Exception prolog code for all exceptions */
+#define EXCEPTION_PROLOG(n, type, addition) \
+ mtspr SPRN_SPRG_##type##_SCRATCH,r13; /* get spare registers */ \
+ mfspr r13,SPRN_SPRG_PACA; /* get PACA */ \
+ std r10,PACA_EX##type+EX_R10(r13); \
+ std r11,PACA_EX##type+EX_R11(r13); \
+ mfcr r10; /* save CR */ \
+ addition; /* additional code for that exc. */ \
+ std r1,PACA_EX##type+EX_R1(r13); /* save old r1 in the PACA */ \
+ stw r10,PACA_EX##type+EX_CR(r13); /* save old CR in the PACA */ \
+ mfspr r11,SPRN_##type##_SRR1;/* what are we coming from */ \
+ type##_SET_KSTACK; /* get special stack if necessary */\
+ andi. r10,r11,MSR_PR; /* save stack pointer */ \
+ beq 1f; /* branch around if supervisor */ \
+ ld r1,PACAKSAVE(r13); /* get kernel stack coming from usr */\
+1: cmpdi cr1,r1,0; /* check if SP makes sense */ \
+ bge- cr1,exc_##n##_bad_stack;/* bad stack (TODO: out of line) */ \
+ mfspr r10,SPRN_##type##_SRR0; /* read SRR0 before touching stack */
+
+/* Exception type-specific macros */
+#define GEN_SET_KSTACK \
+ subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */
+#define SPRN_GEN_SRR0 SPRN_SRR0
+#define SPRN_GEN_SRR1 SPRN_SRR1
+
+#define CRIT_SET_KSTACK \
+ ld r1,PACA_CRIT_STACK(r13); \
+ subi r1,r1,SPECIAL_EXC_FRAME_SIZE;
+#define SPRN_CRIT_SRR0 SPRN_CSRR0
+#define SPRN_CRIT_SRR1 SPRN_CSRR1
+
+#define DBG_SET_KSTACK \
+ ld r1,PACA_DBG_STACK(r13); \
+ subi r1,r1,SPECIAL_EXC_FRAME_SIZE;
+#define SPRN_DBG_SRR0 SPRN_DSRR0
+#define SPRN_DBG_SRR1 SPRN_DSRR1
+
+#define MC_SET_KSTACK \
+ ld r1,PACA_MC_STACK(r13); \
+ subi r1,r1,SPECIAL_EXC_FRAME_SIZE;
+#define SPRN_MC_SRR0 SPRN_MCSRR0
+#define SPRN_MC_SRR1 SPRN_MCSRR1
+
+#define NORMAL_EXCEPTION_PROLOG(n, addition) \
+ EXCEPTION_PROLOG(n, GEN, addition##_GEN(n))
+
+#define CRIT_EXCEPTION_PROLOG(n, addition) \
+ EXCEPTION_PROLOG(n, CRIT, addition##_CRIT(n))
+
+#define DBG_EXCEPTION_PROLOG(n, addition) \
+ EXCEPTION_PROLOG(n, DBG, addition##_DBG(n))
+
+#define MC_EXCEPTION_PROLOG(n, addition) \
+ EXCEPTION_PROLOG(n, MC, addition##_MC(n))
+
+
+/* Variants of the "addition" argument for the prolog
+ */
+#define PROLOG_ADDITION_NONE_GEN(n)
+#define PROLOG_ADDITION_NONE_CRIT(n)
+#define PROLOG_ADDITION_NONE_DBG(n)
+#define PROLOG_ADDITION_NONE_MC(n)
+
+#define PROLOG_ADDITION_MASKABLE_GEN(n) \
+ lbz r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \
+ cmpwi cr0,r11,0; /* yes -> go out of line */ \
+ beq masked_interrupt_book3e_##n
+
+#define PROLOG_ADDITION_2REGS_GEN(n) \
+ std r14,PACA_EXGEN+EX_R14(r13); \
+ std r15,PACA_EXGEN+EX_R15(r13)
+
+#define PROLOG_ADDITION_1REG_GEN(n) \
+ std r14,PACA_EXGEN+EX_R14(r13);
+
+#define PROLOG_ADDITION_2REGS_CRIT(n) \
+ std r14,PACA_EXCRIT+EX_R14(r13); \
+ std r15,PACA_EXCRIT+EX_R15(r13)
+
+#define PROLOG_ADDITION_2REGS_DBG(n) \
+ std r14,PACA_EXDBG+EX_R14(r13); \
+ std r15,PACA_EXDBG+EX_R15(r13)
+
+#define PROLOG_ADDITION_2REGS_MC(n) \
+ std r14,PACA_EXMC+EX_R14(r13); \
+ std r15,PACA_EXMC+EX_R15(r13)
+
+
+/* Core exception code for all exceptions except TLB misses.
+ * XXX: Needs to make SPRN_SPRG_GEN depend on exception type
+ */
+#define EXCEPTION_COMMON(n, excf, ints) \
+exc_##n##_common: \
+ std r0,GPR0(r1); /* save r0 in stackframe */ \
+ std r2,GPR2(r1); /* save r2 in stackframe */ \
+ SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
+ SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
+ std r9,GPR9(r1); /* save r9 in stackframe */ \
+ std r10,_NIP(r1); /* save SRR0 to stackframe */ \
+ std r11,_MSR(r1); /* save SRR1 to stackframe */ \
+ ACCOUNT_CPU_USER_ENTRY(r10,r11);/* accounting (uses cr0+eq) */ \
+ ld r3,excf+EX_R10(r13); /* get back r10 */ \
+ ld r4,excf+EX_R11(r13); /* get back r11 */ \
+ mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 */ \
+ std r12,GPR12(r1); /* save r12 in stackframe */ \
+ ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
+ mflr r6; /* save LR in stackframe */ \
+ mfctr r7; /* save CTR in stackframe */ \
+ mfspr r8,SPRN_XER; /* save XER in stackframe */ \
+ ld r9,excf+EX_R1(r13); /* load orig r1 back from PACA */ \
+ lwz r10,excf+EX_CR(r13); /* load orig CR back from PACA */ \
+ lbz r11,PACASOFTIRQEN(r13); /* get current IRQ softe */ \
+ ld r12,exception_marker@toc(r2); \
+ li r0,0; \
+ std r3,GPR10(r1); /* save r10 to stackframe */ \
+ std r4,GPR11(r1); /* save r11 to stackframe */ \
+ std r5,GPR13(r1); /* save it to stackframe */ \
+ std r6,_LINK(r1); \
+ std r7,_CTR(r1); \
+ std r8,_XER(r1); \
+ li r3,(n)+1; /* indicate partial regs in trap */ \
+ std r9,0(r1); /* store stack frame back link */ \
+ std r10,_CCR(r1); /* store orig CR in stackframe */ \
+ std r9,GPR1(r1); /* store stack frame back link */ \
+ std r11,SOFTE(r1); /* and save it to stackframe */ \
+ std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \
+ std r3,_TRAP(r1); /* set trap number */ \
+ std r0,RESULT(r1); /* clear regs->result */ \
+ ints;
+
+/* Variants for the "ints" argument. This one does nothing when we want
+ * to keep interrupts in their original state
+ */
+#define INTS_KEEP
+
+/* This second version is meant for exceptions that don't immediately
+ * hard-enable. We set a bit in paca->irq_happened to ensure that
+ * a subsequent call to arch_local_irq_restore() will properly
+ * hard-enable and avoid the fast-path
+ */
+#define INTS_DISABLE SOFT_DISABLE_INTS(r3,r4)
+
+/* This is called by exceptions that used INTS_KEEP (that did not touch
+ * irq indicators in the PACA). This will restore MSR:EE to it's previous
+ * value
+ *
+ * XXX In the long run, we may want to open-code it in order to separate the
+ * load from the wrtee, thus limiting the latency caused by the dependency
+ * but at this point, I'll favor code clarity until we have a near to final
+ * implementation
+ */
+#define INTS_RESTORE_HARD \
+ ld r11,_MSR(r1); \
+ wrtee r11;
+
+/* XXX FIXME: Restore r14/r15 when necessary */
+#define BAD_STACK_TRAMPOLINE(n) \
+exc_##n##_bad_stack: \
+ li r1,(n); /* get exception number */ \
+ sth r1,PACA_TRAP_SAVE(r13); /* store trap */ \
+ b bad_stack_book3e; /* bad stack error */
+
+/* WARNING: If you change the layout of this stub, make sure you chcek
+ * the debug exception handler which handles single stepping
+ * into exceptions from userspace, and the MM code in
+ * arch/powerpc/mm/tlb_nohash.c which patches the branch here
+ * and would need to be updated if that branch is moved
+ */
+#define EXCEPTION_STUB(loc, label) \
+ . = interrupt_base_book3e + loc; \
+ nop; /* To make debug interrupts happy */ \
+ b exc_##label##_book3e;
+
+#define ACK_NONE(r)
+#define ACK_DEC(r) \
+ lis r,TSR_DIS@h; \
+ mtspr SPRN_TSR,r
+#define ACK_FIT(r) \
+ lis r,TSR_FIS@h; \
+ mtspr SPRN_TSR,r
+
+/* Used by asynchronous interrupt that may happen in the idle loop.
+ *
+ * This check if the thread was in the idle loop, and if yes, returns
+ * to the caller rather than the PC. This is to avoid a race if
+ * interrupts happen before the wait instruction.
+ */
+#define CHECK_NAPPING() \
+ clrrdi r11,r1,THREAD_SHIFT; \
+ ld r10,TI_LOCAL_FLAGS(r11); \
+ andi. r9,r10,_TLF_NAPPING; \
+ beq+ 1f; \
+ ld r8,_LINK(r1); \
+ rlwinm r7,r10,0,~_TLF_NAPPING; \
+ std r8,_NIP(r1); \
+ std r7,TI_LOCAL_FLAGS(r11); \
+1:
+
+
+#define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack) \
+ START_EXCEPTION(label); \
+ NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE) \
+ EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE) \
+ ack(r8); \
+ CHECK_NAPPING(); \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ bl hdlr; \
+ b .ret_from_except_lite;
+
+/* This value is used to mark exception frames on the stack. */
+ .section ".toc","aw"
+exception_marker:
+ .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
+
+
+/*
+ * And here we have the exception vectors !
+ */
+
+ .text
+ .balign 0x1000
+ .globl interrupt_base_book3e
+interrupt_base_book3e: /* fake trap */
+ EXCEPTION_STUB(0x000, machine_check) /* 0x0200 */
+ EXCEPTION_STUB(0x020, critical_input) /* 0x0580 */
+ EXCEPTION_STUB(0x040, debug_crit) /* 0x0d00 */
+ EXCEPTION_STUB(0x060, data_storage) /* 0x0300 */
+ EXCEPTION_STUB(0x080, instruction_storage) /* 0x0400 */
+ EXCEPTION_STUB(0x0a0, external_input) /* 0x0500 */
+ EXCEPTION_STUB(0x0c0, alignment) /* 0x0600 */
+ EXCEPTION_STUB(0x0e0, program) /* 0x0700 */
+ EXCEPTION_STUB(0x100, fp_unavailable) /* 0x0800 */
+ EXCEPTION_STUB(0x120, system_call) /* 0x0c00 */
+ EXCEPTION_STUB(0x140, ap_unavailable) /* 0x0f20 */
+ EXCEPTION_STUB(0x160, decrementer) /* 0x0900 */
+ EXCEPTION_STUB(0x180, fixed_interval) /* 0x0980 */
+ EXCEPTION_STUB(0x1a0, watchdog) /* 0x09f0 */
+ EXCEPTION_STUB(0x1c0, data_tlb_miss)
+ EXCEPTION_STUB(0x1e0, instruction_tlb_miss)
+ EXCEPTION_STUB(0x260, perfmon)
+ EXCEPTION_STUB(0x280, doorbell)
+ EXCEPTION_STUB(0x2a0, doorbell_crit)
+ EXCEPTION_STUB(0x2c0, guest_doorbell)
+ EXCEPTION_STUB(0x2e0, guest_doorbell_crit)
+ EXCEPTION_STUB(0x300, hypercall)
+ EXCEPTION_STUB(0x320, ehpriv)
+
+ .globl interrupt_end_book3e
+interrupt_end_book3e:
+
+/* Critical Input Interrupt */
+ START_EXCEPTION(critical_input);
+ CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE)
+// EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE)
+// bl special_reg_save_crit
+// CHECK_NAPPING();
+// addi r3,r1,STACK_FRAME_OVERHEAD
+// bl .critical_exception
+// b ret_from_crit_except
+ b .
+
+/* Machine Check Interrupt */
+ START_EXCEPTION(machine_check);
+ CRIT_EXCEPTION_PROLOG(0x200, PROLOG_ADDITION_NONE)
+// EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE)
+// bl special_reg_save_mc
+// addi r3,r1,STACK_FRAME_OVERHEAD
+// CHECK_NAPPING();
+// bl .machine_check_exception
+// b ret_from_mc_except
+ b .
+
+/* Data Storage Interrupt */
+ START_EXCEPTION(data_storage)
+ NORMAL_EXCEPTION_PROLOG(0x300, PROLOG_ADDITION_2REGS)
+ mfspr r14,SPRN_DEAR
+ mfspr r15,SPRN_ESR
+ EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_DISABLE)
+ b storage_fault_common
+
+/* Instruction Storage Interrupt */
+ START_EXCEPTION(instruction_storage);
+ NORMAL_EXCEPTION_PROLOG(0x400, PROLOG_ADDITION_2REGS)
+ li r15,0
+ mr r14,r10
+ EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_DISABLE)
+ b storage_fault_common
+
+/* External Input Interrupt */
+ MASKABLE_EXCEPTION(0x500, external_input, .do_IRQ, ACK_NONE)
+
+/* Alignment */
+ START_EXCEPTION(alignment);
+ NORMAL_EXCEPTION_PROLOG(0x600, PROLOG_ADDITION_2REGS)
+ mfspr r14,SPRN_DEAR
+ mfspr r15,SPRN_ESR
+ EXCEPTION_COMMON(0x600, PACA_EXGEN, INTS_KEEP)
+ b alignment_more /* no room, go out of line */
+
+/* Program Interrupt */
+ START_EXCEPTION(program);
+ NORMAL_EXCEPTION_PROLOG(0x700, PROLOG_ADDITION_1REG)
+ mfspr r14,SPRN_ESR
+ EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE)
+ std r14,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ bl .save_nvgprs
+ bl .program_check_exception
+ b .ret_from_except
+
+/* Floating Point Unavailable Interrupt */
+ START_EXCEPTION(fp_unavailable);
+ NORMAL_EXCEPTION_PROLOG(0x800, PROLOG_ADDITION_NONE)
+ /* we can probably do a shorter exception entry for that one... */
+ EXCEPTION_COMMON(0x800, PACA_EXGEN, INTS_KEEP)
+ ld r12,_MSR(r1)
+ andi. r0,r12,MSR_PR;
+ beq- 1f
+ bl .load_up_fpu
+ b fast_exception_return
+1: INTS_DISABLE
+ bl .save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .kernel_fp_unavailable_exception
+ b .ret_from_except
+
+/* Decrementer Interrupt */
+ MASKABLE_EXCEPTION(0x900, decrementer, .timer_interrupt, ACK_DEC)
+
+/* Fixed Interval Timer Interrupt */
+ MASKABLE_EXCEPTION(0x980, fixed_interval, .unknown_exception, ACK_FIT)
+
+/* Watchdog Timer Interrupt */
+ START_EXCEPTION(watchdog);
+ CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE)
+// EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE)
+// bl special_reg_save_crit
+// CHECK_NAPPING();
+// addi r3,r1,STACK_FRAME_OVERHEAD
+// bl .unknown_exception
+// b ret_from_crit_except
+ b .
+
+/* System Call Interrupt */
+ START_EXCEPTION(system_call)
+ mr r9,r13 /* keep a copy of userland r13 */
+ mfspr r11,SPRN_SRR0 /* get return address */
+ mfspr r12,SPRN_SRR1 /* get previous MSR */
+ mfspr r13,SPRN_SPRG_PACA /* get our PACA */
+ b system_call_common
+
+/* Auxiliary Processor Unavailable Interrupt */
+ START_EXCEPTION(ap_unavailable);
+ NORMAL_EXCEPTION_PROLOG(0xf20, PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_DISABLE)
+ bl .save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unknown_exception
+ b .ret_from_except
+
+/* Debug exception as a critical interrupt*/
+ START_EXCEPTION(debug_crit);
+ CRIT_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS)
+
+ /*
+ * If there is a single step or branch-taken exception in an
+ * exception entry sequence, it was probably meant to apply to
+ * the code where the exception occurred (since exception entry
+ * doesn't turn off DE automatically). We simulate the effect
+ * of turning off DE on entry to an exception handler by turning
+ * off DE in the CSRR1 value and clearing the debug status.
+ */
+
+ mfspr r14,SPRN_DBSR /* check single-step/branch taken */
+ andis. r15,r14,DBSR_IC@h
+ beq+ 1f
+
+ LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
+ LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e)
+ cmpld cr0,r10,r14
+ cmpld cr1,r10,r15
+ blt+ cr0,1f
+ bge+ cr1,1f
+
+ /* here it looks like we got an inappropriate debug exception. */
+ lis r14,DBSR_IC@h /* clear the IC event */
+ rlwinm r11,r11,0,~MSR_DE /* clear DE in the CSRR1 value */
+ mtspr SPRN_DBSR,r14
+ mtspr SPRN_CSRR1,r11
+ lwz r10,PACA_EXCRIT+EX_CR(r13) /* restore registers */
+ ld r1,PACA_EXCRIT+EX_R1(r13)
+ ld r14,PACA_EXCRIT+EX_R14(r13)
+ ld r15,PACA_EXCRIT+EX_R15(r13)
+ mtcr r10
+ ld r10,PACA_EXCRIT+EX_R10(r13) /* restore registers */
+ ld r11,PACA_EXCRIT+EX_R11(r13)
+ mfspr r13,SPRN_SPRG_CRIT_SCRATCH
+ rfci
+
+ /* Normal debug exception */
+ /* XXX We only handle coming from userspace for now since we can't
+ * quite save properly an interrupted kernel state yet
+ */
+1: andi. r14,r11,MSR_PR; /* check for userspace again */
+ beq kernel_dbg_exc; /* if from kernel mode */
+
+ /* Now we mash up things to make it look like we are coming on a
+ * normal exception
+ */
+ mfspr r15,SPRN_SPRG_CRIT_SCRATCH
+ mtspr SPRN_SPRG_GEN_SCRATCH,r15
+ mfspr r14,SPRN_DBSR
+ EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE)
+ std r14,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ mr r4,r14
+ ld r14,PACA_EXCRIT+EX_R14(r13)
+ ld r15,PACA_EXCRIT+EX_R15(r13)
+ bl .save_nvgprs
+ bl .DebugException
+ b .ret_from_except
+
+kernel_dbg_exc:
+ b . /* NYI */
+
+/* Debug exception as a debug interrupt*/
+ START_EXCEPTION(debug_debug);
+ DBG_EXCEPTION_PROLOG(0xd08, PROLOG_ADDITION_2REGS)
+
+ /*
+ * If there is a single step or branch-taken exception in an
+ * exception entry sequence, it was probably meant to apply to
+ * the code where the exception occurred (since exception entry
+ * doesn't turn off DE automatically). We simulate the effect
+ * of turning off DE on entry to an exception handler by turning
+ * off DE in the DSRR1 value and clearing the debug status.
+ */
+
+ mfspr r14,SPRN_DBSR /* check single-step/branch taken */
+ andis. r15,r14,DBSR_IC@h
+ beq+ 1f
+
+ LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
+ LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e)
+ cmpld cr0,r10,r14
+ cmpld cr1,r10,r15
+ blt+ cr0,1f
+ bge+ cr1,1f
+
+ /* here it looks like we got an inappropriate debug exception. */
+ lis r14,DBSR_IC@h /* clear the IC event */
+ rlwinm r11,r11,0,~MSR_DE /* clear DE in the DSRR1 value */
+ mtspr SPRN_DBSR,r14
+ mtspr SPRN_DSRR1,r11
+ lwz r10,PACA_EXDBG+EX_CR(r13) /* restore registers */
+ ld r1,PACA_EXDBG+EX_R1(r13)
+ ld r14,PACA_EXDBG+EX_R14(r13)
+ ld r15,PACA_EXDBG+EX_R15(r13)
+ mtcr r10
+ ld r10,PACA_EXDBG+EX_R10(r13) /* restore registers */
+ ld r11,PACA_EXDBG+EX_R11(r13)
+ mfspr r13,SPRN_SPRG_DBG_SCRATCH
+ rfdi
+
+ /* Normal debug exception */
+ /* XXX We only handle coming from userspace for now since we can't
+ * quite save properly an interrupted kernel state yet
+ */
+1: andi. r14,r11,MSR_PR; /* check for userspace again */
+ beq kernel_dbg_exc; /* if from kernel mode */
+
+ /* Now we mash up things to make it look like we are coming on a
+ * normal exception
+ */
+ mfspr r15,SPRN_SPRG_DBG_SCRATCH
+ mtspr SPRN_SPRG_GEN_SCRATCH,r15
+ mfspr r14,SPRN_DBSR
+ EXCEPTION_COMMON(0xd08, PACA_EXDBG, INTS_DISABLE)
+ std r14,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ mr r4,r14
+ ld r14,PACA_EXDBG+EX_R14(r13)
+ ld r15,PACA_EXDBG+EX_R15(r13)
+ bl .save_nvgprs
+ bl .DebugException
+ b .ret_from_except
+
+ START_EXCEPTION(perfmon);
+ NORMAL_EXCEPTION_PROLOG(0x260, PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .performance_monitor_exception
+ b .ret_from_except_lite
+
+/* Doorbell interrupt */
+ MASKABLE_EXCEPTION(0x280, doorbell, .doorbell_exception, ACK_NONE)
+
+/* Doorbell critical Interrupt */
+ START_EXCEPTION(doorbell_crit);
+ CRIT_EXCEPTION_PROLOG(0x2a0, PROLOG_ADDITION_NONE)
+// EXCEPTION_COMMON(0x2a0, PACA_EXCRIT, INTS_DISABLE)
+// bl special_reg_save_crit
+// CHECK_NAPPING();
+// addi r3,r1,STACK_FRAME_OVERHEAD
+// bl .doorbell_critical_exception
+// b ret_from_crit_except
+ b .
+
+/* Guest Doorbell */
+ MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, ACK_NONE)
+
+/* Guest Doorbell critical Interrupt */
+ START_EXCEPTION(guest_doorbell_crit);
+ CRIT_EXCEPTION_PROLOG(0x2e0, PROLOG_ADDITION_NONE)
+// EXCEPTION_COMMON(0x2e0, PACA_EXCRIT, INTS_DISABLE)
+// bl special_reg_save_crit
+// CHECK_NAPPING();
+// addi r3,r1,STACK_FRAME_OVERHEAD
+// bl .guest_doorbell_critical_exception
+// b ret_from_crit_except
+ b .
+
+/* Hypervisor call */
+ START_EXCEPTION(hypercall);
+ NORMAL_EXCEPTION_PROLOG(0x310, PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON(0x310, PACA_EXGEN, INTS_KEEP)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .save_nvgprs
+ INTS_RESTORE_HARD
+ bl .unknown_exception
+ b .ret_from_except
+
+/* Embedded Hypervisor priviledged */
+ START_EXCEPTION(ehpriv);
+ NORMAL_EXCEPTION_PROLOG(0x320, PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON(0x320, PACA_EXGEN, INTS_KEEP)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .save_nvgprs
+ INTS_RESTORE_HARD
+ bl .unknown_exception
+ b .ret_from_except
+
+/*
+ * An interrupt came in while soft-disabled; We mark paca->irq_happened
+ * accordingly and if the interrupt is level sensitive, we hard disable
+ */
+
+masked_interrupt_book3e_0x500:
+ /* XXX When adding support for EPR, use PACA_IRQ_EE_EDGE */
+ li r11,PACA_IRQ_EE
+ b masked_interrupt_book3e_full_mask
+
+masked_interrupt_book3e_0x900:
+ ACK_DEC(r11);
+ li r11,PACA_IRQ_DEC
+ b masked_interrupt_book3e_no_mask
+masked_interrupt_book3e_0x980:
+ ACK_FIT(r11);
+ li r11,PACA_IRQ_DEC
+ b masked_interrupt_book3e_no_mask
+masked_interrupt_book3e_0x280:
+masked_interrupt_book3e_0x2c0:
+ li r11,PACA_IRQ_DBELL
+ b masked_interrupt_book3e_no_mask
+
+masked_interrupt_book3e_no_mask:
+ mtcr r10
+ lbz r10,PACAIRQHAPPENED(r13)
+ or r10,r10,r11
+ stb r10,PACAIRQHAPPENED(r13)
+ b 1f
+masked_interrupt_book3e_full_mask:
+ mtcr r10
+ lbz r10,PACAIRQHAPPENED(r13)
+ or r10,r10,r11
+ stb r10,PACAIRQHAPPENED(r13)
+ mfspr r10,SPRN_SRR1
+ rldicl r11,r10,48,1 /* clear MSR_EE */
+ rotldi r10,r11,16
+ mtspr SPRN_SRR1,r10
+1: ld r10,PACA_EXGEN+EX_R10(r13);
+ ld r11,PACA_EXGEN+EX_R11(r13);
+ mfspr r13,SPRN_SPRG_GEN_SCRATCH;
+ rfi
+ b .
+/*
+ * Called from arch_local_irq_enable when an interrupt needs
+ * to be resent. r3 contains either 0x500,0x900,0x260 or 0x280
+ * to indicate the kind of interrupt. MSR:EE is already off.
+ * We generate a stackframe like if a real interrupt had happened.
+ *
+ * Note: While MSR:EE is off, we need to make sure that _MSR
+ * in the generated frame has EE set to 1 or the exception
+ * handler will not properly re-enable them.
+ */
+_GLOBAL(__replay_interrupt)
+ /* We are going to jump to the exception common code which
+ * will retrieve various register values from the PACA which
+ * we don't give a damn about.
+ */
+ mflr r10
+ mfmsr r11
+ mfcr r4
+ mtspr SPRN_SPRG_GEN_SCRATCH,r13;
+ std r1,PACA_EXGEN+EX_R1(r13);
+ stw r4,PACA_EXGEN+EX_CR(r13);
+ ori r11,r11,MSR_EE
+ subi r1,r1,INT_FRAME_SIZE;
+ cmpwi cr0,r3,0x500
+ beq exc_0x500_common
+ cmpwi cr0,r3,0x900
+ beq exc_0x900_common
+ cmpwi cr0,r3,0x280
+ beq exc_0x280_common
+ blr
+
+
+/*
+ * This is called from 0x300 and 0x400 handlers after the prologs with
+ * r14 and r15 containing the fault address and error code, with the
+ * original values stashed away in the PACA
+ */
+storage_fault_common:
+ std r14,_DAR(r1)
+ std r15,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ mr r4,r14
+ mr r5,r15
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
+ bl .do_page_fault
+ cmpdi r3,0
+ bne- 1f
+ b .ret_from_except_lite
+1: bl .save_nvgprs
+ mr r5,r3
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ ld r4,_DAR(r1)
+ bl .bad_page_fault
+ b .ret_from_except
+
+/*
+ * Alignment exception doesn't fit entirely in the 0x100 bytes so it
+ * continues here.
+ */
+alignment_more:
+ std r14,_DAR(r1)
+ std r15,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
+ bl .save_nvgprs
+ INTS_RESTORE_HARD
+ bl .alignment_exception
+ b .ret_from_except
+
+/*
+ * We branch here from entry_64.S for the last stage of the exception
+ * return code path. MSR:EE is expected to be off at that point
+ */
+_GLOBAL(exception_return_book3e)
+ b 1f
+
+/* This is the return from load_up_fpu fast path which could do with
+ * less GPR restores in fact, but for now we have a single return path
+ */
+ .globl fast_exception_return
+fast_exception_return:
+ wrteei 0
+1: mr r0,r13
+ ld r10,_MSR(r1)
+ REST_4GPRS(2, r1)
+ andi. r6,r10,MSR_PR
+ REST_2GPRS(6, r1)
+ beq 1f
+ ACCOUNT_CPU_USER_EXIT(r10, r11)
+ ld r0,GPR13(r1)
+
+1: stdcx. r0,0,r1 /* to clear the reservation */
+
+ ld r8,_CCR(r1)
+ ld r9,_LINK(r1)
+ ld r10,_CTR(r1)
+ ld r11,_XER(r1)
+ mtcr r8
+ mtlr r9
+ mtctr r10
+ mtxer r11
+ REST_2GPRS(8, r1)
+ ld r10,GPR10(r1)
+ ld r11,GPR11(r1)
+ ld r12,GPR12(r1)
+ mtspr SPRN_SPRG_GEN_SCRATCH,r0
+
+ std r10,PACA_EXGEN+EX_R10(r13);
+ std r11,PACA_EXGEN+EX_R11(r13);
+ ld r10,_NIP(r1)
+ ld r11,_MSR(r1)
+ ld r0,GPR0(r1)
+ ld r1,GPR1(r1)
+ mtspr SPRN_SRR0,r10
+ mtspr SPRN_SRR1,r11
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ mfspr r13,SPRN_SPRG_GEN_SCRATCH
+ rfi
+
+/*
+ * Trampolines used when spotting a bad kernel stack pointer in
+ * the exception entry code.
+ *
+ * TODO: move some bits like SRR0 read to trampoline, pass PACA
+ * index around, etc... to handle crit & mcheck
+ */
+BAD_STACK_TRAMPOLINE(0x000)
+BAD_STACK_TRAMPOLINE(0x100)
+BAD_STACK_TRAMPOLINE(0x200)
+BAD_STACK_TRAMPOLINE(0x260)
+BAD_STACK_TRAMPOLINE(0x280)
+BAD_STACK_TRAMPOLINE(0x2a0)
+BAD_STACK_TRAMPOLINE(0x2c0)
+BAD_STACK_TRAMPOLINE(0x2e0)
+BAD_STACK_TRAMPOLINE(0x300)
+BAD_STACK_TRAMPOLINE(0x310)
+BAD_STACK_TRAMPOLINE(0x320)
+BAD_STACK_TRAMPOLINE(0x400)
+BAD_STACK_TRAMPOLINE(0x500)
+BAD_STACK_TRAMPOLINE(0x600)
+BAD_STACK_TRAMPOLINE(0x700)
+BAD_STACK_TRAMPOLINE(0x800)
+BAD_STACK_TRAMPOLINE(0x900)
+BAD_STACK_TRAMPOLINE(0x980)
+BAD_STACK_TRAMPOLINE(0x9f0)
+BAD_STACK_TRAMPOLINE(0xa00)
+BAD_STACK_TRAMPOLINE(0xb00)
+BAD_STACK_TRAMPOLINE(0xc00)
+BAD_STACK_TRAMPOLINE(0xd00)
+BAD_STACK_TRAMPOLINE(0xd08)
+BAD_STACK_TRAMPOLINE(0xe00)
+BAD_STACK_TRAMPOLINE(0xf00)
+BAD_STACK_TRAMPOLINE(0xf20)
+
+ .globl bad_stack_book3e
+bad_stack_book3e:
+ /* XXX: Needs to make SPRN_SPRG_GEN depend on exception type */
+ mfspr r10,SPRN_SRR0; /* read SRR0 before touching stack */
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,64+INT_FRAME_SIZE
+ std r10,_NIP(r1)
+ std r11,_MSR(r1)
+ ld r10,PACA_EXGEN+EX_R1(r13) /* FIXME for crit & mcheck */
+ lwz r11,PACA_EXGEN+EX_CR(r13) /* FIXME for crit & mcheck */
+ std r10,GPR1(r1)
+ std r11,_CCR(r1)
+ mfspr r10,SPRN_DEAR
+ mfspr r11,SPRN_ESR
+ std r10,_DAR(r1)
+ std r11,_DSISR(r1)
+ std r0,GPR0(r1); /* save r0 in stackframe */ \
+ std r2,GPR2(r1); /* save r2 in stackframe */ \
+ SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
+ SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
+ std r9,GPR9(r1); /* save r9 in stackframe */ \
+ ld r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */ \
+ ld r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */ \
+ mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \
+ std r3,GPR10(r1); /* save r10 to stackframe */ \
+ std r4,GPR11(r1); /* save r11 to stackframe */ \
+ std r12,GPR12(r1); /* save r12 in stackframe */ \
+ std r5,GPR13(r1); /* save it to stackframe */ \
+ mflr r10
+ mfctr r11
+ mfxer r12
+ std r10,_LINK(r1)
+ std r11,_CTR(r1)
+ std r12,_XER(r1)
+ SAVE_10GPRS(14,r1)
+ SAVE_8GPRS(24,r1)
+ lhz r12,PACA_TRAP_SAVE(r13)
+ std r12,_TRAP(r1)
+ addi r11,r1,INT_FRAME_SIZE
+ std r11,0(r1)
+ li r12,0
+ std r12,0(r11)
+ ld r2,PACATOC(r13)
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .kernel_bad_stack
+ b 1b
+
+/*
+ * Setup the initial TLB for a core. This current implementation
+ * assume that whatever we are running off will not conflict with
+ * the new mapping at PAGE_OFFSET.
+ */
+_GLOBAL(initial_tlb_book3e)
+
+ /* Look for the first TLB with IPROT set */
+ mfspr r4,SPRN_TLB0CFG
+ andi. r3,r4,TLBnCFG_IPROT
+ lis r3,MAS0_TLBSEL(0)@h
+ bne found_iprot
+
+ mfspr r4,SPRN_TLB1CFG
+ andi. r3,r4,TLBnCFG_IPROT
+ lis r3,MAS0_TLBSEL(1)@h
+ bne found_iprot
+
+ mfspr r4,SPRN_TLB2CFG
+ andi. r3,r4,TLBnCFG_IPROT
+ lis r3,MAS0_TLBSEL(2)@h
+ bne found_iprot
+
+ lis r3,MAS0_TLBSEL(3)@h
+ mfspr r4,SPRN_TLB3CFG
+ /* fall through */
+
+found_iprot:
+ andi. r5,r4,TLBnCFG_HES
+ bne have_hes
+
+ mflr r8 /* save LR */
+/* 1. Find the index of the entry we're executing in
+ *
+ * r3 = MAS0_TLBSEL (for the iprot array)
+ * r4 = SPRN_TLBnCFG
+ */
+ bl invstr /* Find our address */
+invstr: mflr r6 /* Make it accessible */
+ mfmsr r7
+ rlwinm r5,r7,27,31,31 /* extract MSR[IS] */
+ mfspr r7,SPRN_PID
+ slwi r7,r7,16
+ or r7,r7,r5
+ mtspr SPRN_MAS6,r7
+ tlbsx 0,r6 /* search MSR[IS], SPID=PID */
+
+ mfspr r3,SPRN_MAS0
+ rlwinm r5,r3,16,20,31 /* Extract MAS0(Entry) */
+
+ mfspr r7,SPRN_MAS1 /* Insure IPROT set */
+ oris r7,r7,MAS1_IPROT@h
+ mtspr SPRN_MAS1,r7
+ tlbwe
+
+/* 2. Invalidate all entries except the entry we're executing in
+ *
+ * r3 = MAS0 w/TLBSEL & ESEL for the entry we are running in
+ * r4 = SPRN_TLBnCFG
+ * r5 = ESEL of entry we are running in
+ */
+ andi. r4,r4,TLBnCFG_N_ENTRY /* Extract # entries */
+ li r6,0 /* Set Entry counter to 0 */
+1: mr r7,r3 /* Set MAS0(TLBSEL) */
+ rlwimi r7,r6,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r6) */
+ mtspr SPRN_MAS0,r7
+ tlbre
+ mfspr r7,SPRN_MAS1
+ rlwinm r7,r7,0,2,31 /* Clear MAS1 Valid and IPROT */
+ cmpw r5,r6
+ beq skpinv /* Dont update the current execution TLB */
+ mtspr SPRN_MAS1,r7
+ tlbwe
+ isync
+skpinv: addi r6,r6,1 /* Increment */
+ cmpw r6,r4 /* Are we done? */
+ bne 1b /* If not, repeat */
+
+ /* Invalidate all TLBs */
+ PPC_TLBILX_ALL(0,0)
+ sync
+ isync
+
+/* 3. Setup a temp mapping and jump to it
+ *
+ * r3 = MAS0 w/TLBSEL & ESEL for the entry we are running in
+ * r5 = ESEL of entry we are running in
+ */
+ andi. r7,r5,0x1 /* Find an entry not used and is non-zero */
+ addi r7,r7,0x1
+ mr r4,r3 /* Set MAS0(TLBSEL) = 1 */
+ mtspr SPRN_MAS0,r4
+ tlbre
+
+ rlwimi r4,r7,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r7) */
+ mtspr SPRN_MAS0,r4
+
+ mfspr r7,SPRN_MAS1
+ xori r6,r7,MAS1_TS /* Setup TMP mapping in the other Address space */
+ mtspr SPRN_MAS1,r6
+
+ tlbwe
+
+ mfmsr r6
+ xori r6,r6,MSR_IS
+ mtspr SPRN_SRR1,r6
+ bl 1f /* Find our address */
+1: mflr r6
+ addi r6,r6,(2f - 1b)
+ mtspr SPRN_SRR0,r6
+ rfi
+2:
+
+/* 4. Clear out PIDs & Search info
+ *
+ * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in
+ * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
+ * r5 = MAS3
+ */
+ li r6,0
+ mtspr SPRN_MAS6,r6
+ mtspr SPRN_PID,r6
+
+/* 5. Invalidate mapping we started in
+ *
+ * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in
+ * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
+ * r5 = MAS3
+ */
+ mtspr SPRN_MAS0,r3
+ tlbre
+ mfspr r6,SPRN_MAS1
+ rlwinm r6,r6,0,2,0 /* clear IPROT */
+ mtspr SPRN_MAS1,r6
+ tlbwe
+
+ /* Invalidate TLB1 */
+ PPC_TLBILX_ALL(0,0)
+ sync
+ isync
+
+/* The mapping only needs to be cache-coherent on SMP */
+#ifdef CONFIG_SMP
+#define M_IF_SMP MAS2_M
+#else
+#define M_IF_SMP 0
+#endif
+
+/* 6. Setup KERNELBASE mapping in TLB[0]
+ *
+ * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in
+ * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
+ * r5 = MAS3
+ */
+ rlwinm r3,r3,0,16,3 /* clear ESEL */
+ mtspr SPRN_MAS0,r3
+ lis r6,(MAS1_VALID|MAS1_IPROT)@h
+ ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l
+ mtspr SPRN_MAS1,r6
+
+ LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | M_IF_SMP)
+ mtspr SPRN_MAS2,r6
+
+ rlwinm r5,r5,0,0,25
+ ori r5,r5,MAS3_SR | MAS3_SW | MAS3_SX
+ mtspr SPRN_MAS3,r5
+ li r5,-1
+ rlwinm r5,r5,0,0,25
+
+ tlbwe
+
+/* 7. Jump to KERNELBASE mapping
+ *
+ * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
+ */
+ /* Now we branch the new virtual address mapped by this entry */
+ LOAD_REG_IMMEDIATE(r6,2f)
+ lis r7,MSR_KERNEL@h
+ ori r7,r7,MSR_KERNEL@l
+ mtspr SPRN_SRR0,r6
+ mtspr SPRN_SRR1,r7
+ rfi /* start execution out of TLB1[0] entry */
+2:
+
+/* 8. Clear out the temp mapping
+ *
+ * r4 = MAS0 w/TLBSEL & ESEL for the entry we are running in
+ */
+ mtspr SPRN_MAS0,r4
+ tlbre
+ mfspr r5,SPRN_MAS1
+ rlwinm r5,r5,0,2,0 /* clear IPROT */
+ mtspr SPRN_MAS1,r5
+ tlbwe
+
+ /* Invalidate TLB1 */
+ PPC_TLBILX_ALL(0,0)
+ sync
+ isync
+
+ /* We translate LR and return */
+ tovirt(r8,r8)
+ mtlr r8
+ blr
+
+have_hes:
+ /* Setup MAS 0,1,2,3 and 7 for tlbwe of a 1G entry that maps the
+ * kernel linear mapping. We also set MAS8 once for all here though
+ * that will have to be made dependent on whether we are running under
+ * a hypervisor I suppose.
+ */
+
+ /* BEWARE, MAGIC
+ * This code is called as an ordinary function on the boot CPU. But to
+ * avoid duplication, this code is also used in SCOM bringup of
+ * secondary CPUs. We read the code between the initial_tlb_code_start
+ * and initial_tlb_code_end labels one instruction at a time and RAM it
+ * into the new core via SCOM. That doesn't process branches, so there
+ * must be none between those two labels. It also means if this code
+ * ever takes any parameters, the SCOM code must also be updated to
+ * provide them.
+ */
+ .globl a2_tlbinit_code_start
+a2_tlbinit_code_start:
+
+ ori r11,r3,MAS0_WQ_ALLWAYS
+ oris r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */
+ mtspr SPRN_MAS0,r11
+ lis r3,(MAS1_VALID | MAS1_IPROT)@h
+ ori r3,r3,BOOK3E_PAGESZ_1GB << MAS1_TSIZE_SHIFT
+ mtspr SPRN_MAS1,r3
+ LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET | MAS2_M)
+ mtspr SPRN_MAS2,r3
+ li r3,MAS3_SR | MAS3_SW | MAS3_SX
+ mtspr SPRN_MAS7_MAS3,r3
+ li r3,0
+ mtspr SPRN_MAS8,r3
+
+ /* Write the TLB entry */
+ tlbwe
+
+ .globl a2_tlbinit_after_linear_map
+a2_tlbinit_after_linear_map:
+
+ /* Now we branch the new virtual address mapped by this entry */
+ LOAD_REG_IMMEDIATE(r3,1f)
+ mtctr r3
+ bctr
+
+1: /* We are now running at PAGE_OFFSET, clean the TLB of everything
+ * else (including IPROTed things left by firmware)
+ * r4 = TLBnCFG
+ * r3 = current address (more or less)
+ */
+
+ li r5,0
+ mtspr SPRN_MAS6,r5
+ tlbsx 0,r3
+
+ rlwinm r9,r4,0,TLBnCFG_N_ENTRY
+ rlwinm r10,r4,8,0xff
+ addi r10,r10,-1 /* Get inner loop mask */
+
+ li r3,1
+
+ mfspr r5,SPRN_MAS1
+ rlwinm r5,r5,0,(~(MAS1_VALID|MAS1_IPROT))
+
+ mfspr r6,SPRN_MAS2
+ rldicr r6,r6,0,51 /* Extract EPN */
+
+ mfspr r7,SPRN_MAS0
+ rlwinm r7,r7,0,0xffff0fff /* Clear HES and WQ */
+
+ rlwinm r8,r7,16,0xfff /* Extract ESEL */
+
+2: add r4,r3,r8
+ and r4,r4,r10
+
+ rlwimi r7,r4,16,MAS0_ESEL_MASK
+
+ mtspr SPRN_MAS0,r7
+ mtspr SPRN_MAS1,r5
+ mtspr SPRN_MAS2,r6
+ tlbwe
+
+ addi r3,r3,1
+ and. r4,r3,r10
+
+ bne 3f
+ addis r6,r6,(1<<30)@h
+3:
+ cmpw r3,r9
+ blt 2b
+
+ .globl a2_tlbinit_after_iprot_flush
+a2_tlbinit_after_iprot_flush:
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
+ /* Now establish early debug mappings if applicable */
+ /* Restore the MAS0 we used for linear mapping load */
+ mtspr SPRN_MAS0,r11
+
+ lis r3,(MAS1_VALID | MAS1_IPROT)@h
+ ori r3,r3,(BOOK3E_PAGESZ_4K << MAS1_TSIZE_SHIFT)
+ mtspr SPRN_MAS1,r3
+ LOAD_REG_IMMEDIATE(r3, WSP_UART_VIRT | MAS2_I | MAS2_G)
+ mtspr SPRN_MAS2,r3
+ LOAD_REG_IMMEDIATE(r3, WSP_UART_PHYS | MAS3_SR | MAS3_SW)
+ mtspr SPRN_MAS7_MAS3,r3
+ /* re-use the MAS8 value from the linear mapping */
+ tlbwe
+#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
+
+ PPC_TLBILX(0,0,0)
+ sync
+ isync
+
+ .globl a2_tlbinit_code_end
+a2_tlbinit_code_end:
+
+ /* We translate LR and return */
+ mflr r3
+ tovirt(r3,r3)
+ mtlr r3
+ blr
+
+/*
+ * Main entry (boot CPU, thread 0)
+ *
+ * We enter here from head_64.S, possibly after the prom_init trampoline
+ * with r3 and r4 already saved to r31 and 30 respectively and in 64 bits
+ * mode. Anything else is as it was left by the bootloader
+ *
+ * Initial requirements of this port:
+ *
+ * - Kernel loaded at 0 physical
+ * - A good lump of memory mapped 0:0 by UTLB entry 0
+ * - MSR:IS & MSR:DS set to 0
+ *
+ * Note that some of the above requirements will be relaxed in the future
+ * as the kernel becomes smarter at dealing with different initial conditions
+ * but for now you have to be careful
+ */
+_GLOBAL(start_initialization_book3e)
+ mflr r28
+
+ /* First, we need to setup some initial TLBs to map the kernel
+ * text, data and bss at PAGE_OFFSET. We don't have a real mode
+ * and always use AS 0, so we just set it up to match our link
+ * address and never use 0 based addresses.
+ */
+ bl .initial_tlb_book3e
+
+ /* Init global core bits */
+ bl .init_core_book3e
+
+ /* Init per-thread bits */
+ bl .init_thread_book3e
+
+ /* Return to common init code */
+ tovirt(r28,r28)
+ mtlr r28
+ blr
+
+
+/*
+ * Secondary core/processor entry
+ *
+ * This is entered for thread 0 of a secondary core, all other threads
+ * are expected to be stopped. It's similar to start_initialization_book3e
+ * except that it's generally entered from the holding loop in head_64.S
+ * after CPUs have been gathered by Open Firmware.
+ *
+ * We assume we are in 32 bits mode running with whatever TLB entry was
+ * set for us by the firmware or POR engine.
+ */
+_GLOBAL(book3e_secondary_core_init_tlb_set)
+ li r4,1
+ b .generic_secondary_smp_init
+
+_GLOBAL(book3e_secondary_core_init)
+ mflr r28
+
+ /* Do we need to setup initial TLB entry ? */
+ cmplwi r4,0
+ bne 2f
+
+ /* Setup TLB for this core */
+ bl .initial_tlb_book3e
+
+ /* We can return from the above running at a different
+ * address, so recalculate r2 (TOC)
+ */
+ bl .relative_toc
+
+ /* Init global core bits */
+2: bl .init_core_book3e
+
+ /* Init per-thread bits */
+3: bl .init_thread_book3e
+
+ /* Return to common init code at proper virtual address.
+ *
+ * Due to various previous assumptions, we know we entered this
+ * function at either the final PAGE_OFFSET mapping or using a
+ * 1:1 mapping at 0, so we don't bother doing a complicated check
+ * here, we just ensure the return address has the right top bits.
+ *
+ * Note that if we ever want to be smarter about where we can be
+ * started from, we have to be careful that by the time we reach
+ * the code below we may already be running at a different location
+ * than the one we were called from since initial_tlb_book3e can
+ * have moved us already.
+ */
+ cmpdi cr0,r28,0
+ blt 1f
+ lis r3,PAGE_OFFSET@highest
+ sldi r3,r3,32
+ or r28,r28,r3
+1: mtlr r28
+ blr
+
+_GLOBAL(book3e_secondary_thread_init)
+ mflr r28
+ b 3b
+
+_STATIC(init_core_book3e)
+ /* Establish the interrupt vector base */
+ LOAD_REG_IMMEDIATE(r3, interrupt_base_book3e)
+ mtspr SPRN_IVPR,r3
+ sync
+ blr
+
+_STATIC(init_thread_book3e)
+ lis r3,(SPRN_EPCR_ICM | SPRN_EPCR_GICM)@h
+ mtspr SPRN_EPCR,r3
+
+ /* Make sure interrupts are off */
+ wrteei 0
+
+ /* disable all timers and clear out status */
+ li r3,0
+ mtspr SPRN_TCR,r3
+ mfspr r3,SPRN_TSR
+ mtspr SPRN_TSR,r3
+
+ blr
+
+_GLOBAL(__setup_base_ivors)
+ SET_IVOR(0, 0x020) /* Critical Input */
+ SET_IVOR(1, 0x000) /* Machine Check */
+ SET_IVOR(2, 0x060) /* Data Storage */
+ SET_IVOR(3, 0x080) /* Instruction Storage */
+ SET_IVOR(4, 0x0a0) /* External Input */
+ SET_IVOR(5, 0x0c0) /* Alignment */
+ SET_IVOR(6, 0x0e0) /* Program */
+ SET_IVOR(7, 0x100) /* FP Unavailable */
+ SET_IVOR(8, 0x120) /* System Call */
+ SET_IVOR(9, 0x140) /* Auxiliary Processor Unavailable */
+ SET_IVOR(10, 0x160) /* Decrementer */
+ SET_IVOR(11, 0x180) /* Fixed Interval Timer */
+ SET_IVOR(12, 0x1a0) /* Watchdog Timer */
+ SET_IVOR(13, 0x1c0) /* Data TLB Error */
+ SET_IVOR(14, 0x1e0) /* Instruction TLB Error */
+ SET_IVOR(15, 0x040) /* Debug */
+
+ sync
+
+ blr
+
+_GLOBAL(setup_perfmon_ivor)
+ SET_IVOR(35, 0x260) /* Performance Monitor */
+ blr
+
+_GLOBAL(setup_doorbell_ivors)
+ SET_IVOR(36, 0x280) /* Processor Doorbell */
+ SET_IVOR(37, 0x2a0) /* Processor Doorbell Crit */
+
+ /* Check MMUCFG[LPIDSIZE] to determine if we have category E.HV */
+ mfspr r10,SPRN_MMUCFG
+ rlwinm. r10,r10,0,MMUCFG_LPIDSIZE
+ beqlr
+
+ SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */
+ SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */
+ blr
+
+_GLOBAL(setup_ehv_ivors)
+ /*
+ * We may be running as a guest and lack E.HV even on a chip
+ * that normally has it.
+ */
+ mfspr r10,SPRN_MMUCFG
+ rlwinm. r10,r10,0,MMUCFG_LPIDSIZE
+ beqlr
+
+ SET_IVOR(40, 0x300) /* Embedded Hypervisor System Call */
+ SET_IVOR(41, 0x320) /* Embedded Hypervisor Privilege */
+ blr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
new file mode 100644
index 00000000..8f880bc7
--- /dev/null
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -0,0 +1,1066 @@
+/*
+ * This file contains the 64-bit "server" PowerPC variant
+ * of the low level exception handling including exception
+ * vectors, exception return, part of the slb and stab
+ * handling and other fixed offset specific things.
+ *
+ * This file is meant to be #included from head_64.S due to
+ * position dependent assembly.
+ *
+ * Most of this originates from head_64.S and thus has the same
+ * copyright history.
+ *
+ */
+
+#include <asm/hw_irq.h>
+#include <asm/exception-64s.h>
+#include <asm/ptrace.h>
+
+/*
+ * We layout physical memory as follows:
+ * 0x0000 - 0x00ff : Secondary processor spin code
+ * 0x0100 - 0x2fff : pSeries Interrupt prologs
+ * 0x3000 - 0x5fff : interrupt support common interrupt prologs
+ * 0x6000 - 0x6fff : Initial (CPU0) segment table
+ * 0x7000 - 0x7fff : FWNMI data area
+ * 0x8000 - : Early init and support code
+ */
+
+/*
+ * This is the start of the interrupt handlers for pSeries
+ * This code runs with relocation off.
+ * Code from here to __end_interrupts gets copied down to real
+ * address 0x100 when we are running a relocatable kernel.
+ * Therefore any relative branches in this section must only
+ * branch to labels in this section.
+ */
+ . = 0x100
+ .globl __start_interrupts
+__start_interrupts:
+
+ .globl system_reset_pSeries;
+system_reset_pSeries:
+ HMT_MEDIUM;
+ SET_SCRATCH0(r13)
+#ifdef CONFIG_PPC_P7_NAP
+BEGIN_FTR_SECTION
+ /* Running native on arch 2.06 or later, check if we are
+ * waking up from nap. We only handle no state loss and
+ * supervisor state loss. We do -not- handle hypervisor
+ * state loss at this time.
+ */
+ mfspr r13,SPRN_SRR1
+ rlwinm. r13,r13,47-31,30,31
+ beq 9f
+
+ /* waking up from powersave (nap) state */
+ cmpwi cr1,r13,2
+ /* Total loss of HV state is fatal, we could try to use the
+ * PIR to locate a PACA, then use an emergency stack etc...
+ * but for now, let's just stay stuck here
+ */
+ bgt cr1,.
+ GET_PACA(r13)
+
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ lbz r0,PACAPROCSTART(r13)
+ cmpwi r0,0x80
+ bne 1f
+ li r0,1
+ stb r0,PACAPROCSTART(r13)
+ b kvm_start_guest
+1:
+#endif
+
+ beq cr1,2f
+ b .power7_wakeup_noloss
+2: b .power7_wakeup_loss
+9:
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#endif /* CONFIG_PPC_P7_NAP */
+ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
+ NOTEST, 0x100)
+
+ . = 0x200
+machine_check_pSeries_1:
+ /* This is moved out of line as it can be patched by FW, but
+ * some code path might still want to branch into the original
+ * vector
+ */
+ b machine_check_pSeries
+
+ . = 0x300
+ .globl data_access_pSeries
+data_access_pSeries:
+ HMT_MEDIUM
+ SET_SCRATCH0(r13)
+#ifndef CONFIG_POWER4_ONLY
+BEGIN_FTR_SECTION
+ b data_access_check_stab
+data_access_not_stab:
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
+#endif
+ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD,
+ KVMTEST, 0x300)
+
+ . = 0x380
+ .globl data_access_slb_pSeries
+data_access_slb_pSeries:
+ HMT_MEDIUM
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380)
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_DAR
+#ifdef __DISABLED__
+ /* Keep that around for when we re-implement dynamic VSIDs */
+ cmpdi r3,0
+ bge slb_miss_user_pseries
+#endif /* __DISABLED__ */
+ mfspr r12,SPRN_SRR1
+#ifndef CONFIG_RELOCATABLE
+ b .slb_miss_realmode
+#else
+ /*
+ * We can't just use a direct branch to .slb_miss_realmode
+ * because the distance from here to there depends on where
+ * the kernel ends up being put.
+ */
+ mfctr r11
+ ld r10,PACAKBASE(r13)
+ LOAD_HANDLER(r10, .slb_miss_realmode)
+ mtctr r10
+ bctr
+#endif
+
+ STD_EXCEPTION_PSERIES(0x400, 0x400, instruction_access)
+
+ . = 0x480
+ .globl instruction_access_slb_pSeries
+instruction_access_slb_pSeries:
+ HMT_MEDIUM
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
+#ifdef __DISABLED__
+ /* Keep that around for when we re-implement dynamic VSIDs */
+ cmpdi r3,0
+ bge slb_miss_user_pseries
+#endif /* __DISABLED__ */
+ mfspr r12,SPRN_SRR1
+#ifndef CONFIG_RELOCATABLE
+ b .slb_miss_realmode
+#else
+ mfctr r11
+ ld r10,PACAKBASE(r13)
+ LOAD_HANDLER(r10, .slb_miss_realmode)
+ mtctr r10
+ bctr
+#endif
+
+ /* We open code these as we can't have a ". = x" (even with
+ * x = "." within a feature section
+ */
+ . = 0x500;
+ .globl hardware_interrupt_pSeries;
+ .globl hardware_interrupt_hv;
+hardware_interrupt_pSeries:
+hardware_interrupt_hv:
+ BEGIN_FTR_SECTION
+ _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt,
+ EXC_HV, SOFTEN_TEST_HV)
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502)
+ FTR_SECTION_ELSE
+ _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt,
+ EXC_STD, SOFTEN_TEST_HV_201)
+ KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500)
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+
+ STD_EXCEPTION_PSERIES(0x600, 0x600, alignment)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x600)
+
+ STD_EXCEPTION_PSERIES(0x700, 0x700, program_check)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x700)
+
+ STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x800)
+
+ MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer)
+ MASKABLE_EXCEPTION_HV(0x980, 0x982, decrementer)
+
+ STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xa00)
+
+ STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xb00)
+
+ . = 0xc00
+ .globl system_call_pSeries
+system_call_pSeries:
+ HMT_MEDIUM
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+ SET_SCRATCH0(r13)
+ GET_PACA(r13)
+ std r9,PACA_EXGEN+EX_R9(r13)
+ std r10,PACA_EXGEN+EX_R10(r13)
+ mfcr r9
+ KVMTEST(0xc00)
+ GET_SCRATCH0(r13)
+#endif
+BEGIN_FTR_SECTION
+ cmpdi r0,0x1ebe
+ beq- 1f
+END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
+ mr r9,r13
+ GET_PACA(r13)
+ mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
+ ld r10,PACAKBASE(r13)
+ LOAD_HANDLER(r10, system_call_entry)
+ mtspr SPRN_SRR0,r10
+ ld r10,PACAKMSR(r13)
+ mtspr SPRN_SRR1,r10
+ rfid
+ b . /* prevent speculative execution */
+
+ KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
+
+/* Fast LE/BE switch system call */
+1: mfspr r12,SPRN_SRR1
+ xori r12,r12,MSR_LE
+ mtspr SPRN_SRR1,r12
+ rfid /* return to userspace */
+ b .
+
+ STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xd00)
+
+ /* At 0xe??? we have a bunch of hypervisor exceptions, we branch
+ * out of line to handle them
+ */
+ . = 0xe00
+ b h_data_storage_hv
+ . = 0xe20
+ b h_instr_storage_hv
+ . = 0xe40
+ b emulation_assist_hv
+ . = 0xe50
+ b hmi_exception_hv
+ . = 0xe60
+ b hmi_exception_hv
+
+ /* We need to deal with the Altivec unavailable exception
+ * here which is at 0xf20, thus in the middle of the
+ * prolog code of the PerformanceMonitor one. A little
+ * trickery is thus necessary
+ */
+performance_monitor_pSeries_1:
+ . = 0xf00
+ b performance_monitor_pSeries
+
+altivec_unavailable_pSeries_1:
+ . = 0xf20
+ b altivec_unavailable_pSeries
+
+vsx_unavailable_pSeries_1:
+ . = 0xf40
+ b vsx_unavailable_pSeries
+
+#ifdef CONFIG_CBE_RAS
+ STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error)
+ KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202)
+#endif /* CONFIG_CBE_RAS */
+
+ STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint)
+ KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x1300)
+
+#ifdef CONFIG_CBE_RAS
+ STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance)
+ KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602)
+#endif /* CONFIG_CBE_RAS */
+
+ STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x1700)
+
+#ifdef CONFIG_CBE_RAS
+ STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal)
+ KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1802)
+#endif /* CONFIG_CBE_RAS */
+
+ . = 0x3000
+
+/*** Out of line interrupts support ***/
+
+ /* moved from 0x200 */
+machine_check_pSeries:
+ .globl machine_check_fwnmi
+machine_check_fwnmi:
+ HMT_MEDIUM
+ SET_SCRATCH0(r13) /* save r13 */
+ EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common,
+ EXC_STD, KVMTEST, 0x200)
+ KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200)
+
+#ifndef CONFIG_POWER4_ONLY
+ /* moved from 0x300 */
+data_access_check_stab:
+ GET_PACA(r13)
+ std r9,PACA_EXSLB+EX_R9(r13)
+ std r10,PACA_EXSLB+EX_R10(r13)
+ mfspr r10,SPRN_DAR
+ mfspr r9,SPRN_DSISR
+ srdi r10,r10,60
+ rlwimi r10,r9,16,0x20
+#ifdef CONFIG_KVM_BOOK3S_PR
+ lbz r9,HSTATE_IN_GUEST(r13)
+ rlwimi r10,r9,8,0x300
+#endif
+ mfcr r9
+ cmpwi r10,0x2c
+ beq do_stab_bolted_pSeries
+ mtcrf 0x80,r9
+ ld r9,PACA_EXSLB+EX_R9(r13)
+ ld r10,PACA_EXSLB+EX_R10(r13)
+ b data_access_not_stab
+do_stab_bolted_pSeries:
+ std r11,PACA_EXSLB+EX_R11(r13)
+ std r12,PACA_EXSLB+EX_R12(r13)
+ GET_SCRATCH0(r10)
+ std r10,PACA_EXSLB+EX_R13(r13)
+ EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD)
+#endif /* CONFIG_POWER4_ONLY */
+
+ KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
+ KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400)
+ KVM_HANDLER_PR(PACA_EXSLB, EXC_STD, 0x480)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900)
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982)
+
+ .align 7
+ /* moved from 0xe00 */
+ STD_EXCEPTION_HV(., 0xe02, h_data_storage)
+ KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0xe02)
+ STD_EXCEPTION_HV(., 0xe22, h_instr_storage)
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe22)
+ STD_EXCEPTION_HV(., 0xe42, emulation_assist)
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe42)
+ STD_EXCEPTION_HV(., 0xe62, hmi_exception) /* need to flush cache ? */
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62)
+
+ /* moved from 0xf00 */
+ STD_EXCEPTION_PSERIES(., 0xf00, performance_monitor)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf00)
+ STD_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20)
+ STD_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40)
+
+/*
+ * An interrupt came in while soft-disabled. We set paca->irq_happened,
+ * then, if it was a decrementer interrupt, we bump the dec to max and
+ * and return, else we hard disable and return. This is called with
+ * r10 containing the value to OR to the paca field.
+ */
+#define MASKED_INTERRUPT(_H) \
+masked_##_H##interrupt: \
+ std r11,PACA_EXGEN+EX_R11(r13); \
+ lbz r11,PACAIRQHAPPENED(r13); \
+ or r11,r11,r10; \
+ stb r11,PACAIRQHAPPENED(r13); \
+ andi. r10,r10,PACA_IRQ_DEC; \
+ beq 1f; \
+ lis r10,0x7fff; \
+ ori r10,r10,0xffff; \
+ mtspr SPRN_DEC,r10; \
+ b 2f; \
+1: mfspr r10,SPRN_##_H##SRR1; \
+ rldicl r10,r10,48,1; /* clear MSR_EE */ \
+ rotldi r10,r10,16; \
+ mtspr SPRN_##_H##SRR1,r10; \
+2: mtcrf 0x80,r9; \
+ ld r9,PACA_EXGEN+EX_R9(r13); \
+ ld r10,PACA_EXGEN+EX_R10(r13); \
+ ld r11,PACA_EXGEN+EX_R11(r13); \
+ GET_SCRATCH0(r13); \
+ ##_H##rfid; \
+ b .
+
+ MASKED_INTERRUPT()
+ MASKED_INTERRUPT(H)
+
+/*
+ * Called from arch_local_irq_enable when an interrupt needs
+ * to be resent. r3 contains 0x500 or 0x900 to indicate which
+ * kind of interrupt. MSR:EE is already off. We generate a
+ * stackframe like if a real interrupt had happened.
+ *
+ * Note: While MSR:EE is off, we need to make sure that _MSR
+ * in the generated frame has EE set to 1 or the exception
+ * handler will not properly re-enable them.
+ */
+_GLOBAL(__replay_interrupt)
+ /* We are going to jump to the exception common code which
+ * will retrieve various register values from the PACA which
+ * we don't give a damn about, so we don't bother storing them.
+ */
+ mfmsr r12
+ mflr r11
+ mfcr r9
+ ori r12,r12,MSR_EE
+ andi. r3,r3,0x0800
+ bne decrementer_common
+ b hardware_interrupt_common
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Vectors for the FWNMI option. Share common code.
+ */
+ .globl system_reset_fwnmi
+ .align 7
+system_reset_fwnmi:
+ HMT_MEDIUM
+ SET_SCRATCH0(r13) /* save r13 */
+ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
+ NOTEST, 0x100)
+
+#endif /* CONFIG_PPC_PSERIES */
+
+#ifdef __DISABLED__
+/*
+ * This is used for when the SLB miss handler has to go virtual,
+ * which doesn't happen for now anymore but will once we re-implement
+ * dynamic VSIDs for shared page tables
+ */
+slb_miss_user_pseries:
+ std r10,PACA_EXGEN+EX_R10(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ GET_SCRATCH0(r10)
+ ld r11,PACA_EXSLB+EX_R9(r13)
+ ld r12,PACA_EXSLB+EX_R3(r13)
+ std r10,PACA_EXGEN+EX_R13(r13)
+ std r11,PACA_EXGEN+EX_R9(r13)
+ std r12,PACA_EXGEN+EX_R3(r13)
+ clrrdi r12,r13,32
+ mfmsr r10
+ mfspr r11,SRR0 /* save SRR0 */
+ ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
+ ori r10,r10,MSR_IR|MSR_DR|MSR_RI
+ mtspr SRR0,r12
+ mfspr r12,SRR1 /* and SRR1 */
+ mtspr SRR1,r10
+ rfid
+ b . /* prevent spec. execution */
+#endif /* __DISABLED__ */
+
+ .align 7
+ .globl __end_interrupts
+__end_interrupts:
+
+/*
+ * Code from here down to __end_handlers is invoked from the
+ * exception prologs above. Because the prologs assemble the
+ * addresses of these handlers using the LOAD_HANDLER macro,
+ * which uses an addi instruction, these handlers must be in
+ * the first 32k of the kernel image.
+ */
+
+/*** Common interrupt handlers ***/
+
+ STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception)
+
+ /*
+ * Machine check is different because we use a different
+ * save area: PACA_EXMC instead of PACA_EXGEN.
+ */
+ .align 7
+ .globl machine_check_common
+machine_check_common:
+ EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
+ FINISH_NAP
+ DISABLE_INTS
+ bl .save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .machine_check_exception
+ b .ret_from_except
+
+ STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ)
+ STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt)
+ STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception)
+ STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
+ STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
+ STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
+ STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception)
+ STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception)
+ STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, .performance_monitor_exception)
+ STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
+#ifdef CONFIG_ALTIVEC
+ STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception)
+#else
+ STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception)
+#endif
+#ifdef CONFIG_CBE_RAS
+ STD_EXCEPTION_COMMON(0x1200, cbe_system_error, .cbe_system_error_exception)
+ STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, .cbe_maintenance_exception)
+ STD_EXCEPTION_COMMON(0x1800, cbe_thermal, .cbe_thermal_exception)
+#endif /* CONFIG_CBE_RAS */
+
+ .align 7
+system_call_entry:
+ b system_call_common
+
+ppc64_runlatch_on_trampoline:
+ b .__ppc64_runlatch_on
+
+/*
+ * Here we have detected that the kernel stack pointer is bad.
+ * R9 contains the saved CR, r13 points to the paca,
+ * r10 contains the (bad) kernel stack pointer,
+ * r11 and r12 contain the saved SRR0 and SRR1.
+ * We switch to using an emergency stack, save the registers there,
+ * and call kernel_bad_stack(), which panics.
+ */
+bad_stack:
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,64+INT_FRAME_SIZE
+ std r9,_CCR(r1)
+ std r10,GPR1(r1)
+ std r11,_NIP(r1)
+ std r12,_MSR(r1)
+ mfspr r11,SPRN_DAR
+ mfspr r12,SPRN_DSISR
+ std r11,_DAR(r1)
+ std r12,_DSISR(r1)
+ mflr r10
+ mfctr r11
+ mfxer r12
+ std r10,_LINK(r1)
+ std r11,_CTR(r1)
+ std r12,_XER(r1)
+ SAVE_GPR(0,r1)
+ SAVE_GPR(2,r1)
+ ld r10,EX_R3(r3)
+ std r10,GPR3(r1)
+ SAVE_GPR(4,r1)
+ SAVE_4GPRS(5,r1)
+ ld r9,EX_R9(r3)
+ ld r10,EX_R10(r3)
+ SAVE_2GPRS(9,r1)
+ ld r9,EX_R11(r3)
+ ld r10,EX_R12(r3)
+ ld r11,EX_R13(r3)
+ std r9,GPR11(r1)
+ std r10,GPR12(r1)
+ std r11,GPR13(r1)
+BEGIN_FTR_SECTION
+ ld r10,EX_CFAR(r3)
+ std r10,ORIG_GPR3(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ SAVE_8GPRS(14,r1)
+ SAVE_10GPRS(22,r1)
+ lhz r12,PACA_TRAP_SAVE(r13)
+ std r12,_TRAP(r1)
+ addi r11,r1,INT_FRAME_SIZE
+ std r11,0(r1)
+ li r12,0
+ std r12,0(r11)
+ ld r2,PACATOC(r13)
+ ld r11,exception_marker@toc(r2)
+ std r12,RESULT(r1)
+ std r11,STACK_FRAME_OVERHEAD-16(r1)
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .kernel_bad_stack
+ b 1b
+
+/*
+ * Here r13 points to the paca, r9 contains the saved CR,
+ * SRR0 and SRR1 are saved in r11 and r12,
+ * r9 - r13 are saved in paca->exgen.
+ */
+ .align 7
+ .globl data_access_common
+data_access_common:
+ mfspr r10,SPRN_DAR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ mfspr r10,SPRN_DSISR
+ stw r10,PACA_EXGEN+EX_DSISR(r13)
+ EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
+ DISABLE_INTS
+ ld r12,_MSR(r1)
+ ld r3,PACA_EXGEN+EX_DAR(r13)
+ lwz r4,PACA_EXGEN+EX_DSISR(r13)
+ li r5,0x300
+ b .do_hash_page /* Try to handle as hpte fault */
+
+ .align 7
+ .globl h_data_storage_common
+h_data_storage_common:
+ mfspr r10,SPRN_HDAR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ mfspr r10,SPRN_HDSISR
+ stw r10,PACA_EXGEN+EX_DSISR(r13)
+ EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
+ bl .save_nvgprs
+ DISABLE_INTS
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unknown_exception
+ b .ret_from_except
+
+ .align 7
+ .globl instruction_access_common
+instruction_access_common:
+ EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
+ DISABLE_INTS
+ ld r12,_MSR(r1)
+ ld r3,_NIP(r1)
+ andis. r4,r12,0x5820
+ li r5,0x400
+ b .do_hash_page /* Try to handle as hpte fault */
+
+ STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception)
+
+/*
+ * Here is the common SLB miss user that is used when going to virtual
+ * mode for SLB misses, that is currently not used
+ */
+#ifdef __DISABLED__
+ .align 7
+ .globl slb_miss_user_common
+slb_miss_user_common:
+ mflr r10
+ std r3,PACA_EXGEN+EX_DAR(r13)
+ stw r9,PACA_EXGEN+EX_CCR(r13)
+ std r10,PACA_EXGEN+EX_LR(r13)
+ std r11,PACA_EXGEN+EX_SRR0(r13)
+ bl .slb_allocate_user
+
+ ld r10,PACA_EXGEN+EX_LR(r13)
+ ld r3,PACA_EXGEN+EX_R3(r13)
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
+ ld r11,PACA_EXGEN+EX_SRR0(r13)
+ mtlr r10
+ beq- slb_miss_fault
+
+ andi. r10,r12,MSR_RI /* check for unrecoverable exception */
+ beq- unrecov_user_slb
+ mfmsr r10
+
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+.machine pop
+
+ clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */
+ mtmsrd r10,1
+
+ mtspr SRR0,r11
+ mtspr SRR1,r12
+
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ rfid
+ b .
+
+slb_miss_fault:
+ EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
+ ld r4,PACA_EXGEN+EX_DAR(r13)
+ li r5,0
+ std r4,_DAR(r1)
+ std r5,_DSISR(r1)
+ b handle_page_fault
+
+unrecov_user_slb:
+ EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
+ DISABLE_INTS
+ bl .save_nvgprs
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unrecoverable_exception
+ b 1b
+
+#endif /* __DISABLED__ */
+
+
+/*
+ * r13 points to the PACA, r9 contains the saved CR,
+ * r12 contain the saved SRR1, SRR0 is still ready for return
+ * r3 has the faulting address
+ * r9 - r13 are saved in paca->exslb.
+ * r3 is saved in paca->slb_r3
+ * We assume we aren't going to take any exceptions during this procedure.
+ */
+_GLOBAL(slb_miss_realmode)
+ mflr r10
+#ifdef CONFIG_RELOCATABLE
+ mtctr r11
+#endif
+
+ stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
+ std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
+
+ bl .slb_allocate_realmode
+
+ /* All done -- return from exception. */
+
+ ld r10,PACA_EXSLB+EX_LR(r13)
+ ld r3,PACA_EXSLB+EX_R3(r13)
+ lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
+
+ mtlr r10
+
+ andi. r10,r12,MSR_RI /* check for unrecoverable exception */
+ beq- 2f
+
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
+.machine pop
+
+ ld r9,PACA_EXSLB+EX_R9(r13)
+ ld r10,PACA_EXSLB+EX_R10(r13)
+ ld r11,PACA_EXSLB+EX_R11(r13)
+ ld r12,PACA_EXSLB+EX_R12(r13)
+ ld r13,PACA_EXSLB+EX_R13(r13)
+ rfid
+ b . /* prevent speculative execution */
+
+2: mfspr r11,SPRN_SRR0
+ ld r10,PACAKBASE(r13)
+ LOAD_HANDLER(r10,unrecov_slb)
+ mtspr SPRN_SRR0,r10
+ ld r10,PACAKMSR(r13)
+ mtspr SPRN_SRR1,r10
+ rfid
+ b .
+
+unrecov_slb:
+ EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
+ DISABLE_INTS
+ bl .save_nvgprs
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unrecoverable_exception
+ b 1b
+
+
+#ifdef CONFIG_PPC_970_NAP
+power4_fixup_nap:
+ andc r9,r9,r10
+ std r9,TI_LOCAL_FLAGS(r11)
+ ld r10,_LINK(r1) /* make idle task do the */
+ std r10,_NIP(r1) /* equivalent of a blr */
+ blr
+#endif
+
+ .align 7
+ .globl alignment_common
+alignment_common:
+ mfspr r10,SPRN_DAR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ mfspr r10,SPRN_DSISR
+ stw r10,PACA_EXGEN+EX_DSISR(r13)
+ EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
+ ld r3,PACA_EXGEN+EX_DAR(r13)
+ lwz r4,PACA_EXGEN+EX_DSISR(r13)
+ std r3,_DAR(r1)
+ std r4,_DSISR(r1)
+ bl .save_nvgprs
+ DISABLE_INTS
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .alignment_exception
+ b .ret_from_except
+
+ .align 7
+ .globl program_check_common
+program_check_common:
+ EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
+ bl .save_nvgprs
+ DISABLE_INTS
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .program_check_exception
+ b .ret_from_except
+
+ .align 7
+ .globl fp_unavailable_common
+fp_unavailable_common:
+ EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
+ bne 1f /* if from user, just load it up */
+ bl .save_nvgprs
+ DISABLE_INTS
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .kernel_fp_unavailable_exception
+ BUG_OPCODE
+1: bl .load_up_fpu
+ b fast_exception_return
+
+ .align 7
+ .globl altivec_unavailable_common
+altivec_unavailable_common:
+ EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ beq 1f
+ bl .load_up_altivec
+ b fast_exception_return
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+ bl .save_nvgprs
+ DISABLE_INTS
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .altivec_unavailable_exception
+ b .ret_from_except
+
+ .align 7
+ .globl vsx_unavailable_common
+vsx_unavailable_common:
+ EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN)
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ beq 1f
+ b .load_up_vsx
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+ bl .save_nvgprs
+ DISABLE_INTS
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .vsx_unavailable_exception
+ b .ret_from_except
+
+ .align 7
+ .globl __end_handlers
+__end_handlers:
+
+/*
+ * Hash table stuff
+ */
+ .align 7
+_STATIC(do_hash_page)
+ std r3,_DAR(r1)
+ std r4,_DSISR(r1)
+
+ andis. r0,r4,0xa410 /* weird error? */
+ bne- handle_page_fault /* if not, try to insert a HPTE */
+ andis. r0,r4,DSISR_DABRMATCH@h
+ bne- handle_dabr_fault
+
+BEGIN_FTR_SECTION
+ andis. r0,r4,0x0020 /* Is it a segment table fault? */
+ bne- do_ste_alloc /* If so handle it */
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
+
+ clrrdi r11,r1,THREAD_SHIFT
+ lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
+ andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
+ bne 77f /* then don't call hash_page now */
+ /*
+ * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
+ * accessing a userspace segment (even from the kernel). We assume
+ * kernel addresses always have the high bit set.
+ */
+ rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */
+ rotldi r0,r3,15 /* Move high bit into MSR_PR posn */
+ orc r0,r12,r0 /* MSR_PR | ~high_bit */
+ rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */
+ ori r4,r4,1 /* add _PAGE_PRESENT */
+ rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */
+
+ /*
+ * r3 contains the faulting address
+ * r4 contains the required access permissions
+ * r5 contains the trap number
+ *
+ * at return r3 = 0 for success, 1 for page fault, negative for error
+ */
+ bl .hash_page /* build HPTE if possible */
+ cmpdi r3,0 /* see if hash_page succeeded */
+
+ /* Success */
+ beq fast_exc_return_irq /* Return from exception on success */
+
+ /* Error */
+ blt- 13f
+
+/* Here we have a page fault that hash_page can't handle. */
+handle_page_fault:
+11: ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .do_page_fault
+ cmpdi r3,0
+ beq+ 12f
+ bl .save_nvgprs
+ mr r5,r3
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ lwz r4,_DAR(r1)
+ bl .bad_page_fault
+ b .ret_from_except
+
+/* We have a data breakpoint exception - handle it */
+handle_dabr_fault:
+ bl .save_nvgprs
+ ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .do_dabr
+12: b .ret_from_except_lite
+
+
+/* We have a page fault that hash_page could handle but HV refused
+ * the PTE insertion
+ */
+13: bl .save_nvgprs
+ mr r5,r3
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ ld r4,_DAR(r1)
+ bl .low_hash_fault
+ b .ret_from_except
+
+/*
+ * We come here as a result of a DSI at a point where we don't want
+ * to call hash_page, such as when we are accessing memory (possibly
+ * user memory) inside a PMU interrupt that occurred while interrupts
+ * were soft-disabled. We want to invoke the exception handler for
+ * the access, or panic if there isn't a handler.
+ */
+77: bl .save_nvgprs
+ mr r4,r3
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ li r5,SIGSEGV
+ bl .bad_page_fault
+ b .ret_from_except
+
+ /* here we have a segment miss */
+do_ste_alloc:
+ bl .ste_allocate /* try to insert stab entry */
+ cmpdi r3,0
+ bne- handle_page_fault
+ b fast_exception_return
+
+/*
+ * r13 points to the PACA, r9 contains the saved CR,
+ * r11 and r12 contain the saved SRR0 and SRR1.
+ * r9 - r13 are saved in paca->exslb.
+ * We assume we aren't going to take any exceptions during this procedure.
+ * We assume (DAR >> 60) == 0xc.
+ */
+ .align 7
+_GLOBAL(do_stab_bolted)
+ stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
+ std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */
+
+ /* Hash to the primary group */
+ ld r10,PACASTABVIRT(r13)
+ mfspr r11,SPRN_DAR
+ srdi r11,r11,28
+ rldimi r10,r11,7,52 /* r10 = first ste of the group */
+
+ /* Calculate VSID */
+ /* This is a kernel address, so protovsid = ESID */
+ ASM_VSID_SCRAMBLE(r11, r9, 256M)
+ rldic r9,r11,12,16 /* r9 = vsid << 12 */
+
+ /* Search the primary group for a free entry */
+1: ld r11,0(r10) /* Test valid bit of the current ste */
+ andi. r11,r11,0x80
+ beq 2f
+ addi r10,r10,16
+ andi. r11,r10,0x70
+ bne 1b
+
+ /* Stick for only searching the primary group for now. */
+ /* At least for now, we use a very simple random castout scheme */
+ /* Use the TB as a random number ; OR in 1 to avoid entry 0 */
+ mftb r11
+ rldic r11,r11,4,57 /* r11 = (r11 << 4) & 0x70 */
+ ori r11,r11,0x10
+
+ /* r10 currently points to an ste one past the group of interest */
+ /* make it point to the randomly selected entry */
+ subi r10,r10,128
+ or r10,r10,r11 /* r10 is the entry to invalidate */
+
+ isync /* mark the entry invalid */
+ ld r11,0(r10)
+ rldicl r11,r11,56,1 /* clear the valid bit */
+ rotldi r11,r11,8
+ std r11,0(r10)
+ sync
+
+ clrrdi r11,r11,28 /* Get the esid part of the ste */
+ slbie r11
+
+2: std r9,8(r10) /* Store the vsid part of the ste */
+ eieio
+
+ mfspr r11,SPRN_DAR /* Get the new esid */
+ clrrdi r11,r11,28 /* Permits a full 32b of ESID */
+ ori r11,r11,0x90 /* Turn on valid and kp */
+ std r11,0(r10) /* Put new entry back into the stab */
+
+ sync
+
+ /* All done -- return from exception. */
+ lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
+ ld r11,PACA_EXSLB+EX_SRR0(r13) /* get saved SRR0 */
+
+ andi. r10,r12,MSR_RI
+ beq- unrecov_slb
+
+ mtcrf 0x80,r9 /* restore CR */
+
+ mfmsr r10
+ clrrdi r10,r10,2
+ mtmsrd r10,1
+
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
+ ld r9,PACA_EXSLB+EX_R9(r13)
+ ld r10,PACA_EXSLB+EX_R10(r13)
+ ld r11,PACA_EXSLB+EX_R11(r13)
+ ld r12,PACA_EXSLB+EX_R12(r13)
+ ld r13,PACA_EXSLB+EX_R13(r13)
+ rfid
+ b . /* prevent speculative execution */
+
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+/*
+ * Data area reserved for FWNMI option.
+ * This address (0x7000) is fixed by the RPA.
+ */
+ .= 0x7000
+ .globl fwnmi_data_area
+fwnmi_data_area:
+
+ /* pseries and powernv need to keep the whole page from
+ * 0x7000 to 0x8000 free for use by the firmware
+ */
+ . = 0x8000
+#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
+
+/* Space for CPU0's segment table */
+ .balign 4096
+ .globl initial_stab
+initial_stab:
+ .space 4096
+
+#ifdef CONFIG_PPC_POWERNV
+_GLOBAL(opal_mc_secondary_handler)
+ HMT_MEDIUM
+ SET_SCRATCH0(r13)
+ GET_PACA(r13)
+ clrldi r3,r3,2
+ tovirt(r3,r3)
+ std r3,PACA_OPAL_MC_EVT(r13)
+ ld r13,OPAL_MC_SRR0(r3)
+ mtspr SPRN_SRR0,r13
+ ld r13,OPAL_MC_SRR1(r3)
+ mtspr SPRN_SRR1,r13
+ ld r3,OPAL_MC_GPR3(r3)
+ GET_SCRATCH0(r13)
+ b machine_check_pSeries
+#endif /* CONFIG_PPC_POWERNV */
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
new file mode 100644
index 00000000..18bdf74f
--- /dev/null
+++ b/arch/powerpc/kernel/fadump.c
@@ -0,0 +1,1317 @@
+/*
+ * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
+ * dump with assistance from firmware. This approach does not use kexec,
+ * instead firmware assists in booting the kdump kernel while preserving
+ * memory contents. The most of the code implementation has been adapted
+ * from phyp assisted dump implementation written by Linas Vepstas and
+ * Manish Ahuja
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2011 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/delay.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/crash_dump.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/fadump.h>
+#include <asm/debug.h>
+#include <asm/setup.h>
+
+static struct fw_dump fw_dump;
+static struct fadump_mem_struct fdm;
+static const struct fadump_mem_struct *fdm_active;
+
+static DEFINE_MUTEX(fadump_mutex);
+struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES];
+int crash_mem_ranges;
+
+/* Scan the Firmware Assisted dump configuration details. */
+int __init early_init_dt_scan_fw_dump(unsigned long node,
+ const char *uname, int depth, void *data)
+{
+ __be32 *sections;
+ int i, num_sections;
+ unsigned long size;
+ const int *token;
+
+ if (depth != 1 || strcmp(uname, "rtas") != 0)
+ return 0;
+
+ /*
+ * Check if Firmware Assisted dump is supported. if yes, check
+ * if dump has been initiated on last reboot.
+ */
+ token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
+ if (!token)
+ return 0;
+
+ fw_dump.fadump_supported = 1;
+ fw_dump.ibm_configure_kernel_dump = *token;
+
+ /*
+ * The 'ibm,kernel-dump' rtas node is present only if there is
+ * dump data waiting for us.
+ */
+ fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
+ if (fdm_active)
+ fw_dump.dump_active = 1;
+
+ /* Get the sizes required to store dump data for the firmware provided
+ * dump sections.
+ * For each dump section type supported, a 32bit cell which defines
+ * the ID of a supported section followed by two 32 bit cells which
+ * gives teh size of the section in bytes.
+ */
+ sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
+ &size);
+
+ if (!sections)
+ return 0;
+
+ num_sections = size / (3 * sizeof(u32));
+
+ for (i = 0; i < num_sections; i++, sections += 3) {
+ u32 type = (u32)of_read_number(sections, 1);
+
+ switch (type) {
+ case FADUMP_CPU_STATE_DATA:
+ fw_dump.cpu_state_data_size =
+ of_read_ulong(&sections[1], 2);
+ break;
+ case FADUMP_HPTE_REGION:
+ fw_dump.hpte_region_size =
+ of_read_ulong(&sections[1], 2);
+ break;
+ }
+ }
+ return 1;
+}
+
+int is_fadump_active(void)
+{
+ return fw_dump.dump_active;
+}
+
+/* Print firmware assisted dump configurations for debugging purpose. */
+static void fadump_show_config(void)
+{
+ pr_debug("Support for firmware-assisted dump (fadump): %s\n",
+ (fw_dump.fadump_supported ? "present" : "no support"));
+
+ if (!fw_dump.fadump_supported)
+ return;
+
+ pr_debug("Fadump enabled : %s\n",
+ (fw_dump.fadump_enabled ? "yes" : "no"));
+ pr_debug("Dump Active : %s\n",
+ (fw_dump.dump_active ? "yes" : "no"));
+ pr_debug("Dump section sizes:\n");
+ pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
+ pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size);
+ pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size);
+}
+
+static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
+ unsigned long addr)
+{
+ if (!fdm)
+ return 0;
+
+ memset(fdm, 0, sizeof(struct fadump_mem_struct));
+ addr = addr & PAGE_MASK;
+
+ fdm->header.dump_format_version = 0x00000001;
+ fdm->header.dump_num_sections = 3;
+ fdm->header.dump_status_flag = 0;
+ fdm->header.offset_first_dump_section =
+ (u32)offsetof(struct fadump_mem_struct, cpu_state_data);
+
+ /*
+ * Fields for disk dump option.
+ * We are not using disk dump option, hence set these fields to 0.
+ */
+ fdm->header.dd_block_size = 0;
+ fdm->header.dd_block_offset = 0;
+ fdm->header.dd_num_blocks = 0;
+ fdm->header.dd_offset_disk_path = 0;
+
+ /* set 0 to disable an automatic dump-reboot. */
+ fdm->header.max_time_auto = 0;
+
+ /* Kernel dump sections */
+ /* cpu state data section. */
+ fdm->cpu_state_data.request_flag = FADUMP_REQUEST_FLAG;
+ fdm->cpu_state_data.source_data_type = FADUMP_CPU_STATE_DATA;
+ fdm->cpu_state_data.source_address = 0;
+ fdm->cpu_state_data.source_len = fw_dump.cpu_state_data_size;
+ fdm->cpu_state_data.destination_address = addr;
+ addr += fw_dump.cpu_state_data_size;
+
+ /* hpte region section */
+ fdm->hpte_region.request_flag = FADUMP_REQUEST_FLAG;
+ fdm->hpte_region.source_data_type = FADUMP_HPTE_REGION;
+ fdm->hpte_region.source_address = 0;
+ fdm->hpte_region.source_len = fw_dump.hpte_region_size;
+ fdm->hpte_region.destination_address = addr;
+ addr += fw_dump.hpte_region_size;
+
+ /* RMA region section */
+ fdm->rmr_region.request_flag = FADUMP_REQUEST_FLAG;
+ fdm->rmr_region.source_data_type = FADUMP_REAL_MODE_REGION;
+ fdm->rmr_region.source_address = RMA_START;
+ fdm->rmr_region.source_len = fw_dump.boot_memory_size;
+ fdm->rmr_region.destination_address = addr;
+ addr += fw_dump.boot_memory_size;
+
+ return addr;
+}
+
+/**
+ * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
+ *
+ * Function to find the largest memory size we need to reserve during early
+ * boot process. This will be the size of the memory that is required for a
+ * kernel to boot successfully.
+ *
+ * This function has been taken from phyp-assisted dump feature implementation.
+ *
+ * returns larger of 256MB or 5% rounded down to multiples of 256MB.
+ *
+ * TODO: Come up with better approach to find out more accurate memory size
+ * that is required for a kernel to boot successfully.
+ *
+ */
+static inline unsigned long fadump_calculate_reserve_size(void)
+{
+ unsigned long size;
+
+ /*
+ * Check if the size is specified through fadump_reserve_mem= cmdline
+ * option. If yes, then use that.
+ */
+ if (fw_dump.reserve_bootvar)
+ return fw_dump.reserve_bootvar;
+
+ /* divide by 20 to get 5% of value */
+ size = memblock_end_of_DRAM() / 20;
+
+ /* round it down in multiples of 256 */
+ size = size & ~0x0FFFFFFFUL;
+
+ /* Truncate to memory_limit. We don't want to over reserve the memory.*/
+ if (memory_limit && size > memory_limit)
+ size = memory_limit;
+
+ return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
+}
+
+/*
+ * Calculate the total memory size required to be reserved for
+ * firmware-assisted dump registration.
+ */
+static unsigned long get_fadump_area_size(void)
+{
+ unsigned long size = 0;
+
+ size += fw_dump.cpu_state_data_size;
+ size += fw_dump.hpte_region_size;
+ size += fw_dump.boot_memory_size;
+ size += sizeof(struct fadump_crash_info_header);
+ size += sizeof(struct elfhdr); /* ELF core header.*/
+ size += sizeof(struct elf_phdr); /* place holder for cpu notes */
+ /* Program headers for crash memory regions. */
+ size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
+
+ size = PAGE_ALIGN(size);
+ return size;
+}
+
+int __init fadump_reserve_mem(void)
+{
+ unsigned long base, size, memory_boundary;
+
+ if (!fw_dump.fadump_enabled)
+ return 0;
+
+ if (!fw_dump.fadump_supported) {
+ printk(KERN_INFO "Firmware-assisted dump is not supported on"
+ " this hardware\n");
+ fw_dump.fadump_enabled = 0;
+ return 0;
+ }
+ /*
+ * Initialize boot memory size
+ * If dump is active then we have already calculated the size during
+ * first kernel.
+ */
+ if (fdm_active)
+ fw_dump.boot_memory_size = fdm_active->rmr_region.source_len;
+ else
+ fw_dump.boot_memory_size = fadump_calculate_reserve_size();
+
+ /*
+ * Calculate the memory boundary.
+ * If memory_limit is less than actual memory boundary then reserve
+ * the memory for fadump beyond the memory_limit and adjust the
+ * memory_limit accordingly, so that the running kernel can run with
+ * specified memory_limit.
+ */
+ if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
+ size = get_fadump_area_size();
+ if ((memory_limit + size) < memblock_end_of_DRAM())
+ memory_limit += size;
+ else
+ memory_limit = memblock_end_of_DRAM();
+ printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
+ " dump, now %#016llx\n",
+ (unsigned long long)memory_limit);
+ }
+ if (memory_limit)
+ memory_boundary = memory_limit;
+ else
+ memory_boundary = memblock_end_of_DRAM();
+
+ if (fw_dump.dump_active) {
+ printk(KERN_INFO "Firmware-assisted dump is active.\n");
+ /*
+ * If last boot has crashed then reserve all the memory
+ * above boot_memory_size so that we don't touch it until
+ * dump is written to disk by userspace tool. This memory
+ * will be released for general use once the dump is saved.
+ */
+ base = fw_dump.boot_memory_size;
+ size = memory_boundary - base;
+ memblock_reserve(base, size);
+ printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
+ "for saving crash dump\n",
+ (unsigned long)(size >> 20),
+ (unsigned long)(base >> 20));
+
+ fw_dump.fadumphdr_addr =
+ fdm_active->rmr_region.destination_address +
+ fdm_active->rmr_region.source_len;
+ pr_debug("fadumphdr_addr = %p\n",
+ (void *) fw_dump.fadumphdr_addr);
+ } else {
+ /* Reserve the memory at the top of memory. */
+ size = get_fadump_area_size();
+ base = memory_boundary - size;
+ memblock_reserve(base, size);
+ printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
+ "for firmware-assisted dump\n",
+ (unsigned long)(size >> 20),
+ (unsigned long)(base >> 20));
+ }
+ fw_dump.reserve_dump_area_start = base;
+ fw_dump.reserve_dump_area_size = size;
+ return 1;
+}
+
+/* Look for fadump= cmdline option. */
+static int __init early_fadump_param(char *p)
+{
+ if (!p)
+ return 1;
+
+ if (strncmp(p, "on", 2) == 0)
+ fw_dump.fadump_enabled = 1;
+ else if (strncmp(p, "off", 3) == 0)
+ fw_dump.fadump_enabled = 0;
+
+ return 0;
+}
+early_param("fadump", early_fadump_param);
+
+/* Look for fadump_reserve_mem= cmdline option */
+static int __init early_fadump_reserve_mem(char *p)
+{
+ if (p)
+ fw_dump.reserve_bootvar = memparse(p, &p);
+ return 0;
+}
+early_param("fadump_reserve_mem", early_fadump_reserve_mem);
+
+static void register_fw_dump(struct fadump_mem_struct *fdm)
+{
+ int rc;
+ unsigned int wait_time;
+
+ pr_debug("Registering for firmware-assisted kernel dump...\n");
+
+ /* TODO: Add upper time limit for the delay */
+ do {
+ rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
+ FADUMP_REGISTER, fdm,
+ sizeof(struct fadump_mem_struct));
+
+ wait_time = rtas_busy_delay_time(rc);
+ if (wait_time)
+ mdelay(wait_time);
+
+ } while (wait_time);
+
+ switch (rc) {
+ case -1:
+ printk(KERN_ERR "Failed to register firmware-assisted kernel"
+ " dump. Hardware Error(%d).\n", rc);
+ break;
+ case -3:
+ printk(KERN_ERR "Failed to register firmware-assisted kernel"
+ " dump. Parameter Error(%d).\n", rc);
+ break;
+ case -9:
+ printk(KERN_ERR "firmware-assisted kernel dump is already "
+ " registered.");
+ fw_dump.dump_registered = 1;
+ break;
+ case 0:
+ printk(KERN_INFO "firmware-assisted kernel dump registration"
+ " is successful\n");
+ fw_dump.dump_registered = 1;
+ break;
+ }
+}
+
+void crash_fadump(struct pt_regs *regs, const char *str)
+{
+ struct fadump_crash_info_header *fdh = NULL;
+
+ if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
+ return;
+
+ fdh = __va(fw_dump.fadumphdr_addr);
+ crashing_cpu = smp_processor_id();
+ fdh->crashing_cpu = crashing_cpu;
+ crash_save_vmcoreinfo();
+
+ if (regs)
+ fdh->regs = *regs;
+ else
+ ppc_save_regs(&fdh->regs);
+
+ fdh->cpu_online_mask = *cpu_online_mask;
+
+ /* Call ibm,os-term rtas call to trigger firmware assisted dump */
+ rtas_os_term((char *)str);
+}
+
+#define GPR_MASK 0xffffff0000000000
+static inline int fadump_gpr_index(u64 id)
+{
+ int i = -1;
+ char str[3];
+
+ if ((id & GPR_MASK) == REG_ID("GPR")) {
+ /* get the digits at the end */
+ id &= ~GPR_MASK;
+ id >>= 24;
+ str[2] = '\0';
+ str[1] = id & 0xff;
+ str[0] = (id >> 8) & 0xff;
+ sscanf(str, "%d", &i);
+ if (i > 31)
+ i = -1;
+ }
+ return i;
+}
+
+static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id,
+ u64 reg_val)
+{
+ int i;
+
+ i = fadump_gpr_index(reg_id);
+ if (i >= 0)
+ regs->gpr[i] = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("NIA"))
+ regs->nip = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("MSR"))
+ regs->msr = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("CTR"))
+ regs->ctr = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("LR"))
+ regs->link = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("XER"))
+ regs->xer = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("CR"))
+ regs->ccr = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("DAR"))
+ regs->dar = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("DSISR"))
+ regs->dsisr = (unsigned long)reg_val;
+}
+
+static struct fadump_reg_entry*
+fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
+{
+ memset(regs, 0, sizeof(struct pt_regs));
+
+ while (reg_entry->reg_id != REG_ID("CPUEND")) {
+ fadump_set_regval(regs, reg_entry->reg_id,
+ reg_entry->reg_value);
+ reg_entry++;
+ }
+ reg_entry++;
+ return reg_entry;
+}
+
+static u32 *fadump_append_elf_note(u32 *buf, char *name, unsigned type,
+ void *data, size_t data_len)
+{
+ struct elf_note note;
+
+ note.n_namesz = strlen(name) + 1;
+ note.n_descsz = data_len;
+ note.n_type = type;
+ memcpy(buf, &note, sizeof(note));
+ buf += (sizeof(note) + 3)/4;
+ memcpy(buf, name, note.n_namesz);
+ buf += (note.n_namesz + 3)/4;
+ memcpy(buf, data, note.n_descsz);
+ buf += (note.n_descsz + 3)/4;
+
+ return buf;
+}
+
+static void fadump_final_note(u32 *buf)
+{
+ struct elf_note note;
+
+ note.n_namesz = 0;
+ note.n_descsz = 0;
+ note.n_type = 0;
+ memcpy(buf, &note, sizeof(note));
+}
+
+static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
+{
+ struct elf_prstatus prstatus;
+
+ memset(&prstatus, 0, sizeof(prstatus));
+ /*
+ * FIXME: How do i get PID? Do I really need it?
+ * prstatus.pr_pid = ????
+ */
+ elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
+ buf = fadump_append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
+ &prstatus, sizeof(prstatus));
+ return buf;
+}
+
+static void fadump_update_elfcore_header(char *bufp)
+{
+ struct elfhdr *elf;
+ struct elf_phdr *phdr;
+
+ elf = (struct elfhdr *)bufp;
+ bufp += sizeof(struct elfhdr);
+
+ /* First note is a place holder for cpu notes info. */
+ phdr = (struct elf_phdr *)bufp;
+
+ if (phdr->p_type == PT_NOTE) {
+ phdr->p_paddr = fw_dump.cpu_notes_buf;
+ phdr->p_offset = phdr->p_paddr;
+ phdr->p_filesz = fw_dump.cpu_notes_buf_size;
+ phdr->p_memsz = fw_dump.cpu_notes_buf_size;
+ }
+ return;
+}
+
+static void *fadump_cpu_notes_buf_alloc(unsigned long size)
+{
+ void *vaddr;
+ struct page *page;
+ unsigned long order, count, i;
+
+ order = get_order(size);
+ vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+ if (!vaddr)
+ return NULL;
+
+ count = 1 << order;
+ page = virt_to_page(vaddr);
+ for (i = 0; i < count; i++)
+ SetPageReserved(page + i);
+ return vaddr;
+}
+
+static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
+{
+ struct page *page;
+ unsigned long order, count, i;
+
+ order = get_order(size);
+ count = 1 << order;
+ page = virt_to_page(vaddr);
+ for (i = 0; i < count; i++)
+ ClearPageReserved(page + i);
+ __free_pages(page, order);
+}
+
+/*
+ * Read CPU state dump data and convert it into ELF notes.
+ * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
+ * used to access the data to allow for additional fields to be added without
+ * affecting compatibility. Each list of registers for a CPU starts with
+ * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
+ * 8 Byte ASCII identifier and 8 Byte register value. The register entry
+ * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
+ * of register value. For more details refer to PAPR document.
+ *
+ * Only for the crashing cpu we ignore the CPU dump data and get exact
+ * state from fadump crash info structure populated by first kernel at the
+ * time of crash.
+ */
+static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
+{
+ struct fadump_reg_save_area_header *reg_header;
+ struct fadump_reg_entry *reg_entry;
+ struct fadump_crash_info_header *fdh = NULL;
+ void *vaddr;
+ unsigned long addr;
+ u32 num_cpus, *note_buf;
+ struct pt_regs regs;
+ int i, rc = 0, cpu = 0;
+
+ if (!fdm->cpu_state_data.bytes_dumped)
+ return -EINVAL;
+
+ addr = fdm->cpu_state_data.destination_address;
+ vaddr = __va(addr);
+
+ reg_header = vaddr;
+ if (reg_header->magic_number != REGSAVE_AREA_MAGIC) {
+ printk(KERN_ERR "Unable to read register save area.\n");
+ return -ENOENT;
+ }
+ pr_debug("--------CPU State Data------------\n");
+ pr_debug("Magic Number: %llx\n", reg_header->magic_number);
+ pr_debug("NumCpuOffset: %x\n", reg_header->num_cpu_offset);
+
+ vaddr += reg_header->num_cpu_offset;
+ num_cpus = *((u32 *)(vaddr));
+ pr_debug("NumCpus : %u\n", num_cpus);
+ vaddr += sizeof(u32);
+ reg_entry = (struct fadump_reg_entry *)vaddr;
+
+ /* Allocate buffer to hold cpu crash notes. */
+ fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
+ fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
+ note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
+ if (!note_buf) {
+ printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
+ "cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
+ return -ENOMEM;
+ }
+ fw_dump.cpu_notes_buf = __pa(note_buf);
+
+ pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
+ (num_cpus * sizeof(note_buf_t)), note_buf);
+
+ if (fw_dump.fadumphdr_addr)
+ fdh = __va(fw_dump.fadumphdr_addr);
+
+ for (i = 0; i < num_cpus; i++) {
+ if (reg_entry->reg_id != REG_ID("CPUSTRT")) {
+ printk(KERN_ERR "Unable to read CPU state data\n");
+ rc = -ENOENT;
+ goto error_out;
+ }
+ /* Lower 4 bytes of reg_value contains logical cpu id */
+ cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK;
+ if (!cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) {
+ SKIP_TO_NEXT_CPU(reg_entry);
+ continue;
+ }
+ pr_debug("Reading register data for cpu %d...\n", cpu);
+ if (fdh && fdh->crashing_cpu == cpu) {
+ regs = fdh->regs;
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ SKIP_TO_NEXT_CPU(reg_entry);
+ } else {
+ reg_entry++;
+ reg_entry = fadump_read_registers(reg_entry, &regs);
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ }
+ }
+ fadump_final_note(note_buf);
+
+ pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+ fdh->elfcorehdr_addr);
+ fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
+ return 0;
+
+error_out:
+ fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
+ fw_dump.cpu_notes_buf_size);
+ fw_dump.cpu_notes_buf = 0;
+ fw_dump.cpu_notes_buf_size = 0;
+ return rc;
+
+}
+
+/*
+ * Validate and process the dump data stored by firmware before exporting
+ * it through '/proc/vmcore'.
+ */
+static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
+{
+ struct fadump_crash_info_header *fdh;
+ int rc = 0;
+
+ if (!fdm_active || !fw_dump.fadumphdr_addr)
+ return -EINVAL;
+
+ /* Check if the dump data is valid. */
+ if ((fdm_active->header.dump_status_flag == FADUMP_ERROR_FLAG) ||
+ (fdm_active->cpu_state_data.error_flags != 0) ||
+ (fdm_active->rmr_region.error_flags != 0)) {
+ printk(KERN_ERR "Dump taken by platform is not valid\n");
+ return -EINVAL;
+ }
+ if ((fdm_active->rmr_region.bytes_dumped !=
+ fdm_active->rmr_region.source_len) ||
+ !fdm_active->cpu_state_data.bytes_dumped) {
+ printk(KERN_ERR "Dump taken by platform is incomplete\n");
+ return -EINVAL;
+ }
+
+ /* Validate the fadump crash info header */
+ fdh = __va(fw_dump.fadumphdr_addr);
+ if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+ printk(KERN_ERR "Crash info header is not valid.\n");
+ return -EINVAL;
+ }
+
+ rc = fadump_build_cpu_notes(fdm_active);
+ if (rc)
+ return rc;
+
+ /*
+ * We are done validating dump info and elfcore header is now ready
+ * to be exported. set elfcorehdr_addr so that vmcore module will
+ * export the elfcore header through '/proc/vmcore'.
+ */
+ elfcorehdr_addr = fdh->elfcorehdr_addr;
+
+ return 0;
+}
+
+static inline void fadump_add_crash_memory(unsigned long long base,
+ unsigned long long end)
+{
+ if (base == end)
+ return;
+
+ pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
+ crash_mem_ranges, base, end - 1, (end - base));
+ crash_memory_ranges[crash_mem_ranges].base = base;
+ crash_memory_ranges[crash_mem_ranges].size = end - base;
+ crash_mem_ranges++;
+}
+
+static void fadump_exclude_reserved_area(unsigned long long start,
+ unsigned long long end)
+{
+ unsigned long long ra_start, ra_end;
+
+ ra_start = fw_dump.reserve_dump_area_start;
+ ra_end = ra_start + fw_dump.reserve_dump_area_size;
+
+ if ((ra_start < end) && (ra_end > start)) {
+ if ((start < ra_start) && (end > ra_end)) {
+ fadump_add_crash_memory(start, ra_start);
+ fadump_add_crash_memory(ra_end, end);
+ } else if (start < ra_start) {
+ fadump_add_crash_memory(start, ra_start);
+ } else if (ra_end < end) {
+ fadump_add_crash_memory(ra_end, end);
+ }
+ } else
+ fadump_add_crash_memory(start, end);
+}
+
+static int fadump_init_elfcore_header(char *bufp)
+{
+ struct elfhdr *elf;
+
+ elf = (struct elfhdr *) bufp;
+ bufp += sizeof(struct elfhdr);
+ memcpy(elf->e_ident, ELFMAG, SELFMAG);
+ elf->e_ident[EI_CLASS] = ELF_CLASS;
+ elf->e_ident[EI_DATA] = ELF_DATA;
+ elf->e_ident[EI_VERSION] = EV_CURRENT;
+ elf->e_ident[EI_OSABI] = ELF_OSABI;
+ memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
+ elf->e_type = ET_CORE;
+ elf->e_machine = ELF_ARCH;
+ elf->e_version = EV_CURRENT;
+ elf->e_entry = 0;
+ elf->e_phoff = sizeof(struct elfhdr);
+ elf->e_shoff = 0;
+ elf->e_flags = ELF_CORE_EFLAGS;
+ elf->e_ehsize = sizeof(struct elfhdr);
+ elf->e_phentsize = sizeof(struct elf_phdr);
+ elf->e_phnum = 0;
+ elf->e_shentsize = 0;
+ elf->e_shnum = 0;
+ elf->e_shstrndx = 0;
+
+ return 0;
+}
+
+/*
+ * Traverse through memblock structure and setup crash memory ranges. These
+ * ranges will be used create PT_LOAD program headers in elfcore header.
+ */
+static void fadump_setup_crash_memory_ranges(void)
+{
+ struct memblock_region *reg;
+ unsigned long long start, end;
+
+ pr_debug("Setup crash memory ranges.\n");
+ crash_mem_ranges = 0;
+ /*
+ * add the first memory chunk (RMA_START through boot_memory_size) as
+ * a separate memory chunk. The reason is, at the time crash firmware
+ * will move the content of this memory chunk to different location
+ * specified during fadump registration. We need to create a separate
+ * program header for this chunk with the correct offset.
+ */
+ fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
+
+ for_each_memblock(memory, reg) {
+ start = (unsigned long long)reg->base;
+ end = start + (unsigned long long)reg->size;
+ if (start == RMA_START && end >= fw_dump.boot_memory_size)
+ start = fw_dump.boot_memory_size;
+
+ /* add this range excluding the reserved dump area. */
+ fadump_exclude_reserved_area(start, end);
+ }
+}
+
+/*
+ * If the given physical address falls within the boot memory region then
+ * return the relocated address that points to the dump region reserved
+ * for saving initial boot memory contents.
+ */
+static inline unsigned long fadump_relocate(unsigned long paddr)
+{
+ if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
+ return fdm.rmr_region.destination_address + paddr;
+ else
+ return paddr;
+}
+
+static int fadump_create_elfcore_headers(char *bufp)
+{
+ struct elfhdr *elf;
+ struct elf_phdr *phdr;
+ int i;
+
+ fadump_init_elfcore_header(bufp);
+ elf = (struct elfhdr *)bufp;
+ bufp += sizeof(struct elfhdr);
+
+ /*
+ * setup ELF PT_NOTE, place holder for cpu notes info. The notes info
+ * will be populated during second kernel boot after crash. Hence
+ * this PT_NOTE will always be the first elf note.
+ *
+ * NOTE: Any new ELF note addition should be placed after this note.
+ */
+ phdr = (struct elf_phdr *)bufp;
+ bufp += sizeof(struct elf_phdr);
+ phdr->p_type = PT_NOTE;
+ phdr->p_flags = 0;
+ phdr->p_vaddr = 0;
+ phdr->p_align = 0;
+
+ phdr->p_offset = 0;
+ phdr->p_paddr = 0;
+ phdr->p_filesz = 0;
+ phdr->p_memsz = 0;
+
+ (elf->e_phnum)++;
+
+ /* setup ELF PT_NOTE for vmcoreinfo */
+ phdr = (struct elf_phdr *)bufp;
+ bufp += sizeof(struct elf_phdr);
+ phdr->p_type = PT_NOTE;
+ phdr->p_flags = 0;
+ phdr->p_vaddr = 0;
+ phdr->p_align = 0;
+
+ phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note());
+ phdr->p_offset = phdr->p_paddr;
+ phdr->p_memsz = vmcoreinfo_max_size;
+ phdr->p_filesz = vmcoreinfo_max_size;
+
+ /* Increment number of program headers. */
+ (elf->e_phnum)++;
+
+ /* setup PT_LOAD sections. */
+
+ for (i = 0; i < crash_mem_ranges; i++) {
+ unsigned long long mbase, msize;
+ mbase = crash_memory_ranges[i].base;
+ msize = crash_memory_ranges[i].size;
+
+ if (!msize)
+ continue;
+
+ phdr = (struct elf_phdr *)bufp;
+ bufp += sizeof(struct elf_phdr);
+ phdr->p_type = PT_LOAD;
+ phdr->p_flags = PF_R|PF_W|PF_X;
+ phdr->p_offset = mbase;
+
+ if (mbase == RMA_START) {
+ /*
+ * The entire RMA region will be moved by firmware
+ * to the specified destination_address. Hence set
+ * the correct offset.
+ */
+ phdr->p_offset = fdm.rmr_region.destination_address;
+ }
+
+ phdr->p_paddr = mbase;
+ phdr->p_vaddr = (unsigned long)__va(mbase);
+ phdr->p_filesz = msize;
+ phdr->p_memsz = msize;
+ phdr->p_align = 0;
+
+ /* Increment number of program headers. */
+ (elf->e_phnum)++;
+ }
+ return 0;
+}
+
+static unsigned long init_fadump_header(unsigned long addr)
+{
+ struct fadump_crash_info_header *fdh;
+
+ if (!addr)
+ return 0;
+
+ fw_dump.fadumphdr_addr = addr;
+ fdh = __va(addr);
+ addr += sizeof(struct fadump_crash_info_header);
+
+ memset(fdh, 0, sizeof(struct fadump_crash_info_header));
+ fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
+ fdh->elfcorehdr_addr = addr;
+ /* We will set the crashing cpu id in crash_fadump() during crash. */
+ fdh->crashing_cpu = CPU_UNKNOWN;
+
+ return addr;
+}
+
+static void register_fadump(void)
+{
+ unsigned long addr;
+ void *vaddr;
+
+ /*
+ * If no memory is reserved then we can not register for firmware-
+ * assisted dump.
+ */
+ if (!fw_dump.reserve_dump_area_size)
+ return;
+
+ fadump_setup_crash_memory_ranges();
+
+ addr = fdm.rmr_region.destination_address + fdm.rmr_region.source_len;
+ /* Initialize fadump crash info header. */
+ addr = init_fadump_header(addr);
+ vaddr = __va(addr);
+
+ pr_debug("Creating ELF core headers at %#016lx\n", addr);
+ fadump_create_elfcore_headers(vaddr);
+
+ /* register the future kernel dump with firmware. */
+ register_fw_dump(&fdm);
+}
+
+static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
+{
+ int rc = 0;
+ unsigned int wait_time;
+
+ pr_debug("Un-register firmware-assisted dump\n");
+
+ /* TODO: Add upper time limit for the delay */
+ do {
+ rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
+ FADUMP_UNREGISTER, fdm,
+ sizeof(struct fadump_mem_struct));
+
+ wait_time = rtas_busy_delay_time(rc);
+ if (wait_time)
+ mdelay(wait_time);
+ } while (wait_time);
+
+ if (rc) {
+ printk(KERN_ERR "Failed to un-register firmware-assisted dump."
+ " unexpected error(%d).\n", rc);
+ return rc;
+ }
+ fw_dump.dump_registered = 0;
+ return 0;
+}
+
+static int fadump_invalidate_dump(struct fadump_mem_struct *fdm)
+{
+ int rc = 0;
+ unsigned int wait_time;
+
+ pr_debug("Invalidating firmware-assisted dump registration\n");
+
+ /* TODO: Add upper time limit for the delay */
+ do {
+ rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
+ FADUMP_INVALIDATE, fdm,
+ sizeof(struct fadump_mem_struct));
+
+ wait_time = rtas_busy_delay_time(rc);
+ if (wait_time)
+ mdelay(wait_time);
+ } while (wait_time);
+
+ if (rc) {
+ printk(KERN_ERR "Failed to invalidate firmware-assisted dump "
+ "rgistration. unexpected error(%d).\n", rc);
+ return rc;
+ }
+ fw_dump.dump_active = 0;
+ fdm_active = NULL;
+ return 0;
+}
+
+void fadump_cleanup(void)
+{
+ /* Invalidate the registration only if dump is active. */
+ if (fw_dump.dump_active) {
+ init_fadump_mem_struct(&fdm,
+ fdm_active->cpu_state_data.destination_address);
+ fadump_invalidate_dump(&fdm);
+ }
+}
+
+/*
+ * Release the memory that was reserved in early boot to preserve the memory
+ * contents. The released memory will be available for general use.
+ */
+static void fadump_release_memory(unsigned long begin, unsigned long end)
+{
+ unsigned long addr;
+ unsigned long ra_start, ra_end;
+
+ ra_start = fw_dump.reserve_dump_area_start;
+ ra_end = ra_start + fw_dump.reserve_dump_area_size;
+
+ for (addr = begin; addr < end; addr += PAGE_SIZE) {
+ /*
+ * exclude the dump reserve area. Will reuse it for next
+ * fadump registration.
+ */
+ if (addr <= ra_end && ((addr + PAGE_SIZE) > ra_start))
+ continue;
+
+ ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT));
+ init_page_count(pfn_to_page(addr >> PAGE_SHIFT));
+ free_page((unsigned long)__va(addr));
+ totalram_pages++;
+ }
+}
+
+static void fadump_invalidate_release_mem(void)
+{
+ unsigned long reserved_area_start, reserved_area_end;
+ unsigned long destination_address;
+
+ mutex_lock(&fadump_mutex);
+ if (!fw_dump.dump_active) {
+ mutex_unlock(&fadump_mutex);
+ return;
+ }
+
+ destination_address = fdm_active->cpu_state_data.destination_address;
+ fadump_cleanup();
+ mutex_unlock(&fadump_mutex);
+
+ /*
+ * Save the current reserved memory bounds we will require them
+ * later for releasing the memory for general use.
+ */
+ reserved_area_start = fw_dump.reserve_dump_area_start;
+ reserved_area_end = reserved_area_start +
+ fw_dump.reserve_dump_area_size;
+ /*
+ * Setup reserve_dump_area_start and its size so that we can
+ * reuse this reserved memory for Re-registration.
+ */
+ fw_dump.reserve_dump_area_start = destination_address;
+ fw_dump.reserve_dump_area_size = get_fadump_area_size();
+
+ fadump_release_memory(reserved_area_start, reserved_area_end);
+ if (fw_dump.cpu_notes_buf) {
+ fadump_cpu_notes_buf_free(
+ (unsigned long)__va(fw_dump.cpu_notes_buf),
+ fw_dump.cpu_notes_buf_size);
+ fw_dump.cpu_notes_buf = 0;
+ fw_dump.cpu_notes_buf_size = 0;
+ }
+ /* Initialize the kernel dump memory structure for FAD registration. */
+ init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
+}
+
+static ssize_t fadump_release_memory_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ if (!fw_dump.dump_active)
+ return -EPERM;
+
+ if (buf[0] == '1') {
+ /*
+ * Take away the '/proc/vmcore'. We are releasing the dump
+ * memory, hence it will not be valid anymore.
+ */
+ vmcore_cleanup();
+ fadump_invalidate_release_mem();
+
+ } else
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t fadump_enabled_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
+}
+
+static ssize_t fadump_register_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", fw_dump.dump_registered);
+}
+
+static ssize_t fadump_register_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ret = 0;
+
+ if (!fw_dump.fadump_enabled || fdm_active)
+ return -EPERM;
+
+ mutex_lock(&fadump_mutex);
+
+ switch (buf[0]) {
+ case '0':
+ if (fw_dump.dump_registered == 0) {
+ ret = -EINVAL;
+ goto unlock_out;
+ }
+ /* Un-register Firmware-assisted dump */
+ fadump_unregister_dump(&fdm);
+ break;
+ case '1':
+ if (fw_dump.dump_registered == 1) {
+ ret = -EINVAL;
+ goto unlock_out;
+ }
+ /* Register Firmware-assisted dump */
+ register_fadump();
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+unlock_out:
+ mutex_unlock(&fadump_mutex);
+ return ret < 0 ? ret : count;
+}
+
+static int fadump_region_show(struct seq_file *m, void *private)
+{
+ const struct fadump_mem_struct *fdm_ptr;
+
+ if (!fw_dump.fadump_enabled)
+ return 0;
+
+ mutex_lock(&fadump_mutex);
+ if (fdm_active)
+ fdm_ptr = fdm_active;
+ else {
+ mutex_unlock(&fadump_mutex);
+ fdm_ptr = &fdm;
+ }
+
+ seq_printf(m,
+ "CPU : [%#016llx-%#016llx] %#llx bytes, "
+ "Dumped: %#llx\n",
+ fdm_ptr->cpu_state_data.destination_address,
+ fdm_ptr->cpu_state_data.destination_address +
+ fdm_ptr->cpu_state_data.source_len - 1,
+ fdm_ptr->cpu_state_data.source_len,
+ fdm_ptr->cpu_state_data.bytes_dumped);
+ seq_printf(m,
+ "HPTE: [%#016llx-%#016llx] %#llx bytes, "
+ "Dumped: %#llx\n",
+ fdm_ptr->hpte_region.destination_address,
+ fdm_ptr->hpte_region.destination_address +
+ fdm_ptr->hpte_region.source_len - 1,
+ fdm_ptr->hpte_region.source_len,
+ fdm_ptr->hpte_region.bytes_dumped);
+ seq_printf(m,
+ "DUMP: [%#016llx-%#016llx] %#llx bytes, "
+ "Dumped: %#llx\n",
+ fdm_ptr->rmr_region.destination_address,
+ fdm_ptr->rmr_region.destination_address +
+ fdm_ptr->rmr_region.source_len - 1,
+ fdm_ptr->rmr_region.source_len,
+ fdm_ptr->rmr_region.bytes_dumped);
+
+ if (!fdm_active ||
+ (fw_dump.reserve_dump_area_start ==
+ fdm_ptr->cpu_state_data.destination_address))
+ goto out;
+
+ /* Dump is active. Show reserved memory region. */
+ seq_printf(m,
+ " : [%#016llx-%#016llx] %#llx bytes, "
+ "Dumped: %#llx\n",
+ (unsigned long long)fw_dump.reserve_dump_area_start,
+ fdm_ptr->cpu_state_data.destination_address - 1,
+ fdm_ptr->cpu_state_data.destination_address -
+ fw_dump.reserve_dump_area_start,
+ fdm_ptr->cpu_state_data.destination_address -
+ fw_dump.reserve_dump_area_start);
+out:
+ if (fdm_active)
+ mutex_unlock(&fadump_mutex);
+ return 0;
+}
+
+static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem,
+ 0200, NULL,
+ fadump_release_memory_store);
+static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
+ 0444, fadump_enabled_show,
+ NULL);
+static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
+ 0644, fadump_register_show,
+ fadump_register_store);
+
+static int fadump_region_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, fadump_region_show, inode->i_private);
+}
+
+static const struct file_operations fadump_region_fops = {
+ .open = fadump_region_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void fadump_init_files(void)
+{
+ struct dentry *debugfs_file;
+ int rc = 0;
+
+ rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
+ if (rc)
+ printk(KERN_ERR "fadump: unable to create sysfs file"
+ " fadump_enabled (%d)\n", rc);
+
+ rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
+ if (rc)
+ printk(KERN_ERR "fadump: unable to create sysfs file"
+ " fadump_registered (%d)\n", rc);
+
+ debugfs_file = debugfs_create_file("fadump_region", 0444,
+ powerpc_debugfs_root, NULL,
+ &fadump_region_fops);
+ if (!debugfs_file)
+ printk(KERN_ERR "fadump: unable to create debugfs file"
+ " fadump_region\n");
+
+ if (fw_dump.dump_active) {
+ rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr);
+ if (rc)
+ printk(KERN_ERR "fadump: unable to create sysfs file"
+ " fadump_release_mem (%d)\n", rc);
+ }
+ return;
+}
+
+/*
+ * Prepare for firmware-assisted dump.
+ */
+int __init setup_fadump(void)
+{
+ if (!fw_dump.fadump_enabled)
+ return 0;
+
+ if (!fw_dump.fadump_supported) {
+ printk(KERN_ERR "Firmware-assisted dump is not supported on"
+ " this hardware\n");
+ return 0;
+ }
+
+ fadump_show_config();
+ /*
+ * If dump data is available then see if it is valid and prepare for
+ * saving it to the disk.
+ */
+ if (fw_dump.dump_active) {
+ /*
+ * if dump process fails then invalidate the registration
+ * and release memory before proceeding for re-registration.
+ */
+ if (process_fadump(fdm_active) < 0)
+ fadump_invalidate_release_mem();
+ }
+ /* Initialize the kernel dump memory structure for FAD registration. */
+ else if (fw_dump.reserve_dump_area_size)
+ init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
+ fadump_init_files();
+
+ return 1;
+}
+subsys_initcall(setup_fadump);
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
new file mode 100644
index 00000000..2eae4478
--- /dev/null
+++ b/arch/powerpc/kernel/firmware.c
@@ -0,0 +1,22 @@
+/*
+ * Extracted from cputable.c
+ *
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * Modifications for ppc64:
+ * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ * Copyright (C) 2005 Stephen Rothwell, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/export.h>
+#include <linux/cache.h>
+
+#include <asm/firmware.h>
+
+unsigned long powerpc_firmware_features __read_mostly;
+EXPORT_SYMBOL_GPL(powerpc_firmware_features);
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
new file mode 100644
index 00000000..de369558
--- /dev/null
+++ b/arch/powerpc/kernel/fpu.S
@@ -0,0 +1,177 @@
+/*
+ * FPU support code, moved here from head.S so that it can be used
+ * by chips which use other head-whatever.S files.
+ *
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ * Copyright (C) 1996 Paul Mackerras.
+ * Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/cputable.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_VSX
+#define REST_32FPVSRS(n,c,base) \
+BEGIN_FTR_SECTION \
+ b 2f; \
+END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
+ REST_32FPRS(n,base); \
+ b 3f; \
+2: REST_32VSRS(n,c,base); \
+3:
+
+#define SAVE_32FPVSRS(n,c,base) \
+BEGIN_FTR_SECTION \
+ b 2f; \
+END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
+ SAVE_32FPRS(n,base); \
+ b 3f; \
+2: SAVE_32VSRS(n,c,base); \
+3:
+#else
+#define REST_32FPVSRS(n,b,base) REST_32FPRS(n, base)
+#define SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base)
+#endif
+
+/*
+ * This task wants to use the FPU now.
+ * On UP, disable FP for the task which had the FPU previously,
+ * and save its floating-point registers in its thread_struct.
+ * Load up this task's FP registers from its thread_struct,
+ * enable the FPU for the current task and return to the task.
+ */
+_GLOBAL(load_up_fpu)
+ mfmsr r5
+ ori r5,r5,MSR_FP
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r5,r5,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+ SYNC
+ MTMSRD(r5) /* enable use of fpu now */
+ isync
+/*
+ * For SMP, we don't do lazy FPU switching because it just gets too
+ * horrendously complex, especially when a task switches from one CPU
+ * to another. Instead we call giveup_fpu in switch_to.
+ */
+#ifndef CONFIG_SMP
+ LOAD_REG_ADDRBASE(r3, last_task_used_math)
+ toreal(r3)
+ PPC_LL r4,ADDROFF(last_task_used_math)(r3)
+ PPC_LCMPI 0,r4,0
+ beq 1f
+ toreal(r4)
+ addi r4,r4,THREAD /* want last_task_used_math->thread */
+ SAVE_32FPVSRS(0, r5, r4)
+ mffs fr0
+ stfd fr0,THREAD_FPSCR(r4)
+ PPC_LL r5,PT_REGS(r4)
+ toreal(r5)
+ PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ li r10,MSR_FP|MSR_FE0|MSR_FE1
+ andc r4,r4,r10 /* disable FP for previous task */
+ PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#endif /* CONFIG_SMP */
+ /* enable use of FP after return */
+#ifdef CONFIG_PPC32
+ mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
+ lwz r4,THREAD_FPEXC_MODE(r5)
+ ori r9,r9,MSR_FP /* enable FP for current */
+ or r9,r9,r4
+#else
+ ld r4,PACACURRENT(r13)
+ addi r5,r4,THREAD /* Get THREAD */
+ lwz r4,THREAD_FPEXC_MODE(r5)
+ ori r12,r12,MSR_FP
+ or r12,r12,r4
+ std r12,_MSR(r1)
+#endif
+ lfd fr0,THREAD_FPSCR(r5)
+ MTFSF_L(fr0)
+ REST_32FPVSRS(0, r4, r5)
+#ifndef CONFIG_SMP
+ subi r4,r5,THREAD
+ fromreal(r4)
+ PPC_STL r4,ADDROFF(last_task_used_math)(r3)
+#endif /* CONFIG_SMP */
+ /* restore registers and return */
+ /* we haven't used ctr or xer or lr */
+ blr
+
+/*
+ * giveup_fpu(tsk)
+ * Disable FP for the task given as the argument,
+ * and save the floating-point registers in its thread_struct.
+ * Enables the FPU for use in the kernel on return.
+ */
+_GLOBAL(giveup_fpu)
+ mfmsr r5
+ ori r5,r5,MSR_FP
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r5,r5,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+ SYNC_601
+ ISYNC_601
+ MTMSRD(r5) /* enable use of fpu now */
+ SYNC_601
+ isync
+ PPC_LCMPI 0,r3,0
+ beqlr- /* if no previous owner, done */
+ addi r3,r3,THREAD /* want THREAD of task */
+ PPC_LL r5,PT_REGS(r3)
+ PPC_LCMPI 0,r5,0
+ SAVE_32FPVSRS(0, r4 ,r3)
+ mffs fr0
+ stfd fr0,THREAD_FPSCR(r3)
+ beq 1f
+ PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ li r3,MSR_FP|MSR_FE0|MSR_FE1
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r3,r3,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+ andc r4,r4,r3 /* disable FP for previous task */
+ PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#ifndef CONFIG_SMP
+ li r5,0
+ LOAD_REG_ADDRBASE(r4,last_task_used_math)
+ PPC_STL r5,ADDROFF(last_task_used_math)(r4)
+#endif /* CONFIG_SMP */
+ blr
+
+/*
+ * These are used in the alignment trap handler when emulating
+ * single-precision loads and stores.
+ */
+
+_GLOBAL(cvt_fd)
+ lfs 0,0(r3)
+ stfd 0,0(r4)
+ blr
+
+_GLOBAL(cvt_df)
+ lfd 0,0(r3)
+ stfs 0,0(r4)
+ blr
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
new file mode 100644
index 00000000..a92c79be
--- /dev/null
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -0,0 +1,235 @@
+
+/* 1. Find the index of the entry we're executing in */
+ bl invstr /* Find our address */
+invstr: mflr r6 /* Make it accessible */
+ mfmsr r7
+ rlwinm r4,r7,27,31,31 /* extract MSR[IS] */
+ mfspr r7, SPRN_PID0
+ slwi r7,r7,16
+ or r7,r7,r4
+ mtspr SPRN_MAS6,r7
+ tlbsx 0,r6 /* search MSR[IS], SPID=PID0 */
+ mfspr r7,SPRN_MAS1
+ andis. r7,r7,MAS1_VALID@h
+ bne match_TLB
+
+ mfspr r7,SPRN_MMUCFG
+ rlwinm r7,r7,21,28,31 /* extract MMUCFG[NPIDS] */
+ cmpwi r7,3
+ bne match_TLB /* skip if NPIDS != 3 */
+
+ mfspr r7,SPRN_PID1
+ slwi r7,r7,16
+ or r7,r7,r4
+ mtspr SPRN_MAS6,r7
+ tlbsx 0,r6 /* search MSR[IS], SPID=PID1 */
+ mfspr r7,SPRN_MAS1
+ andis. r7,r7,MAS1_VALID@h
+ bne match_TLB
+ mfspr r7, SPRN_PID2
+ slwi r7,r7,16
+ or r7,r7,r4
+ mtspr SPRN_MAS6,r7
+ tlbsx 0,r6 /* Fall through, we had to match */
+
+match_TLB:
+ mfspr r7,SPRN_MAS0
+ rlwinm r3,r7,16,20,31 /* Extract MAS0(Entry) */
+
+ mfspr r7,SPRN_MAS1 /* Insure IPROT set */
+ oris r7,r7,MAS1_IPROT@h
+ mtspr SPRN_MAS1,r7
+ tlbwe
+
+/* 2. Invalidate all entries except the entry we're executing in */
+ mfspr r9,SPRN_TLB1CFG
+ andi. r9,r9,0xfff
+ li r6,0 /* Set Entry counter to 0 */
+1: lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */
+ rlwimi r7,r6,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r6) */
+ mtspr SPRN_MAS0,r7
+ tlbre
+ mfspr r7,SPRN_MAS1
+ rlwinm r7,r7,0,2,31 /* Clear MAS1 Valid and IPROT */
+ cmpw r3,r6
+ beq skpinv /* Dont update the current execution TLB */
+ mtspr SPRN_MAS1,r7
+ tlbwe
+ isync
+skpinv: addi r6,r6,1 /* Increment */
+ cmpw r6,r9 /* Are we done? */
+ bne 1b /* If not, repeat */
+
+ /* Invalidate TLB0 */
+ li r6,0x04
+ tlbivax 0,r6
+ TLBSYNC
+ /* Invalidate TLB1 */
+ li r6,0x0c
+ tlbivax 0,r6
+ TLBSYNC
+
+/* 3. Setup a temp mapping and jump to it */
+ andi. r5, r3, 0x1 /* Find an entry not used and is non-zero */
+ addi r5, r5, 0x1
+ lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */
+ rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */
+ mtspr SPRN_MAS0,r7
+ tlbre
+
+ /* grab and fixup the RPN */
+ mfspr r6,SPRN_MAS1 /* extract MAS1[SIZE] */
+ rlwinm r6,r6,25,27,31
+ li r8,-1
+ addi r6,r6,10
+ slw r6,r8,r6 /* convert to mask */
+
+ bl 1f /* Find our address */
+1: mflr r7
+
+ mfspr r8,SPRN_MAS3
+#ifdef CONFIG_PHYS_64BIT
+ mfspr r23,SPRN_MAS7
+#endif
+ and r8,r6,r8
+ subfic r9,r6,-4096
+ and r9,r9,r7
+
+ or r25,r8,r9
+ ori r8,r25,(MAS3_SX|MAS3_SW|MAS3_SR)
+
+ /* Just modify the entry ID and EPN for the temp mapping */
+ lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */
+ rlwimi r7,r5,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r5) */
+ mtspr SPRN_MAS0,r7
+ xori r6,r4,1 /* Setup TMP mapping in the other Address space */
+ slwi r6,r6,12
+ oris r6,r6,(MAS1_VALID|MAS1_IPROT)@h
+ ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_4K))@l
+ mtspr SPRN_MAS1,r6
+ mfspr r6,SPRN_MAS2
+ li r7,0 /* temp EPN = 0 */
+ rlwimi r7,r6,0,20,31
+ mtspr SPRN_MAS2,r7
+ mtspr SPRN_MAS3,r8
+ tlbwe
+
+ xori r6,r4,1
+ slwi r6,r6,5 /* setup new context with other address space */
+ bl 1f /* Find our address */
+1: mflr r9
+ rlwimi r7,r9,0,20,31
+ addi r7,r7,(2f - 1b)
+ mtspr SPRN_SRR0,r7
+ mtspr SPRN_SRR1,r6
+ rfi
+2:
+/* 4. Clear out PIDs & Search info */
+ li r6,0
+ mtspr SPRN_MAS6,r6
+ mtspr SPRN_PID0,r6
+
+ mfspr r7,SPRN_MMUCFG
+ rlwinm r7,r7,21,28,31 /* extract MMUCFG[NPIDS] */
+ cmpwi r7,3
+ bne 2f /* skip if NPIDS != 3 */
+
+ mtspr SPRN_PID1,r6
+ mtspr SPRN_PID2,r6
+
+/* 5. Invalidate mapping we started in */
+2:
+ lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */
+ rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */
+ mtspr SPRN_MAS0,r7
+ tlbre
+ mfspr r6,SPRN_MAS1
+ rlwinm r6,r6,0,2,0 /* clear IPROT */
+ mtspr SPRN_MAS1,r6
+ tlbwe
+ /* Invalidate TLB1 */
+ li r9,0x0c
+ tlbivax 0,r9
+ TLBSYNC
+
+/* The mapping only needs to be cache-coherent on SMP */
+#ifdef CONFIG_SMP
+#define M_IF_SMP MAS2_M
+#else
+#define M_IF_SMP 0
+#endif
+
+#if defined(ENTRY_MAPPING_BOOT_SETUP)
+
+/* 6. Setup KERNELBASE mapping in TLB1[0] */
+ lis r6,0x1000 /* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */
+ mtspr SPRN_MAS0,r6
+ lis r6,(MAS1_VALID|MAS1_IPROT)@h
+ ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
+ mtspr SPRN_MAS1,r6
+ lis r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_SMP)@h
+ ori r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_SMP)@l
+ mtspr SPRN_MAS2,r6
+ mtspr SPRN_MAS3,r8
+ tlbwe
+
+/* 7. Jump to KERNELBASE mapping */
+ lis r6,(KERNELBASE & ~0xfff)@h
+ ori r6,r6,(KERNELBASE & ~0xfff)@l
+
+#elif defined(ENTRY_MAPPING_KEXEC_SETUP)
+/*
+ * 6. Setup a 1:1 mapping in TLB1. Esel 0 is unsued, 1 or 2 contains the tmp
+ * mapping so we start at 3. We setup 8 mappings, each 256MiB in size. This
+ * will cover the first 2GiB of memory.
+ */
+
+ lis r10, (MAS1_VALID|MAS1_IPROT)@h
+ ori r10,r10, (MAS1_TSIZE(BOOK3E_PAGESZ_256M))@l
+ li r11, 0
+ li r0, 8
+ mtctr r0
+
+next_tlb_setup:
+ addi r0, r11, 3
+ rlwinm r0, r0, 16, 4, 15 // Compute esel
+ rlwinm r9, r11, 28, 0, 3 // Compute [ER]PN
+ oris r0, r0, (MAS0_TLBSEL(1))@h
+ mtspr SPRN_MAS0,r0
+ mtspr SPRN_MAS1,r10
+ mtspr SPRN_MAS2,r9
+ ori r9, r9, (MAS3_SX|MAS3_SW|MAS3_SR)
+ mtspr SPRN_MAS3,r9
+ tlbwe
+ addi r11, r11, 1
+ bdnz+ next_tlb_setup
+
+/* 7. Jump to our 1:1 mapping */
+ mr r6, r25
+#else
+ #error You need to specify the mapping or not use this at all.
+#endif
+
+ lis r7,MSR_KERNEL@h
+ ori r7,r7,MSR_KERNEL@l
+ bl 1f /* Find our address */
+1: mflr r9
+ rlwimi r6,r9,0,20,31
+ addi r6,r6,(2f - 1b)
+ mtspr SPRN_SRR0,r6
+ mtspr SPRN_SRR1,r7
+ rfi /* start execution out of TLB1[0] entry */
+
+/* 8. Clear out the temp mapping */
+2: lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */
+ rlwimi r7,r5,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r5) */
+ mtspr SPRN_MAS0,r7
+ tlbre
+ mfspr r8,SPRN_MAS1
+ rlwinm r8,r8,0,2,0 /* clear IPROT */
+ mtspr SPRN_MAS1,r8
+ tlbwe
+ /* Invalidate TLB1 */
+ li r9,0x0c
+ tlbivax 0,r9
+ TLBSYNC
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
new file mode 100644
index 00000000..bf99cfa6
--- /dev/null
+++ b/arch/powerpc/kernel/ftrace.c
@@ -0,0 +1,610 @@
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box.
+ *
+ * Added function graph tracer code, taken from x86 that was written
+ * by Frederic Weisbecker, and ported to PPC by Steven Rostedt.
+ *
+ */
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+#include <asm/ftrace.h>
+#include <asm/syscall.h>
+
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+static unsigned int
+ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
+{
+ unsigned int op;
+
+ addr = ppc_function_entry((void *)addr);
+
+ /* if (link) set op to 'bl' else 'b' */
+ op = create_branch((unsigned int *)ip, addr, link ? 1 : 0);
+
+ return op;
+}
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
+{
+ unsigned int replaced;
+
+ /*
+ * Note: Due to modules and __init, code can
+ * disappear and change, we need to protect against faulting
+ * as well as code changing. We do this by using the
+ * probe_kernel_* functions.
+ *
+ * No real locking needed, this code is run through
+ * kstop_machine, or before SMP starts.
+ */
+
+ /* read the text we want to modify */
+ if (probe_kernel_read(&replaced, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ /* Make sure it is what we expect it to be */
+ if (replaced != old)
+ return -EINVAL;
+
+ /* replace the text with the new text */
+ if (probe_kernel_write((void *)ip, &new, MCOUNT_INSN_SIZE))
+ return -EPERM;
+
+ flush_icache_range(ip, ip + 8);
+
+ return 0;
+}
+
+/*
+ * Helper functions that are the same for both PPC64 and PPC32.
+ */
+static int test_24bit_addr(unsigned long ip, unsigned long addr)
+{
+
+ /* use the create_branch to verify that this offset can be branched */
+ return create_branch((unsigned int *)ip, addr, 0);
+}
+
+#ifdef CONFIG_MODULES
+
+static int is_bl_op(unsigned int op)
+{
+ return (op & 0xfc000003) == 0x48000001;
+}
+
+static unsigned long find_bl_target(unsigned long ip, unsigned int op)
+{
+ static int offset;
+
+ offset = (op & 0x03fffffc);
+ /* make it signed */
+ if (offset & 0x02000000)
+ offset |= 0xfe000000;
+
+ return ip + (long)offset;
+}
+
+#ifdef CONFIG_PPC64
+static int
+__ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned int op;
+ unsigned int jmp[5];
+ unsigned long ptr;
+ unsigned long ip = rec->ip;
+ unsigned long tramp;
+ int offset;
+
+ /* read where this goes */
+ if (probe_kernel_read(&op, (void *)ip, sizeof(int)))
+ return -EFAULT;
+
+ /* Make sure that that this is still a 24bit jump */
+ if (!is_bl_op(op)) {
+ printk(KERN_ERR "Not expected bl: opcode is %x\n", op);
+ return -EINVAL;
+ }
+
+ /* lets find where the pointer goes */
+ tramp = find_bl_target(ip, op);
+
+ /*
+ * On PPC64 the trampoline looks like:
+ * 0x3d, 0x82, 0x00, 0x00, addis r12,r2, <high>
+ * 0x39, 0x8c, 0x00, 0x00, addi r12,r12, <low>
+ * Where the bytes 2,3,6 and 7 make up the 32bit offset
+ * to the TOC that holds the pointer.
+ * to jump to.
+ * 0xf8, 0x41, 0x00, 0x28, std r2,40(r1)
+ * 0xe9, 0x6c, 0x00, 0x20, ld r11,32(r12)
+ * The actually address is 32 bytes from the offset
+ * into the TOC.
+ * 0xe8, 0x4c, 0x00, 0x28, ld r2,40(r12)
+ */
+
+ pr_devel("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc);
+
+ /* Find where the trampoline jumps to */
+ if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
+ printk(KERN_ERR "Failed to read %lx\n", tramp);
+ return -EFAULT;
+ }
+
+ pr_devel(" %08x %08x", jmp[0], jmp[1]);
+
+ /* verify that this is what we expect it to be */
+ if (((jmp[0] & 0xffff0000) != 0x3d820000) ||
+ ((jmp[1] & 0xffff0000) != 0x398c0000) ||
+ (jmp[2] != 0xf8410028) ||
+ (jmp[3] != 0xe96c0020) ||
+ (jmp[4] != 0xe84c0028)) {
+ printk(KERN_ERR "Not a trampoline\n");
+ return -EINVAL;
+ }
+
+ /* The bottom half is signed extended */
+ offset = ((unsigned)((unsigned short)jmp[0]) << 16) +
+ (int)((short)jmp[1]);
+
+ pr_devel(" %x ", offset);
+
+ /* get the address this jumps too */
+ tramp = mod->arch.toc + offset + 32;
+ pr_devel("toc: %lx", tramp);
+
+ if (probe_kernel_read(jmp, (void *)tramp, 8)) {
+ printk(KERN_ERR "Failed to read %lx\n", tramp);
+ return -EFAULT;
+ }
+
+ pr_devel(" %08x %08x\n", jmp[0], jmp[1]);
+
+ ptr = ((unsigned long)jmp[0] << 32) + jmp[1];
+
+ /* This should match what was called */
+ if (ptr != ppc_function_entry((void *)addr)) {
+ printk(KERN_ERR "addr does not match %lx\n", ptr);
+ return -EINVAL;
+ }
+
+ /*
+ * We want to nop the line, but the next line is
+ * 0xe8, 0x41, 0x00, 0x28 ld r2,40(r1)
+ * This needs to be turned to a nop too.
+ */
+ if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ if (op != 0xe8410028) {
+ printk(KERN_ERR "Next line is not ld! (%08x)\n", op);
+ return -EINVAL;
+ }
+
+ /*
+ * Milton Miller pointed out that we can not blindly do nops.
+ * If a task was preempted when calling a trace function,
+ * the nops will remove the way to restore the TOC in r2
+ * and the r2 TOC will get corrupted.
+ */
+
+ /*
+ * Replace:
+ * bl <tramp> <==== will be replaced with "b 1f"
+ * ld r2,40(r1)
+ * 1:
+ */
+ op = 0x48000008; /* b +8 */
+
+ if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
+ return -EPERM;
+
+
+ flush_icache_range(ip, ip + 8);
+
+ return 0;
+}
+
+#else /* !PPC64 */
+static int
+__ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned int op;
+ unsigned int jmp[4];
+ unsigned long ip = rec->ip;
+ unsigned long tramp;
+
+ if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ /* Make sure that that this is still a 24bit jump */
+ if (!is_bl_op(op)) {
+ printk(KERN_ERR "Not expected bl: opcode is %x\n", op);
+ return -EINVAL;
+ }
+
+ /* lets find where the pointer goes */
+ tramp = find_bl_target(ip, op);
+
+ /*
+ * On PPC32 the trampoline looks like:
+ * 0x3d, 0x60, 0x00, 0x00 lis r11,sym@ha
+ * 0x39, 0x6b, 0x00, 0x00 addi r11,r11,sym@l
+ * 0x7d, 0x69, 0x03, 0xa6 mtctr r11
+ * 0x4e, 0x80, 0x04, 0x20 bctr
+ */
+
+ pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+ /* Find where the trampoline jumps to */
+ if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
+ printk(KERN_ERR "Failed to read %lx\n", tramp);
+ return -EFAULT;
+ }
+
+ pr_devel(" %08x %08x ", jmp[0], jmp[1]);
+
+ /* verify that this is what we expect it to be */
+ if (((jmp[0] & 0xffff0000) != 0x3d600000) ||
+ ((jmp[1] & 0xffff0000) != 0x396b0000) ||
+ (jmp[2] != 0x7d6903a6) ||
+ (jmp[3] != 0x4e800420)) {
+ printk(KERN_ERR "Not a trampoline\n");
+ return -EINVAL;
+ }
+
+ tramp = (jmp[1] & 0xffff) |
+ ((jmp[0] & 0xffff) << 16);
+ if (tramp & 0x8000)
+ tramp -= 0x10000;
+
+ pr_devel(" %lx ", tramp);
+
+ if (tramp != addr) {
+ printk(KERN_ERR
+ "Trampoline location %08lx does not match addr\n",
+ tramp);
+ return -EINVAL;
+ }
+
+ op = PPC_INST_NOP;
+
+ if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
+ return -EPERM;
+
+ flush_icache_range(ip, ip + 8);
+
+ return 0;
+}
+#endif /* PPC64 */
+#endif /* CONFIG_MODULES */
+
+int ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long ip = rec->ip;
+ unsigned int old, new;
+
+ /*
+ * If the calling address is more that 24 bits away,
+ * then we had to use a trampoline to make the call.
+ * Otherwise just update the call site.
+ */
+ if (test_24bit_addr(ip, addr)) {
+ /* within range */
+ old = ftrace_call_replace(ip, addr, 1);
+ new = PPC_INST_NOP;
+ return ftrace_modify_code(ip, old, new);
+ }
+
+#ifdef CONFIG_MODULES
+ /*
+ * Out of range jumps are called from modules.
+ * We should either already have a pointer to the module
+ * or it has been passed in.
+ */
+ if (!rec->arch.mod) {
+ if (!mod) {
+ printk(KERN_ERR "No module loaded addr=%lx\n",
+ addr);
+ return -EFAULT;
+ }
+ rec->arch.mod = mod;
+ } else if (mod) {
+ if (mod != rec->arch.mod) {
+ printk(KERN_ERR
+ "Record mod %p not equal to passed in mod %p\n",
+ rec->arch.mod, mod);
+ return -EINVAL;
+ }
+ /* nothing to do if mod == rec->arch.mod */
+ } else
+ mod = rec->arch.mod;
+
+ return __ftrace_make_nop(mod, rec, addr);
+#else
+ /* We should not get here without modules */
+ return -EINVAL;
+#endif /* CONFIG_MODULES */
+}
+
+#ifdef CONFIG_MODULES
+#ifdef CONFIG_PPC64
+static int
+__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned int op[2];
+ unsigned long ip = rec->ip;
+
+ /* read where this goes */
+ if (probe_kernel_read(op, (void *)ip, MCOUNT_INSN_SIZE * 2))
+ return -EFAULT;
+
+ /*
+ * It should be pointing to two nops or
+ * b +8; ld r2,40(r1)
+ */
+ if (((op[0] != 0x48000008) || (op[1] != 0xe8410028)) &&
+ ((op[0] != PPC_INST_NOP) || (op[1] != PPC_INST_NOP))) {
+ printk(KERN_ERR "Expected NOPs but have %x %x\n", op[0], op[1]);
+ return -EINVAL;
+ }
+
+ /* If we never set up a trampoline to ftrace_caller, then bail */
+ if (!rec->arch.mod->arch.tramp) {
+ printk(KERN_ERR "No ftrace trampoline\n");
+ return -EINVAL;
+ }
+
+ /* create the branch to the trampoline */
+ op[0] = create_branch((unsigned int *)ip,
+ rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
+ if (!op[0]) {
+ printk(KERN_ERR "REL24 out of range!\n");
+ return -EINVAL;
+ }
+
+ /* ld r2,40(r1) */
+ op[1] = 0xe8410028;
+
+ pr_devel("write to %lx\n", rec->ip);
+
+ if (probe_kernel_write((void *)ip, op, MCOUNT_INSN_SIZE * 2))
+ return -EPERM;
+
+ flush_icache_range(ip, ip + 8);
+
+ return 0;
+}
+#else
+static int
+__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned int op;
+ unsigned long ip = rec->ip;
+
+ /* read where this goes */
+ if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ /* It should be pointing to a nop */
+ if (op != PPC_INST_NOP) {
+ printk(KERN_ERR "Expected NOP but have %x\n", op);
+ return -EINVAL;
+ }
+
+ /* If we never set up a trampoline to ftrace_caller, then bail */
+ if (!rec->arch.mod->arch.tramp) {
+ printk(KERN_ERR "No ftrace trampoline\n");
+ return -EINVAL;
+ }
+
+ /* create the branch to the trampoline */
+ op = create_branch((unsigned int *)ip,
+ rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
+ if (!op) {
+ printk(KERN_ERR "REL24 out of range!\n");
+ return -EINVAL;
+ }
+
+ pr_devel("write to %lx\n", rec->ip);
+
+ if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
+ return -EPERM;
+
+ flush_icache_range(ip, ip + 8);
+
+ return 0;
+}
+#endif /* CONFIG_PPC64 */
+#endif /* CONFIG_MODULES */
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long ip = rec->ip;
+ unsigned int old, new;
+
+ /*
+ * If the calling address is more that 24 bits away,
+ * then we had to use a trampoline to make the call.
+ * Otherwise just update the call site.
+ */
+ if (test_24bit_addr(ip, addr)) {
+ /* within range */
+ old = PPC_INST_NOP;
+ new = ftrace_call_replace(ip, addr, 1);
+ return ftrace_modify_code(ip, old, new);
+ }
+
+#ifdef CONFIG_MODULES
+ /*
+ * Out of range jumps are called from modules.
+ * Being that we are converting from nop, it had better
+ * already have a module defined.
+ */
+ if (!rec->arch.mod) {
+ printk(KERN_ERR "No module loaded\n");
+ return -EINVAL;
+ }
+
+ return __ftrace_make_call(rec, addr);
+#else
+ /* We should not get here without modules */
+ return -EINVAL;
+#endif /* CONFIG_MODULES */
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long ip = (unsigned long)(&ftrace_call);
+ unsigned int old, new;
+ int ret;
+
+ old = *(unsigned int *)&ftrace_call;
+ new = ftrace_call_replace(ip, (unsigned long)func, 1);
+ ret = ftrace_modify_code(ip, old, new);
+
+ return ret;
+}
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+ /* caller expects data to be zero */
+ unsigned long *p = data;
+
+ *p = 0;
+
+ return 0;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern void ftrace_graph_call(void);
+extern void ftrace_graph_stub(void);
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ unsigned long ip = (unsigned long)(&ftrace_graph_call);
+ unsigned long addr = (unsigned long)(&ftrace_graph_caller);
+ unsigned long stub = (unsigned long)(&ftrace_graph_stub);
+ unsigned int old, new;
+
+ old = ftrace_call_replace(ip, stub, 0);
+ new = ftrace_call_replace(ip, addr, 0);
+
+ return ftrace_modify_code(ip, old, new);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ unsigned long ip = (unsigned long)(&ftrace_graph_call);
+ unsigned long addr = (unsigned long)(&ftrace_graph_caller);
+ unsigned long stub = (unsigned long)(&ftrace_graph_stub);
+ unsigned int old, new;
+
+ old = ftrace_call_replace(ip, addr, 0);
+ new = ftrace_call_replace(ip, stub, 0);
+
+ return ftrace_modify_code(ip, old, new);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_PPC64
+extern void mod_return_to_handler(void);
+#endif
+
+/*
+ * Hook the return address and push it in the stack of return addrs
+ * in current thread info.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+{
+ unsigned long old;
+ int faulted;
+ struct ftrace_graph_ent trace;
+ unsigned long return_hooker = (unsigned long)&return_to_handler;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+
+#ifdef CONFIG_PPC64
+ /* non core kernel code needs to save and restore the TOC */
+ if (REGION_ID(self_addr) != KERNEL_REGION_ID)
+ return_hooker = (unsigned long)&mod_return_to_handler;
+#endif
+
+ return_hooker = ppc_function_entry((void *)return_hooker);
+
+ /*
+ * Protect against fault, even if it shouldn't
+ * happen. This tool is too much intrusive to
+ * ignore such a protection.
+ */
+ asm volatile(
+ "1: " PPC_LL "%[old], 0(%[parent])\n"
+ "2: " PPC_STL "%[return_hooker], 0(%[parent])\n"
+ " li %[faulted], 0\n"
+ "3:\n"
+
+ ".section .fixup, \"ax\"\n"
+ "4: li %[faulted], 1\n"
+ " b 3b\n"
+ ".previous\n"
+
+ ".section __ex_table,\"a\"\n"
+ PPC_LONG_ALIGN "\n"
+ PPC_LONG "1b,4b\n"
+ PPC_LONG "2b,4b\n"
+ ".previous"
+
+ : [old] "=&r" (old), [faulted] "=r" (faulted)
+ : [parent] "r" (parent), [return_hooker] "r" (return_hooker)
+ : "memory"
+ );
+
+ if (unlikely(faulted)) {
+ ftrace_graph_stop();
+ WARN_ON(1);
+ return;
+ }
+
+ if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY) {
+ *parent = old;
+ return;
+ }
+
+ trace.func = self_addr;
+
+ /* Only trace if the calling function expects to */
+ if (!ftrace_graph_entry(&trace)) {
+ current->curr_ret_stack--;
+ *parent = old;
+ }
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64)
+unsigned long __init arch_syscall_addr(int nr)
+{
+ return sys_call_table[nr*2];
+}
+#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
new file mode 100644
index 00000000..dc0488b6
--- /dev/null
+++ b/arch/powerpc/kernel/head_32.S
@@ -0,0 +1,1296 @@
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ * Adapted for Power Macintosh by Paul Mackerras.
+ * Low-level exception handlers and MMU support
+ * rewritten by Paul Mackerras.
+ * Copyright (C) 1996 Paul Mackerras.
+ * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+ *
+ * This file contains the low-level support and setup for the
+ * PowerPC platform, including trap and interrupt dispatch.
+ * (The PPC 8xx embedded CPUs use head_8xx.S instead.)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/init.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/cputable.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+#include <asm/bug.h>
+#include <asm/kvm_book3s_asm.h>
+
+/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
+#define LOAD_BAT(n, reg, RA, RB) \
+ /* see the comment for clear_bats() -- Cort */ \
+ li RA,0; \
+ mtspr SPRN_IBAT##n##U,RA; \
+ mtspr SPRN_DBAT##n##U,RA; \
+ lwz RA,(n*16)+0(reg); \
+ lwz RB,(n*16)+4(reg); \
+ mtspr SPRN_IBAT##n##U,RA; \
+ mtspr SPRN_IBAT##n##L,RB; \
+ beq 1f; \
+ lwz RA,(n*16)+8(reg); \
+ lwz RB,(n*16)+12(reg); \
+ mtspr SPRN_DBAT##n##U,RA; \
+ mtspr SPRN_DBAT##n##L,RB; \
+1:
+
+ __HEAD
+ .stabs "arch/powerpc/kernel/",N_SO,0,0,0f
+ .stabs "head_32.S",N_SO,0,0,0f
+0:
+_ENTRY(_stext);
+
+/*
+ * _start is defined this way because the XCOFF loader in the OpenFirmware
+ * on the powermac expects the entry point to be a procedure descriptor.
+ */
+_ENTRY(_start);
+ /*
+ * These are here for legacy reasons, the kernel used to
+ * need to look like a coff function entry for the pmac
+ * but we're always started by some kind of bootloader now.
+ * -- Cort
+ */
+ nop /* used by __secondary_hold on prep (mtx) and chrp smp */
+ nop /* used by __secondary_hold on prep (mtx) and chrp smp */
+ nop
+
+/* PMAC
+ * Enter here with the kernel text, data and bss loaded starting at
+ * 0, running with virtual == physical mapping.
+ * r5 points to the prom entry point (the client interface handler
+ * address). Address translation is turned on, with the prom
+ * managing the hash table. Interrupts are disabled. The stack
+ * pointer (r1) points to just below the end of the half-meg region
+ * from 0x380000 - 0x400000, which is mapped in already.
+ *
+ * If we are booted from MacOS via BootX, we enter with the kernel
+ * image loaded somewhere, and the following values in registers:
+ * r3: 'BooX' (0x426f6f58)
+ * r4: virtual address of boot_infos_t
+ * r5: 0
+ *
+ * PREP
+ * This is jumped to on prep systems right after the kernel is relocated
+ * to its proper place in memory by the boot loader. The expected layout
+ * of the regs is:
+ * r3: ptr to residual data
+ * r4: initrd_start or if no initrd then 0
+ * r5: initrd_end - unused if r4 is 0
+ * r6: Start of command line string
+ * r7: End of command line string
+ *
+ * This just gets a minimal mmu environment setup so we can call
+ * start_here() to do the real work.
+ * -- Cort
+ */
+
+ .globl __start
+__start:
+/*
+ * We have to do any OF calls before we map ourselves to KERNELBASE,
+ * because OF may have I/O devices mapped into that area
+ * (particularly on CHRP).
+ */
+ cmpwi 0,r5,0
+ beq 1f
+
+#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
+ /* find out where we are now */
+ bcl 20,31,$+4
+0: mflr r8 /* r8 = runtime addr here */
+ addis r8,r8,(_stext - 0b)@ha
+ addi r8,r8,(_stext - 0b)@l /* current runtime base addr */
+ bl prom_init
+#endif /* CONFIG_PPC_OF_BOOT_TRAMPOLINE */
+
+ /* We never return. We also hit that trap if trying to boot
+ * from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */
+ trap
+
+/*
+ * Check for BootX signature when supporting PowerMac and branch to
+ * appropriate trampoline if it's present
+ */
+#ifdef CONFIG_PPC_PMAC
+1: lis r31,0x426f
+ ori r31,r31,0x6f58
+ cmpw 0,r3,r31
+ bne 1f
+ bl bootx_init
+ trap
+#endif /* CONFIG_PPC_PMAC */
+
+1: mr r31,r3 /* save device tree ptr */
+ li r24,0 /* cpu # */
+
+/*
+ * early_init() does the early machine identification and does
+ * the necessary low-level setup and clears the BSS
+ * -- Cort <cort@fsmlabs.com>
+ */
+ bl early_init
+
+/* Switch MMU off, clear BATs and flush TLB. At this point, r3 contains
+ * the physical address we are running at, returned by early_init()
+ */
+ bl mmu_off
+__after_mmu_off:
+ bl clear_bats
+ bl flush_tlbs
+
+ bl initial_bats
+#if defined(CONFIG_BOOTX_TEXT)
+ bl setup_disp_bat
+#endif
+#ifdef CONFIG_PPC_EARLY_DEBUG_CPM
+ bl setup_cpm_bat
+#endif
+#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO
+ bl setup_usbgecko_bat
+#endif
+
+/*
+ * Call setup_cpu for CPU 0 and initialize 6xx Idle
+ */
+ bl reloc_offset
+ li r24,0 /* cpu# */
+ bl call_setup_cpu /* Call setup_cpu for this CPU */
+#ifdef CONFIG_6xx
+ bl reloc_offset
+ bl init_idle_6xx
+#endif /* CONFIG_6xx */
+
+
+/*
+ * We need to run with _start at physical address 0.
+ * On CHRP, we are loaded at 0x10000 since OF on CHRP uses
+ * the exception vectors at 0 (and therefore this copy
+ * overwrites OF's exception vectors with our own).
+ * The MMU is off at this point.
+ */
+ bl reloc_offset
+ mr r26,r3
+ addis r4,r3,KERNELBASE@h /* current address of _start */
+ lis r5,PHYSICAL_START@h
+ cmplw 0,r4,r5 /* already running at PHYSICAL_START? */
+ bne relocate_kernel
+/*
+ * we now have the 1st 16M of ram mapped with the bats.
+ * prep needs the mmu to be turned on here, but pmac already has it on.
+ * this shouldn't bother the pmac since it just gets turned on again
+ * as we jump to our code at KERNELBASE. -- Cort
+ * Actually no, pmac doesn't have it on any more. BootX enters with MMU
+ * off, and in other cases, we now turn it off before changing BATs above.
+ */
+turn_on_mmu:
+ mfmsr r0
+ ori r0,r0,MSR_DR|MSR_IR
+ mtspr SPRN_SRR1,r0
+ lis r0,start_here@h
+ ori r0,r0,start_here@l
+ mtspr SPRN_SRR0,r0
+ SYNC
+ RFI /* enables MMU */
+
+/*
+ * We need __secondary_hold as a place to hold the other cpus on
+ * an SMP machine, even when we are running a UP kernel.
+ */
+ . = 0xc0 /* for prep bootloader */
+ li r3,1 /* MTX only has 1 cpu */
+ .globl __secondary_hold
+__secondary_hold:
+ /* tell the master we're here */
+ stw r3,__secondary_hold_acknowledge@l(0)
+#ifdef CONFIG_SMP
+100: lwz r4,0(0)
+ /* wait until we're told to start */
+ cmpw 0,r4,r3
+ bne 100b
+ /* our cpu # was at addr 0 - go */
+ mr r24,r3 /* cpu # */
+ b __secondary_start
+#else
+ b .
+#endif /* CONFIG_SMP */
+
+ .globl __secondary_hold_spinloop
+__secondary_hold_spinloop:
+ .long 0
+ .globl __secondary_hold_acknowledge
+__secondary_hold_acknowledge:
+ .long -1
+
+/*
+ * Exception entry code. This code runs with address translation
+ * turned off, i.e. using physical addresses.
+ * We assume sprg3 has the physical address of the current
+ * task's thread_struct.
+ */
+#define EXCEPTION_PROLOG \
+ mtspr SPRN_SPRG_SCRATCH0,r10; \
+ mtspr SPRN_SPRG_SCRATCH1,r11; \
+ mfcr r10; \
+ EXCEPTION_PROLOG_1; \
+ EXCEPTION_PROLOG_2
+
+#define EXCEPTION_PROLOG_1 \
+ mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \
+ andi. r11,r11,MSR_PR; \
+ tophys(r11,r1); /* use tophys(r1) if kernel */ \
+ beq 1f; \
+ mfspr r11,SPRN_SPRG_THREAD; \
+ lwz r11,THREAD_INFO-THREAD(r11); \
+ addi r11,r11,THREAD_SIZE; \
+ tophys(r11,r11); \
+1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
+
+
+#define EXCEPTION_PROLOG_2 \
+ CLR_TOP32(r11); \
+ stw r10,_CCR(r11); /* save registers */ \
+ stw r12,GPR12(r11); \
+ stw r9,GPR9(r11); \
+ mfspr r10,SPRN_SPRG_SCRATCH0; \
+ stw r10,GPR10(r11); \
+ mfspr r12,SPRN_SPRG_SCRATCH1; \
+ stw r12,GPR11(r11); \
+ mflr r10; \
+ stw r10,_LINK(r11); \
+ mfspr r12,SPRN_SRR0; \
+ mfspr r9,SPRN_SRR1; \
+ stw r1,GPR1(r11); \
+ stw r1,0(r11); \
+ tovirt(r1,r11); /* set new kernel sp */ \
+ li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
+ MTMSRD(r10); /* (except for mach check in rtas) */ \
+ stw r0,GPR0(r11); \
+ lis r10,STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \
+ addi r10,r10,STACK_FRAME_REGS_MARKER@l; \
+ stw r10,8(r11); \
+ SAVE_4GPRS(3, r11); \
+ SAVE_2GPRS(7, r11)
+
+/*
+ * Note: code which follows this uses cr0.eq (set if from kernel),
+ * r11, r12 (SRR0), and r9 (SRR1).
+ *
+ * Note2: once we have set r1 we are in a position to take exceptions
+ * again, and we could thus set MSR:RI at that point.
+ */
+
+/*
+ * Exception vectors.
+ */
+#define EXCEPTION(n, label, hdlr, xfer) \
+ . = n; \
+ DO_KVM n; \
+label: \
+ EXCEPTION_PROLOG; \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ xfer(n, hdlr)
+
+#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \
+ li r10,trap; \
+ stw r10,_TRAP(r11); \
+ li r10,MSR_KERNEL; \
+ copyee(r10, r9); \
+ bl tfer; \
+i##n: \
+ .long hdlr; \
+ .long ret
+
+#define COPY_EE(d, s) rlwimi d,s,0,16,16
+#define NOCOPY(d, s)
+
+#define EXC_XFER_STD(n, hdlr) \
+ EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \
+ ret_from_except_full)
+
+#define EXC_XFER_LITE(n, hdlr) \
+ EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \
+ ret_from_except)
+
+#define EXC_XFER_EE(n, hdlr) \
+ EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \
+ ret_from_except_full)
+
+#define EXC_XFER_EE_LITE(n, hdlr) \
+ EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \
+ ret_from_except)
+
+/* System reset */
+/* core99 pmac starts the seconary here by changing the vector, and
+ putting it back to what it was (unknown_exception) when done. */
+ EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD)
+
+/* Machine check */
+/*
+ * On CHRP, this is complicated by the fact that we could get a
+ * machine check inside RTAS, and we have no guarantee that certain
+ * critical registers will have the values we expect. The set of
+ * registers that might have bad values includes all the GPRs
+ * and all the BATs. We indicate that we are in RTAS by putting
+ * a non-zero value, the address of the exception frame to use,
+ * in SPRG2. The machine check handler checks SPRG2 and uses its
+ * value if it is non-zero. If we ever needed to free up SPRG2,
+ * we could use a field in the thread_info or thread_struct instead.
+ * (Other exception handlers assume that r1 is a valid kernel stack
+ * pointer when we take an exception from supervisor mode.)
+ * -- paulus.
+ */
+ . = 0x200
+ DO_KVM 0x200
+ mtspr SPRN_SPRG_SCRATCH0,r10
+ mtspr SPRN_SPRG_SCRATCH1,r11
+ mfcr r10
+#ifdef CONFIG_PPC_CHRP
+ mfspr r11,SPRN_SPRG_RTAS
+ cmpwi 0,r11,0
+ bne 7f
+#endif /* CONFIG_PPC_CHRP */
+ EXCEPTION_PROLOG_1
+7: EXCEPTION_PROLOG_2
+ addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_PPC_CHRP
+ mfspr r4,SPRN_SPRG_RTAS
+ cmpwi cr1,r4,0
+ bne cr1,1f
+#endif
+ EXC_XFER_STD(0x200, machine_check_exception)
+#ifdef CONFIG_PPC_CHRP
+1: b machine_check_in_rtas
+#endif
+
+/* Data access exception. */
+ . = 0x300
+ DO_KVM 0x300
+DataAccess:
+ EXCEPTION_PROLOG
+ mfspr r10,SPRN_DSISR
+ stw r10,_DSISR(r11)
+ andis. r0,r10,0xa470 /* weird error? */
+ bne 1f /* if not, try to put a PTE */
+ mfspr r4,SPRN_DAR /* into the hash table */
+ rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */
+ bl hash_page
+1: lwz r5,_DSISR(r11) /* get DSISR value */
+ mfspr r4,SPRN_DAR
+ EXC_XFER_LITE(0x300, handle_page_fault)
+
+
+/* Instruction access exception. */
+ . = 0x400
+ DO_KVM 0x400
+InstructionAccess:
+ EXCEPTION_PROLOG
+ andis. r0,r9,0x4000 /* no pte found? */
+ beq 1f /* if so, try to put a PTE */
+ li r3,0 /* into the hash table */
+ mr r4,r12 /* SRR0 is fault address */
+ bl hash_page
+1: mr r4,r12
+ mr r5,r9
+ EXC_XFER_LITE(0x400, handle_page_fault)
+
+/* External interrupt */
+ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+
+/* Alignment exception */
+ . = 0x600
+ DO_KVM 0x600
+Alignment:
+ EXCEPTION_PROLOG
+ mfspr r4,SPRN_DAR
+ stw r4,_DAR(r11)
+ mfspr r5,SPRN_DSISR
+ stw r5,_DSISR(r11)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_EE(0x600, alignment_exception)
+
+/* Program check exception */
+ EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
+
+/* Floating-point unavailable */
+ . = 0x800
+ DO_KVM 0x800
+FPUnavailable:
+BEGIN_FTR_SECTION
+/*
+ * Certain Freescale cores don't have a FPU and treat fp instructions
+ * as a FP Unavailable exception. Redirect to illegal/emulation handling.
+ */
+ b ProgramCheck
+END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
+ EXCEPTION_PROLOG
+ beq 1f
+ bl load_up_fpu /* if from user, just load it up */
+ b fast_exception_return
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception)
+
+/* Decrementer */
+ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
+
+ EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE)
+
+/* System call */
+ . = 0xc00
+ DO_KVM 0xc00
+SystemCall:
+ EXCEPTION_PROLOG
+ EXC_XFER_EE_LITE(0xc00, DoSyscall)
+
+/* Single step - not used on 601 */
+ EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
+ EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE)
+
+/*
+ * The Altivec unavailable trap is at 0x0f20. Foo.
+ * We effectively remap it to 0x3000.
+ * We include an altivec unavailable exception vector even if
+ * not configured for Altivec, so that you can't panic a
+ * non-altivec kernel running on a machine with altivec just
+ * by executing an altivec instruction.
+ */
+ . = 0xf00
+ DO_KVM 0xf00
+ b PerformanceMonitor
+
+ . = 0xf20
+ DO_KVM 0xf20
+ b AltiVecUnavailable
+
+/*
+ * Handle TLB miss for instruction on 603/603e.
+ * Note: we get an alternate set of r0 - r3 to use automatically.
+ */
+ . = 0x1000
+InstructionTLBMiss:
+/*
+ * r0: scratch
+ * r1: linux style pte ( later becomes ppc hardware pte )
+ * r2: ptr to linux-style pte
+ * r3: scratch
+ */
+ /* Get PTE (linux-style) and check access */
+ mfspr r3,SPRN_IMISS
+ lis r1,PAGE_OFFSET@h /* check if kernel address */
+ cmplw 0,r1,r3
+ mfspr r2,SPRN_SPRG_THREAD
+ li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */
+ lwz r2,PGDIR(r2)
+ bge- 112f
+ mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
+ rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
+ lis r2,swapper_pg_dir@ha /* if kernel address, use */
+ addi r2,r2,swapper_pg_dir@l /* kernel page table */
+112: tophys(r2,r2)
+ rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
+ lwz r2,0(r2) /* get pmd entry */
+ rlwinm. r2,r2,0,0,19 /* extract address of pte page */
+ beq- InstructionAddressInvalid /* return if no mapping */
+ rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */
+ lwz r0,0(r2) /* get linux-style pte */
+ andc. r1,r1,r0 /* check access & ~permission */
+ bne- InstructionAddressInvalid /* return if access not permitted */
+ ori r0,r0,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */
+ /*
+ * NOTE! We are assuming this is not an SMP system, otherwise
+ * we would need to update the pte atomically with lwarx/stwcx.
+ */
+ stw r0,0(r2) /* update PTE (accessed bit) */
+ /* Convert linux-style PTE to low word of PPC-style PTE */
+ rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */
+ rlwinm r2,r0,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
+ and r1,r1,r2 /* writable if _RW and _DIRTY */
+ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
+ rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
+ ori r1,r1,0xe04 /* clear out reserved bits */
+ andc r1,r0,r1 /* PP = user? (rw&dirty? 2: 3): 0 */
+BEGIN_FTR_SECTION
+ rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
+END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
+ mtspr SPRN_RPA,r1
+ tlbli r3
+ mfspr r3,SPRN_SRR1 /* Need to restore CR0 */
+ mtcrf 0x80,r3
+ rfi
+InstructionAddressInvalid:
+ mfspr r3,SPRN_SRR1
+ rlwinm r1,r3,9,6,6 /* Get load/store bit */
+
+ addis r1,r1,0x2000
+ mtspr SPRN_DSISR,r1 /* (shouldn't be needed) */
+ andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */
+ or r2,r2,r1
+ mtspr SPRN_SRR1,r2
+ mfspr r1,SPRN_IMISS /* Get failing address */
+ rlwinm. r2,r2,0,31,31 /* Check for little endian access */
+ rlwimi r2,r2,1,30,30 /* change 1 -> 3 */
+ xor r1,r1,r2
+ mtspr SPRN_DAR,r1 /* Set fault address */
+ mfmsr r0 /* Restore "normal" registers */
+ xoris r0,r0,MSR_TGPR>>16
+ mtcrf 0x80,r3 /* Restore CR0 */
+ mtmsr r0
+ b InstructionAccess
+
+/*
+ * Handle TLB miss for DATA Load operation on 603/603e
+ */
+ . = 0x1100
+DataLoadTLBMiss:
+/*
+ * r0: scratch
+ * r1: linux style pte ( later becomes ppc hardware pte )
+ * r2: ptr to linux-style pte
+ * r3: scratch
+ */
+ /* Get PTE (linux-style) and check access */
+ mfspr r3,SPRN_DMISS
+ lis r1,PAGE_OFFSET@h /* check if kernel address */
+ cmplw 0,r1,r3
+ mfspr r2,SPRN_SPRG_THREAD
+ li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */
+ lwz r2,PGDIR(r2)
+ bge- 112f
+ mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
+ rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
+ lis r2,swapper_pg_dir@ha /* if kernel address, use */
+ addi r2,r2,swapper_pg_dir@l /* kernel page table */
+112: tophys(r2,r2)
+ rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
+ lwz r2,0(r2) /* get pmd entry */
+ rlwinm. r2,r2,0,0,19 /* extract address of pte page */
+ beq- DataAddressInvalid /* return if no mapping */
+ rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */
+ lwz r0,0(r2) /* get linux-style pte */
+ andc. r1,r1,r0 /* check access & ~permission */
+ bne- DataAddressInvalid /* return if access not permitted */
+ ori r0,r0,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */
+ /*
+ * NOTE! We are assuming this is not an SMP system, otherwise
+ * we would need to update the pte atomically with lwarx/stwcx.
+ */
+ stw r0,0(r2) /* update PTE (accessed bit) */
+ /* Convert linux-style PTE to low word of PPC-style PTE */
+ rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */
+ rlwinm r2,r0,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
+ and r1,r1,r2 /* writable if _RW and _DIRTY */
+ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
+ rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
+ ori r1,r1,0xe04 /* clear out reserved bits */
+ andc r1,r0,r1 /* PP = user? (rw&dirty? 2: 3): 0 */
+BEGIN_FTR_SECTION
+ rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
+END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
+ mtspr SPRN_RPA,r1
+ mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
+ mtcrf 0x80,r2
+BEGIN_MMU_FTR_SECTION
+ li r0,1
+ mfspr r1,SPRN_SPRG_603_LRU
+ rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */
+ slw r0,r0,r2
+ xor r1,r0,r1
+ srw r0,r1,r2
+ mtspr SPRN_SPRG_603_LRU,r1
+ mfspr r2,SPRN_SRR1
+ rlwimi r2,r0,31-14,14,14
+ mtspr SPRN_SRR1,r2
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+ tlbld r3
+ rfi
+DataAddressInvalid:
+ mfspr r3,SPRN_SRR1
+ rlwinm r1,r3,9,6,6 /* Get load/store bit */
+ addis r1,r1,0x2000
+ mtspr SPRN_DSISR,r1
+ andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */
+ mtspr SPRN_SRR1,r2
+ mfspr r1,SPRN_DMISS /* Get failing address */
+ rlwinm. r2,r2,0,31,31 /* Check for little endian access */
+ beq 20f /* Jump if big endian */
+ xori r1,r1,3
+20: mtspr SPRN_DAR,r1 /* Set fault address */
+ mfmsr r0 /* Restore "normal" registers */
+ xoris r0,r0,MSR_TGPR>>16
+ mtcrf 0x80,r3 /* Restore CR0 */
+ mtmsr r0
+ b DataAccess
+
+/*
+ * Handle TLB miss for DATA Store on 603/603e
+ */
+ . = 0x1200
+DataStoreTLBMiss:
+/*
+ * r0: scratch
+ * r1: linux style pte ( later becomes ppc hardware pte )
+ * r2: ptr to linux-style pte
+ * r3: scratch
+ */
+ /* Get PTE (linux-style) and check access */
+ mfspr r3,SPRN_DMISS
+ lis r1,PAGE_OFFSET@h /* check if kernel address */
+ cmplw 0,r1,r3
+ mfspr r2,SPRN_SPRG_THREAD
+ li r1,_PAGE_RW|_PAGE_USER|_PAGE_PRESENT /* access flags */
+ lwz r2,PGDIR(r2)
+ bge- 112f
+ mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
+ rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
+ lis r2,swapper_pg_dir@ha /* if kernel address, use */
+ addi r2,r2,swapper_pg_dir@l /* kernel page table */
+112: tophys(r2,r2)
+ rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
+ lwz r2,0(r2) /* get pmd entry */
+ rlwinm. r2,r2,0,0,19 /* extract address of pte page */
+ beq- DataAddressInvalid /* return if no mapping */
+ rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */
+ lwz r0,0(r2) /* get linux-style pte */
+ andc. r1,r1,r0 /* check access & ~permission */
+ bne- DataAddressInvalid /* return if access not permitted */
+ ori r0,r0,_PAGE_ACCESSED|_PAGE_DIRTY
+ /*
+ * NOTE! We are assuming this is not an SMP system, otherwise
+ * we would need to update the pte atomically with lwarx/stwcx.
+ */
+ stw r0,0(r2) /* update PTE (accessed/dirty bits) */
+ /* Convert linux-style PTE to low word of PPC-style PTE */
+ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
+ li r1,0xe05 /* clear out reserved bits & PP lsb */
+ andc r1,r0,r1 /* PP = user? 2: 0 */
+BEGIN_FTR_SECTION
+ rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
+END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
+ mtspr SPRN_RPA,r1
+ mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
+ mtcrf 0x80,r2
+BEGIN_MMU_FTR_SECTION
+ li r0,1
+ mfspr r1,SPRN_SPRG_603_LRU
+ rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */
+ slw r0,r0,r2
+ xor r1,r0,r1
+ srw r0,r1,r2
+ mtspr SPRN_SPRG_603_LRU,r1
+ mfspr r2,SPRN_SRR1
+ rlwimi r2,r0,31-14,14,14
+ mtspr SPRN_SRR1,r2
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+ tlbld r3
+ rfi
+
+#ifndef CONFIG_ALTIVEC
+#define altivec_assist_exception unknown_exception
+#endif
+
+ EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_EE)
+ EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE)
+ EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE)
+ EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD)
+ EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE)
+ EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2f00, MOLTrampoline, unknown_exception, EXC_XFER_EE_LITE)
+
+ .globl mol_trampoline
+ .set mol_trampoline, i0x2f00
+
+ . = 0x3000
+
+AltiVecUnavailable:
+ EXCEPTION_PROLOG
+#ifdef CONFIG_ALTIVEC
+ beq 1f
+ bl load_up_altivec /* if from user, just load it up */
+ b fast_exception_return
+#endif /* CONFIG_ALTIVEC */
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception)
+
+PerformanceMonitor:
+ EXCEPTION_PROLOG
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_STD(0xf00, performance_monitor_exception)
+
+
+/*
+ * This code is jumped to from the startup code to copy
+ * the kernel image to physical address PHYSICAL_START.
+ */
+relocate_kernel:
+ addis r9,r26,klimit@ha /* fetch klimit */
+ lwz r25,klimit@l(r9)
+ addis r25,r25,-KERNELBASE@h
+ lis r3,PHYSICAL_START@h /* Destination base address */
+ li r6,0 /* Destination offset */
+ li r5,0x4000 /* # bytes of memory to copy */
+ bl copy_and_flush /* copy the first 0x4000 bytes */
+ addi r0,r3,4f@l /* jump to the address of 4f */
+ mtctr r0 /* in copy and do the rest. */
+ bctr /* jump to the copy */
+4: mr r5,r25
+ bl copy_and_flush /* copy the rest */
+ b turn_on_mmu
+
+/*
+ * Copy routine used to copy the kernel to start at physical address 0
+ * and flush and invalidate the caches as needed.
+ * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset
+ * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5.
+ */
+_ENTRY(copy_and_flush)
+ addi r5,r5,-4
+ addi r6,r6,-4
+4: li r0,L1_CACHE_BYTES/4
+ mtctr r0
+3: addi r6,r6,4 /* copy a cache line */
+ lwzx r0,r6,r4
+ stwx r0,r6,r3
+ bdnz 3b
+ dcbst r6,r3 /* write it to memory */
+ sync
+ icbi r6,r3 /* flush the icache line */
+ cmplw 0,r6,r5
+ blt 4b
+ sync /* additional sync needed on g4 */
+ isync
+ addi r5,r5,4
+ addi r6,r6,4
+ blr
+
+#ifdef CONFIG_SMP
+ .globl __secondary_start_mpc86xx
+__secondary_start_mpc86xx:
+ mfspr r3, SPRN_PIR
+ stw r3, __secondary_hold_acknowledge@l(0)
+ mr r24, r3 /* cpu # */
+ b __secondary_start
+
+ .globl __secondary_start_pmac_0
+__secondary_start_pmac_0:
+ /* NB the entries for cpus 0, 1, 2 must each occupy 8 bytes. */
+ li r24,0
+ b 1f
+ li r24,1
+ b 1f
+ li r24,2
+ b 1f
+ li r24,3
+1:
+ /* on powersurge, we come in here with IR=0 and DR=1, and DBAT 0
+ set to map the 0xf0000000 - 0xffffffff region */
+ mfmsr r0
+ rlwinm r0,r0,0,28,26 /* clear DR (0x10) */
+ SYNC
+ mtmsr r0
+ isync
+
+ .globl __secondary_start
+__secondary_start:
+ /* Copy some CPU settings from CPU 0 */
+ bl __restore_cpu_setup
+
+ lis r3,-KERNELBASE@h
+ mr r4,r24
+ bl call_setup_cpu /* Call setup_cpu for this CPU */
+#ifdef CONFIG_6xx
+ lis r3,-KERNELBASE@h
+ bl init_idle_6xx
+#endif /* CONFIG_6xx */
+
+ /* get current_thread_info and current */
+ lis r1,secondary_ti@ha
+ tophys(r1,r1)
+ lwz r1,secondary_ti@l(r1)
+ tophys(r2,r1)
+ lwz r2,TI_TASK(r2)
+
+ /* stack */
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ li r0,0
+ tophys(r3,r1)
+ stw r0,0(r3)
+
+ /* load up the MMU */
+ bl load_up_mmu
+
+ /* ptr to phys current thread */
+ tophys(r4,r2)
+ addi r4,r4,THREAD /* phys address of our thread_struct */
+ CLR_TOP32(r4)
+ mtspr SPRN_SPRG_THREAD,r4
+ li r3,0
+ mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */
+
+ /* enable MMU and jump to start_secondary */
+ li r4,MSR_KERNEL
+ FIX_SRR1(r4,r5)
+ lis r3,start_secondary@h
+ ori r3,r3,start_secondary@l
+ mtspr SPRN_SRR0,r3
+ mtspr SPRN_SRR1,r4
+ SYNC
+ RFI
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_KVM_BOOK3S_HANDLER
+#include "../kvm/book3s_rmhandlers.S"
+#endif
+
+/*
+ * Those generic dummy functions are kept for CPUs not
+ * included in CONFIG_6xx
+ */
+#if !defined(CONFIG_6xx)
+_ENTRY(__save_cpu_setup)
+ blr
+_ENTRY(__restore_cpu_setup)
+ blr
+#endif /* !defined(CONFIG_6xx) */
+
+
+/*
+ * Load stuff into the MMU. Intended to be called with
+ * IR=0 and DR=0.
+ */
+load_up_mmu:
+ sync /* Force all PTE updates to finish */
+ isync
+ tlbia /* Clear all TLB entries */
+ sync /* wait for tlbia/tlbie to finish */
+ TLBSYNC /* ... on all CPUs */
+ /* Load the SDR1 register (hash table base & size) */
+ lis r6,_SDR1@ha
+ tophys(r6,r6)
+ lwz r6,_SDR1@l(r6)
+ mtspr SPRN_SDR1,r6
+ li r0,16 /* load up segment register values */
+ mtctr r0 /* for context 0 */
+ lis r3,0x2000 /* Ku = 1, VSID = 0 */
+ li r4,0
+3: mtsrin r3,r4
+ addi r3,r3,0x111 /* increment VSID */
+ addis r4,r4,0x1000 /* address of next segment */
+ bdnz 3b
+
+/* Load the BAT registers with the values set up by MMU_init.
+ MMU_init takes care of whether we're on a 601 or not. */
+ mfpvr r3
+ srwi r3,r3,16
+ cmpwi r3,1
+ lis r3,BATS@ha
+ addi r3,r3,BATS@l
+ tophys(r3,r3)
+ LOAD_BAT(0,r3,r4,r5)
+ LOAD_BAT(1,r3,r4,r5)
+ LOAD_BAT(2,r3,r4,r5)
+ LOAD_BAT(3,r3,r4,r5)
+BEGIN_MMU_FTR_SECTION
+ LOAD_BAT(4,r3,r4,r5)
+ LOAD_BAT(5,r3,r4,r5)
+ LOAD_BAT(6,r3,r4,r5)
+ LOAD_BAT(7,r3,r4,r5)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+ blr
+
+/*
+ * This is where the main kernel code starts.
+ */
+start_here:
+ /* ptr to current */
+ lis r2,init_task@h
+ ori r2,r2,init_task@l
+ /* Set up for using our exception vectors */
+ /* ptr to phys current thread */
+ tophys(r4,r2)
+ addi r4,r4,THREAD /* init task's THREAD */
+ CLR_TOP32(r4)
+ mtspr SPRN_SPRG_THREAD,r4
+ li r3,0
+ mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */
+
+ /* stack */
+ lis r1,init_thread_union@ha
+ addi r1,r1,init_thread_union@l
+ li r0,0
+ stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+/*
+ * Do early platform-specific initialization,
+ * and set up the MMU.
+ */
+ li r3,0
+ mr r4,r31
+ bl machine_init
+ bl __save_cpu_setup
+ bl MMU_init
+
+/*
+ * Go back to running unmapped so we can load up new values
+ * for SDR1 (hash table pointer) and the segment registers
+ * and change to using our exception vectors.
+ */
+ lis r4,2f@h
+ ori r4,r4,2f@l
+ tophys(r4,r4)
+ li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR)
+ FIX_SRR1(r3,r5)
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r3
+ SYNC
+ RFI
+/* Load up the kernel context */
+2: bl load_up_mmu
+
+#ifdef CONFIG_BDI_SWITCH
+ /* Add helper information for the Abatron bdiGDB debugger.
+ * We do this here because we know the mmu is disabled, and
+ * will be enabled for real in just a few instructions.
+ */
+ lis r5, abatron_pteptrs@h
+ ori r5, r5, abatron_pteptrs@l
+ stw r5, 0xf0(r0) /* This much match your Abatron config */
+ lis r6, swapper_pg_dir@h
+ ori r6, r6, swapper_pg_dir@l
+ tophys(r5, r5)
+ stw r6, 0(r5)
+#endif /* CONFIG_BDI_SWITCH */
+
+/* Now turn on the MMU for real! */
+ li r4,MSR_KERNEL
+ FIX_SRR1(r4,r5)
+ lis r3,start_kernel@h
+ ori r3,r3,start_kernel@l
+ mtspr SPRN_SRR0,r3
+ mtspr SPRN_SRR1,r4
+ SYNC
+ RFI
+
+/*
+ * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next);
+ *
+ * Set up the segment registers for a new context.
+ */
+_ENTRY(switch_mmu_context)
+ lwz r3,MMCONTEXTID(r4)
+ cmpwi cr0,r3,0
+ blt- 4f
+ mulli r3,r3,897 /* multiply context by skew factor */
+ rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */
+ addis r3,r3,0x6000 /* Set Ks, Ku bits */
+ li r0,NUM_USER_SEGMENTS
+ mtctr r0
+
+#ifdef CONFIG_BDI_SWITCH
+ /* Context switch the PTE pointer for the Abatron BDI2000.
+ * The PGDIR is passed as second argument.
+ */
+ lwz r4,MM_PGD(r4)
+ lis r5, KERNELBASE@h
+ lwz r5, 0xf0(r5)
+ stw r4, 0x4(r5)
+#endif
+ li r4,0
+ isync
+3:
+ mtsrin r3,r4
+ addi r3,r3,0x111 /* next VSID */
+ rlwinm r3,r3,0,8,3 /* clear out any overflow from VSID field */
+ addis r4,r4,0x1000 /* address of next segment */
+ bdnz 3b
+ sync
+ isync
+ blr
+4: trap
+ EMIT_BUG_ENTRY 4b,__FILE__,__LINE__,0
+ blr
+
+/*
+ * An undocumented "feature" of 604e requires that the v bit
+ * be cleared before changing BAT values.
+ *
+ * Also, newer IBM firmware does not clear bat3 and 4 so
+ * this makes sure it's done.
+ * -- Cort
+ */
+clear_bats:
+ li r10,0
+ mfspr r9,SPRN_PVR
+ rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
+ cmpwi r9, 1
+ beq 1f
+
+ mtspr SPRN_DBAT0U,r10
+ mtspr SPRN_DBAT0L,r10
+ mtspr SPRN_DBAT1U,r10
+ mtspr SPRN_DBAT1L,r10
+ mtspr SPRN_DBAT2U,r10
+ mtspr SPRN_DBAT2L,r10
+ mtspr SPRN_DBAT3U,r10
+ mtspr SPRN_DBAT3L,r10
+1:
+ mtspr SPRN_IBAT0U,r10
+ mtspr SPRN_IBAT0L,r10
+ mtspr SPRN_IBAT1U,r10
+ mtspr SPRN_IBAT1L,r10
+ mtspr SPRN_IBAT2U,r10
+ mtspr SPRN_IBAT2L,r10
+ mtspr SPRN_IBAT3U,r10
+ mtspr SPRN_IBAT3L,r10
+BEGIN_MMU_FTR_SECTION
+ /* Here's a tweak: at this point, CPU setup have
+ * not been called yet, so HIGH_BAT_EN may not be
+ * set in HID0 for the 745x processors. However, it
+ * seems that doesn't affect our ability to actually
+ * write to these SPRs.
+ */
+ mtspr SPRN_DBAT4U,r10
+ mtspr SPRN_DBAT4L,r10
+ mtspr SPRN_DBAT5U,r10
+ mtspr SPRN_DBAT5L,r10
+ mtspr SPRN_DBAT6U,r10
+ mtspr SPRN_DBAT6L,r10
+ mtspr SPRN_DBAT7U,r10
+ mtspr SPRN_DBAT7L,r10
+ mtspr SPRN_IBAT4U,r10
+ mtspr SPRN_IBAT4L,r10
+ mtspr SPRN_IBAT5U,r10
+ mtspr SPRN_IBAT5L,r10
+ mtspr SPRN_IBAT6U,r10
+ mtspr SPRN_IBAT6L,r10
+ mtspr SPRN_IBAT7U,r10
+ mtspr SPRN_IBAT7L,r10
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+ blr
+
+flush_tlbs:
+ lis r10, 0x40
+1: addic. r10, r10, -0x1000
+ tlbie r10
+ bgt 1b
+ sync
+ blr
+
+mmu_off:
+ addi r4, r3, __after_mmu_off - _start
+ mfmsr r3
+ andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? */
+ beqlr
+ andc r3,r3,r0
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r3
+ sync
+ RFI
+
+/*
+ * On 601, we use 3 BATs to map up to 24M of RAM at _PAGE_OFFSET
+ * (we keep one for debugging) and on others, we use one 256M BAT.
+ */
+initial_bats:
+ lis r11,PAGE_OFFSET@h
+ mfspr r9,SPRN_PVR
+ rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
+ cmpwi 0,r9,1
+ bne 4f
+ ori r11,r11,4 /* set up BAT registers for 601 */
+ li r8,0x7f /* valid, block length = 8MB */
+ mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */
+ mtspr SPRN_IBAT0L,r8 /* lower BAT register */
+ addis r11,r11,0x800000@h
+ addis r8,r8,0x800000@h
+ mtspr SPRN_IBAT1U,r11
+ mtspr SPRN_IBAT1L,r8
+ addis r11,r11,0x800000@h
+ addis r8,r8,0x800000@h
+ mtspr SPRN_IBAT2U,r11
+ mtspr SPRN_IBAT2L,r8
+ isync
+ blr
+
+4: tophys(r8,r11)
+#ifdef CONFIG_SMP
+ ori r8,r8,0x12 /* R/W access, M=1 */
+#else
+ ori r8,r8,2 /* R/W access */
+#endif /* CONFIG_SMP */
+ ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */
+
+ mtspr SPRN_DBAT0L,r8 /* N.B. 6xx (not 601) have valid */
+ mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */
+ mtspr SPRN_IBAT0L,r8
+ mtspr SPRN_IBAT0U,r11
+ isync
+ blr
+
+
+#ifdef CONFIG_BOOTX_TEXT
+setup_disp_bat:
+ /*
+ * setup the display bat prepared for us in prom.c
+ */
+ mflr r8
+ bl reloc_offset
+ mtlr r8
+ addis r8,r3,disp_BAT@ha
+ addi r8,r8,disp_BAT@l
+ cmpwi cr0,r8,0
+ beqlr
+ lwz r11,0(r8)
+ lwz r8,4(r8)
+ mfspr r9,SPRN_PVR
+ rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
+ cmpwi 0,r9,1
+ beq 1f
+ mtspr SPRN_DBAT3L,r8
+ mtspr SPRN_DBAT3U,r11
+ blr
+1: mtspr SPRN_IBAT3L,r8
+ mtspr SPRN_IBAT3U,r11
+ blr
+#endif /* CONFIG_BOOTX_TEXT */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_CPM
+setup_cpm_bat:
+ lis r8, 0xf000
+ ori r8, r8, 0x002a
+ mtspr SPRN_DBAT1L, r8
+
+ lis r11, 0xf000
+ ori r11, r11, (BL_1M << 2) | 2
+ mtspr SPRN_DBAT1U, r11
+
+ blr
+#endif
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO
+setup_usbgecko_bat:
+ /* prepare a BAT for early io */
+#if defined(CONFIG_GAMECUBE)
+ lis r8, 0x0c00
+#elif defined(CONFIG_WII)
+ lis r8, 0x0d00
+#else
+#error Invalid platform for USB Gecko based early debugging.
+#endif
+ /*
+ * The virtual address used must match the virtual address
+ * associated to the fixmap entry FIX_EARLY_DEBUG_BASE.
+ */
+ lis r11, 0xfffe /* top 128K */
+ ori r8, r8, 0x002a /* uncached, guarded ,rw */
+ ori r11, r11, 0x2 /* 128K, Vs=1, Vp=0 */
+ mtspr SPRN_DBAT1L, r8
+ mtspr SPRN_DBAT1U, r11
+ blr
+#endif
+
+#ifdef CONFIG_8260
+/* Jump into the system reset for the rom.
+ * We first disable the MMU, and then jump to the ROM reset address.
+ *
+ * r3 is the board info structure, r4 is the location for starting.
+ * I use this for building a small kernel that can load other kernels,
+ * rather than trying to write or rely on a rom monitor that can tftp load.
+ */
+ .globl m8260_gorom
+m8260_gorom:
+ mfmsr r0
+ rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */
+ sync
+ mtmsr r0
+ sync
+ mfspr r11, SPRN_HID0
+ lis r10, 0
+ ori r10,r10,HID0_ICE|HID0_DCE
+ andc r11, r11, r10
+ mtspr SPRN_HID0, r11
+ isync
+ li r5, MSR_ME|MSR_RI
+ lis r6,2f@h
+ addis r6,r6,-KERNELBASE@h
+ ori r6,r6,2f@l
+ mtspr SPRN_SRR0,r6
+ mtspr SPRN_SRR1,r5
+ isync
+ sync
+ rfi
+2:
+ mtlr r4
+ blr
+#endif
+
+
+/*
+ * We put a few things here that have to be page-aligned.
+ * This stuff goes at the beginning of the data segment,
+ * which is page-aligned.
+ */
+ .data
+ .globl sdata
+sdata:
+ .globl empty_zero_page
+empty_zero_page:
+ .space 4096
+
+ .globl swapper_pg_dir
+swapper_pg_dir:
+ .space PGD_TABLE_SIZE
+
+ .globl intercept_table
+intercept_table:
+ .long 0, 0, i0x200, i0x300, i0x400, 0, i0x600, i0x700
+ .long i0x800, 0, 0, 0, 0, i0xd00, 0, 0
+ .long 0, 0, 0, i0x1300, 0, 0, 0, 0
+ .long 0, 0, 0, 0, 0, 0, 0, 0
+ .long 0, 0, 0, 0, 0, 0, 0, 0
+ .long 0, 0, 0, 0, 0, 0, 0, 0
+
+/* Room for two PTE pointers, usually the kernel and current user pointers
+ * to their respective root page table.
+ */
+abatron_pteptrs:
+ .space 8
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
new file mode 100644
index 00000000..4989661b
--- /dev/null
+++ b/arch/powerpc/kernel/head_40x.S
@@ -0,0 +1,998 @@
+/*
+ * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
+ * Initial PowerPC version.
+ * Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu>
+ * Rewritten for PReP
+ * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ * Low-level exception handers, MMU support, and rewrite.
+ * Copyright (c) 1997 Dan Malek <dmalek@jlc.net>
+ * PowerPC 8xx modifications.
+ * Copyright (c) 1998-1999 TiVo, Inc.
+ * PowerPC 403GCX modifications.
+ * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ * PowerPC 403GCX/405GP modifications.
+ * Copyright 2000 MontaVista Software Inc.
+ * PPC405 modifications
+ * PowerPC 403GCX/405GP modifications.
+ * Author: MontaVista Software, Inc.
+ * frank_rowand@mvista.com or source@mvista.com
+ * debbie_chu@mvista.com
+ *
+ *
+ * Module name: head_4xx.S
+ *
+ * Description:
+ * Kernel execution entry point code.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/init.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+
+/* As with the other PowerPC ports, it is expected that when code
+ * execution begins here, the following registers contain valid, yet
+ * optional, information:
+ *
+ * r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.)
+ * r4 - Starting address of the init RAM disk
+ * r5 - Ending address of the init RAM disk
+ * r6 - Start of kernel command line string (e.g. "mem=96m")
+ * r7 - End of kernel command line string
+ *
+ * This is all going to change RSN when we add bi_recs....... -- Dan
+ */
+ __HEAD
+_ENTRY(_stext);
+_ENTRY(_start);
+
+ mr r31,r3 /* save device tree ptr */
+
+ /* We have to turn on the MMU right away so we get cache modes
+ * set correctly.
+ */
+ bl initial_mmu
+
+/* We now have the lower 16 Meg mapped into TLB entries, and the caches
+ * ready to work.
+ */
+turn_on_mmu:
+ lis r0,MSR_KERNEL@h
+ ori r0,r0,MSR_KERNEL@l
+ mtspr SPRN_SRR1,r0
+ lis r0,start_here@h
+ ori r0,r0,start_here@l
+ mtspr SPRN_SRR0,r0
+ SYNC
+ rfi /* enables MMU */
+ b . /* prevent prefetch past rfi */
+
+/*
+ * This area is used for temporarily saving registers during the
+ * critical exception prolog.
+ */
+ . = 0xc0
+crit_save:
+_ENTRY(crit_r10)
+ .space 4
+_ENTRY(crit_r11)
+ .space 4
+_ENTRY(crit_srr0)
+ .space 4
+_ENTRY(crit_srr1)
+ .space 4
+_ENTRY(saved_ksp_limit)
+ .space 4
+
+/*
+ * Exception vector entry code. This code runs with address translation
+ * turned off (i.e. using physical addresses). We assume SPRG_THREAD has
+ * the physical address of the current task thread_struct.
+ * Note that we have to have decremented r1 before we write to any fields
+ * of the exception frame, since a critical interrupt could occur at any
+ * time, and it will write to the area immediately below the current r1.
+ */
+#define NORMAL_EXCEPTION_PROLOG \
+ mtspr SPRN_SPRG_SCRATCH0,r10; /* save two registers to work with */\
+ mtspr SPRN_SPRG_SCRATCH1,r11; \
+ mtspr SPRN_SPRG_SCRATCH2,r1; \
+ mfcr r10; /* save CR in r10 for now */\
+ mfspr r11,SPRN_SRR1; /* check whether user or kernel */\
+ andi. r11,r11,MSR_PR; \
+ beq 1f; \
+ mfspr r1,SPRN_SPRG_THREAD; /* if from user, start at top of */\
+ lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack */\
+ addi r1,r1,THREAD_SIZE; \
+1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\
+ tophys(r11,r1); \
+ stw r10,_CCR(r11); /* save various registers */\
+ stw r12,GPR12(r11); \
+ stw r9,GPR9(r11); \
+ mfspr r10,SPRN_SPRG_SCRATCH0; \
+ stw r10,GPR10(r11); \
+ mfspr r12,SPRN_SPRG_SCRATCH1; \
+ stw r12,GPR11(r11); \
+ mflr r10; \
+ stw r10,_LINK(r11); \
+ mfspr r10,SPRN_SPRG_SCRATCH2; \
+ mfspr r12,SPRN_SRR0; \
+ stw r10,GPR1(r11); \
+ mfspr r9,SPRN_SRR1; \
+ stw r10,0(r11); \
+ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
+ stw r0,GPR0(r11); \
+ SAVE_4GPRS(3, r11); \
+ SAVE_2GPRS(7, r11)
+
+/*
+ * Exception prolog for critical exceptions. This is a little different
+ * from the normal exception prolog above since a critical exception
+ * can potentially occur at any point during normal exception processing.
+ * Thus we cannot use the same SPRG registers as the normal prolog above.
+ * Instead we use a couple of words of memory at low physical addresses.
+ * This is OK since we don't support SMP on these processors.
+ */
+#define CRITICAL_EXCEPTION_PROLOG \
+ stw r10,crit_r10@l(0); /* save two registers to work with */\
+ stw r11,crit_r11@l(0); \
+ mfcr r10; /* save CR in r10 for now */\
+ mfspr r11,SPRN_SRR3; /* check whether user or kernel */\
+ andi. r11,r11,MSR_PR; \
+ lis r11,critirq_ctx@ha; \
+ tophys(r11,r11); \
+ lwz r11,critirq_ctx@l(r11); \
+ beq 1f; \
+ /* COMING FROM USER MODE */ \
+ mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
+ lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
+1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm */\
+ tophys(r11,r11); \
+ stw r10,_CCR(r11); /* save various registers */\
+ stw r12,GPR12(r11); \
+ stw r9,GPR9(r11); \
+ mflr r10; \
+ stw r10,_LINK(r11); \
+ mfspr r12,SPRN_DEAR; /* save DEAR and ESR in the frame */\
+ stw r12,_DEAR(r11); /* since they may have had stuff */\
+ mfspr r9,SPRN_ESR; /* in them at the point where the */\
+ stw r9,_ESR(r11); /* exception was taken */\
+ mfspr r12,SPRN_SRR2; \
+ stw r1,GPR1(r11); \
+ mfspr r9,SPRN_SRR3; \
+ stw r1,0(r11); \
+ tovirt(r1,r11); \
+ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
+ stw r0,GPR0(r11); \
+ SAVE_4GPRS(3, r11); \
+ SAVE_2GPRS(7, r11)
+
+ /*
+ * State at this point:
+ * r9 saved in stack frame, now saved SRR3 & ~MSR_WE
+ * r10 saved in crit_r10 and in stack frame, trashed
+ * r11 saved in crit_r11 and in stack frame,
+ * now phys stack/exception frame pointer
+ * r12 saved in stack frame, now saved SRR2
+ * CR saved in stack frame, CR0.EQ = !SRR3.PR
+ * LR, DEAR, ESR in stack frame
+ * r1 saved in stack frame, now virt stack/excframe pointer
+ * r0, r3-r8 saved in stack frame
+ */
+
+/*
+ * Exception vectors.
+ */
+#define START_EXCEPTION(n, label) \
+ . = n; \
+label:
+
+#define EXCEPTION(n, label, hdlr, xfer) \
+ START_EXCEPTION(n, label); \
+ NORMAL_EXCEPTION_PROLOG; \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ xfer(n, hdlr)
+
+#define CRITICAL_EXCEPTION(n, label, hdlr) \
+ START_EXCEPTION(n, label); \
+ CRITICAL_EXCEPTION_PROLOG; \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
+ NOCOPY, crit_transfer_to_handler, \
+ ret_from_crit_exc)
+
+#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \
+ li r10,trap; \
+ stw r10,_TRAP(r11); \
+ lis r10,msr@h; \
+ ori r10,r10,msr@l; \
+ copyee(r10, r9); \
+ bl tfer; \
+ .long hdlr; \
+ .long ret
+
+#define COPY_EE(d, s) rlwimi d,s,0,16,16
+#define NOCOPY(d, s)
+
+#define EXC_XFER_STD(n, hdlr) \
+ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \
+ ret_from_except_full)
+
+#define EXC_XFER_LITE(n, hdlr) \
+ EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
+ ret_from_except)
+
+#define EXC_XFER_EE(n, hdlr) \
+ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
+ ret_from_except_full)
+
+#define EXC_XFER_EE_LITE(n, hdlr) \
+ EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
+ ret_from_except)
+
+
+/*
+ * 0x0100 - Critical Interrupt Exception
+ */
+ CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, unknown_exception)
+
+/*
+ * 0x0200 - Machine Check Exception
+ */
+ CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
+
+/*
+ * 0x0300 - Data Storage Exception
+ * This happens for just a few reasons. U0 set (but we don't do that),
+ * or zone protection fault (user violation, write to protected page).
+ * If this is just an update of modified status, we do that quickly
+ * and exit. Otherwise, we call heavywight functions to do the work.
+ */
+ START_EXCEPTION(0x0300, DataStorage)
+ mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */
+ mtspr SPRN_SPRG_SCRATCH1, r11
+#ifdef CONFIG_403GCX
+ stw r12, 0(r0)
+ stw r9, 4(r0)
+ mfcr r11
+ mfspr r12, SPRN_PID
+ stw r11, 8(r0)
+ stw r12, 12(r0)
+#else
+ mtspr SPRN_SPRG_SCRATCH3, r12
+ mtspr SPRN_SPRG_SCRATCH4, r9
+ mfcr r11
+ mfspr r12, SPRN_PID
+ mtspr SPRN_SPRG_SCRATCH6, r11
+ mtspr SPRN_SPRG_SCRATCH5, r12
+#endif
+
+ /* First, check if it was a zone fault (which means a user
+ * tried to access a kernel or read-protected page - always
+ * a SEGV). All other faults here must be stores, so no
+ * need to check ESR_DST as well. */
+ mfspr r10, SPRN_ESR
+ andis. r10, r10, ESR_DIZ@h
+ bne 2f
+
+ mfspr r10, SPRN_DEAR /* Get faulting address */
+
+ /* If we are faulting a kernel address, we have to use the
+ * kernel page tables.
+ */
+ lis r11, PAGE_OFFSET@h
+ cmplw r10, r11
+ blt+ 3f
+ lis r11, swapper_pg_dir@h
+ ori r11, r11, swapper_pg_dir@l
+ li r9, 0
+ mtspr SPRN_PID, r9 /* TLB will have 0 TID */
+ b 4f
+
+ /* Get the PGD for the current thread.
+ */
+3:
+ mfspr r11,SPRN_SPRG_THREAD
+ lwz r11,PGDIR(r11)
+4:
+ tophys(r11, r11)
+ rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
+ lwz r11, 0(r11) /* Get L1 entry */
+ rlwinm. r12, r11, 0, 0, 19 /* Extract L2 (pte) base address */
+ beq 2f /* Bail if no table */
+
+ rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */
+ lwz r11, 0(r12) /* Get Linux PTE */
+
+ andi. r9, r11, _PAGE_RW /* Is it writeable? */
+ beq 2f /* Bail if not */
+
+ /* Update 'changed'.
+ */
+ ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
+ stw r11, 0(r12) /* Update Linux page table */
+
+ /* Most of the Linux PTE is ready to load into the TLB LO.
+ * We set ZSEL, where only the LS-bit determines user access.
+ * We set execute, because we don't have the granularity to
+ * properly set this at the page level (Linux problem).
+ * If shared is set, we cause a zero PID->TID load.
+ * Many of these bits are software only. Bits we don't set
+ * here we (properly should) assume have the appropriate value.
+ */
+ li r12, 0x0ce2
+ andc r11, r11, r12 /* Make sure 20, 21 are zero */
+
+ /* find the TLB index that caused the fault. It has to be here.
+ */
+ tlbsx r9, 0, r10
+
+ tlbwe r11, r9, TLB_DATA /* Load TLB LO */
+
+ /* Done...restore registers and get out of here.
+ */
+#ifdef CONFIG_403GCX
+ lwz r12, 12(r0)
+ lwz r11, 8(r0)
+ mtspr SPRN_PID, r12
+ mtcr r11
+ lwz r9, 4(r0)
+ lwz r12, 0(r0)
+#else
+ mfspr r12, SPRN_SPRG_SCRATCH5
+ mfspr r11, SPRN_SPRG_SCRATCH6
+ mtspr SPRN_PID, r12
+ mtcr r11
+ mfspr r9, SPRN_SPRG_SCRATCH4
+ mfspr r12, SPRN_SPRG_SCRATCH3
+#endif
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ PPC405_ERR77_SYNC
+ rfi /* Should sync shadow TLBs */
+ b . /* prevent prefetch past rfi */
+
+2:
+ /* The bailout. Restore registers to pre-exception conditions
+ * and call the heavyweights to help us out.
+ */
+#ifdef CONFIG_403GCX
+ lwz r12, 12(r0)
+ lwz r11, 8(r0)
+ mtspr SPRN_PID, r12
+ mtcr r11
+ lwz r9, 4(r0)
+ lwz r12, 0(r0)
+#else
+ mfspr r12, SPRN_SPRG_SCRATCH5
+ mfspr r11, SPRN_SPRG_SCRATCH6
+ mtspr SPRN_PID, r12
+ mtcr r11
+ mfspr r9, SPRN_SPRG_SCRATCH4
+ mfspr r12, SPRN_SPRG_SCRATCH3
+#endif
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ b DataAccess
+
+/*
+ * 0x0400 - Instruction Storage Exception
+ * This is caused by a fetch from non-execute or guarded pages.
+ */
+ START_EXCEPTION(0x0400, InstructionAccess)
+ NORMAL_EXCEPTION_PROLOG
+ mr r4,r12 /* Pass SRR0 as arg2 */
+ li r5,0 /* Pass zero as arg3 */
+ EXC_XFER_LITE(0x400, handle_page_fault)
+
+/* 0x0500 - External Interrupt Exception */
+ EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+
+/* 0x0600 - Alignment Exception */
+ START_EXCEPTION(0x0600, Alignment)
+ NORMAL_EXCEPTION_PROLOG
+ mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */
+ stw r4,_DEAR(r11)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_EE(0x600, alignment_exception)
+
+/* 0x0700 - Program Exception */
+ START_EXCEPTION(0x0700, ProgramCheck)
+ NORMAL_EXCEPTION_PROLOG
+ mfspr r4,SPRN_ESR /* Grab the ESR and save it */
+ stw r4,_ESR(r11)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_STD(0x700, program_check_exception)
+
+ EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_EE)
+
+/* 0x0C00 - System Call Exception */
+ START_EXCEPTION(0x0C00, SystemCall)
+ NORMAL_EXCEPTION_PROLOG
+ EXC_XFER_EE_LITE(0xc00, DoSyscall)
+
+ EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE)
+
+/* 0x1000 - Programmable Interval Timer (PIT) Exception */
+ START_EXCEPTION(0x1000, Decrementer)
+ NORMAL_EXCEPTION_PROLOG
+ lis r0,TSR_PIS@h
+ mtspr SPRN_TSR,r0 /* Clear the PIT exception */
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_LITE(0x1000, timer_interrupt)
+
+#if 0
+/* NOTE:
+ * FIT and WDT handlers are not implemented yet.
+ */
+
+/* 0x1010 - Fixed Interval Timer (FIT) Exception
+*/
+ STND_EXCEPTION(0x1010, FITException, unknown_exception)
+
+/* 0x1020 - Watchdog Timer (WDT) Exception
+*/
+#ifdef CONFIG_BOOKE_WDT
+ CRITICAL_EXCEPTION(0x1020, WDTException, WatchdogException)
+#else
+ CRITICAL_EXCEPTION(0x1020, WDTException, unknown_exception)
+#endif
+#endif
+
+/* 0x1100 - Data TLB Miss Exception
+ * As the name implies, translation is not in the MMU, so search the
+ * page tables and fix it. The only purpose of this function is to
+ * load TLB entries from the page table if they exist.
+ */
+ START_EXCEPTION(0x1100, DTLBMiss)
+ mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */
+ mtspr SPRN_SPRG_SCRATCH1, r11
+#ifdef CONFIG_403GCX
+ stw r12, 0(r0)
+ stw r9, 4(r0)
+ mfcr r11
+ mfspr r12, SPRN_PID
+ stw r11, 8(r0)
+ stw r12, 12(r0)
+#else
+ mtspr SPRN_SPRG_SCRATCH3, r12
+ mtspr SPRN_SPRG_SCRATCH4, r9
+ mfcr r11
+ mfspr r12, SPRN_PID
+ mtspr SPRN_SPRG_SCRATCH6, r11
+ mtspr SPRN_SPRG_SCRATCH5, r12
+#endif
+ mfspr r10, SPRN_DEAR /* Get faulting address */
+
+ /* If we are faulting a kernel address, we have to use the
+ * kernel page tables.
+ */
+ lis r11, PAGE_OFFSET@h
+ cmplw r10, r11
+ blt+ 3f
+ lis r11, swapper_pg_dir@h
+ ori r11, r11, swapper_pg_dir@l
+ li r9, 0
+ mtspr SPRN_PID, r9 /* TLB will have 0 TID */
+ b 4f
+
+ /* Get the PGD for the current thread.
+ */
+3:
+ mfspr r11,SPRN_SPRG_THREAD
+ lwz r11,PGDIR(r11)
+4:
+ tophys(r11, r11)
+ rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
+ lwz r12, 0(r11) /* Get L1 entry */
+ andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */
+ beq 2f /* Bail if no table */
+
+ rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */
+ lwz r11, 0(r12) /* Get Linux PTE */
+ andi. r9, r11, _PAGE_PRESENT
+ beq 5f
+
+ ori r11, r11, _PAGE_ACCESSED
+ stw r11, 0(r12)
+
+ /* Create TLB tag. This is the faulting address plus a static
+ * set of bits. These are size, valid, E, U0.
+ */
+ li r12, 0x00c0
+ rlwimi r10, r12, 0, 20, 31
+
+ b finish_tlb_load
+
+2: /* Check for possible large-page pmd entry */
+ rlwinm. r9, r12, 2, 22, 24
+ beq 5f
+
+ /* Create TLB tag. This is the faulting address, plus a static
+ * set of bits (valid, E, U0) plus the size from the PMD.
+ */
+ ori r9, r9, 0x40
+ rlwimi r10, r9, 0, 20, 31
+ mr r11, r12
+
+ b finish_tlb_load
+
+5:
+ /* The bailout. Restore registers to pre-exception conditions
+ * and call the heavyweights to help us out.
+ */
+#ifdef CONFIG_403GCX
+ lwz r12, 12(r0)
+ lwz r11, 8(r0)
+ mtspr SPRN_PID, r12
+ mtcr r11
+ lwz r9, 4(r0)
+ lwz r12, 0(r0)
+#else
+ mfspr r12, SPRN_SPRG_SCRATCH5
+ mfspr r11, SPRN_SPRG_SCRATCH6
+ mtspr SPRN_PID, r12
+ mtcr r11
+ mfspr r9, SPRN_SPRG_SCRATCH4
+ mfspr r12, SPRN_SPRG_SCRATCH3
+#endif
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ b DataAccess
+
+/* 0x1200 - Instruction TLB Miss Exception
+ * Nearly the same as above, except we get our information from different
+ * registers and bailout to a different point.
+ */
+ START_EXCEPTION(0x1200, ITLBMiss)
+ mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */
+ mtspr SPRN_SPRG_SCRATCH1, r11
+#ifdef CONFIG_403GCX
+ stw r12, 0(r0)
+ stw r9, 4(r0)
+ mfcr r11
+ mfspr r12, SPRN_PID
+ stw r11, 8(r0)
+ stw r12, 12(r0)
+#else
+ mtspr SPRN_SPRG_SCRATCH3, r12
+ mtspr SPRN_SPRG_SCRATCH4, r9
+ mfcr r11
+ mfspr r12, SPRN_PID
+ mtspr SPRN_SPRG_SCRATCH6, r11
+ mtspr SPRN_SPRG_SCRATCH5, r12
+#endif
+ mfspr r10, SPRN_SRR0 /* Get faulting address */
+
+ /* If we are faulting a kernel address, we have to use the
+ * kernel page tables.
+ */
+ lis r11, PAGE_OFFSET@h
+ cmplw r10, r11
+ blt+ 3f
+ lis r11, swapper_pg_dir@h
+ ori r11, r11, swapper_pg_dir@l
+ li r9, 0
+ mtspr SPRN_PID, r9 /* TLB will have 0 TID */
+ b 4f
+
+ /* Get the PGD for the current thread.
+ */
+3:
+ mfspr r11,SPRN_SPRG_THREAD
+ lwz r11,PGDIR(r11)
+4:
+ tophys(r11, r11)
+ rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
+ lwz r12, 0(r11) /* Get L1 entry */
+ andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */
+ beq 2f /* Bail if no table */
+
+ rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */
+ lwz r11, 0(r12) /* Get Linux PTE */
+ andi. r9, r11, _PAGE_PRESENT
+ beq 5f
+
+ ori r11, r11, _PAGE_ACCESSED
+ stw r11, 0(r12)
+
+ /* Create TLB tag. This is the faulting address plus a static
+ * set of bits. These are size, valid, E, U0.
+ */
+ li r12, 0x00c0
+ rlwimi r10, r12, 0, 20, 31
+
+ b finish_tlb_load
+
+2: /* Check for possible large-page pmd entry */
+ rlwinm. r9, r12, 2, 22, 24
+ beq 5f
+
+ /* Create TLB tag. This is the faulting address, plus a static
+ * set of bits (valid, E, U0) plus the size from the PMD.
+ */
+ ori r9, r9, 0x40
+ rlwimi r10, r9, 0, 20, 31
+ mr r11, r12
+
+ b finish_tlb_load
+
+5:
+ /* The bailout. Restore registers to pre-exception conditions
+ * and call the heavyweights to help us out.
+ */
+#ifdef CONFIG_403GCX
+ lwz r12, 12(r0)
+ lwz r11, 8(r0)
+ mtspr SPRN_PID, r12
+ mtcr r11
+ lwz r9, 4(r0)
+ lwz r12, 0(r0)
+#else
+ mfspr r12, SPRN_SPRG_SCRATCH5
+ mfspr r11, SPRN_SPRG_SCRATCH6
+ mtspr SPRN_PID, r12
+ mtcr r11
+ mfspr r9, SPRN_SPRG_SCRATCH4
+ mfspr r12, SPRN_SPRG_SCRATCH3
+#endif
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ b InstructionAccess
+
+ EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE)
+#ifdef CONFIG_IBM405_ERR51
+ /* 405GP errata 51 */
+ START_EXCEPTION(0x1700, Trap_17)
+ b DTLBMiss
+#else
+ EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE)
+#endif
+ EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_EE)
+
+/* Check for a single step debug exception while in an exception
+ * handler before state has been saved. This is to catch the case
+ * where an instruction that we are trying to single step causes
+ * an exception (eg ITLB/DTLB miss) and thus the first instruction of
+ * the exception handler generates a single step debug exception.
+ *
+ * If we get a debug trap on the first instruction of an exception handler,
+ * we reset the MSR_DE in the _exception handler's_ MSR (the debug trap is
+ * a critical exception, so we are using SPRN_CSRR1 to manipulate the MSR).
+ * The exception handler was handling a non-critical interrupt, so it will
+ * save (and later restore) the MSR via SPRN_SRR1, which will still have
+ * the MSR_DE bit set.
+ */
+ /* 0x2000 - Debug Exception */
+ START_EXCEPTION(0x2000, DebugTrap)
+ CRITICAL_EXCEPTION_PROLOG
+
+ /*
+ * If this is a single step or branch-taken exception in an
+ * exception entry sequence, it was probably meant to apply to
+ * the code where the exception occurred (since exception entry
+ * doesn't turn off DE automatically). We simulate the effect
+ * of turning off DE on entry to an exception handler by turning
+ * off DE in the SRR3 value and clearing the debug status.
+ */
+ mfspr r10,SPRN_DBSR /* check single-step/branch taken */
+ andis. r10,r10,DBSR_IC@h
+ beq+ 2f
+
+ andi. r10,r9,MSR_IR|MSR_PR /* check supervisor + MMU off */
+ beq 1f /* branch and fix it up */
+
+ mfspr r10,SPRN_SRR2 /* Faulting instruction address */
+ cmplwi r10,0x2100
+ bgt+ 2f /* address above exception vectors */
+
+ /* here it looks like we got an inappropriate debug exception. */
+1: rlwinm r9,r9,0,~MSR_DE /* clear DE in the SRR3 value */
+ lis r10,DBSR_IC@h /* clear the IC event */
+ mtspr SPRN_DBSR,r10
+ /* restore state and get out */
+ lwz r10,_CCR(r11)
+ lwz r0,GPR0(r11)
+ lwz r1,GPR1(r11)
+ mtcrf 0x80,r10
+ mtspr SPRN_SRR2,r12
+ mtspr SPRN_SRR3,r9
+ lwz r9,GPR9(r11)
+ lwz r12,GPR12(r11)
+ lwz r10,crit_r10@l(0)
+ lwz r11,crit_r11@l(0)
+ PPC405_ERR77_SYNC
+ rfci
+ b .
+
+ /* continue normal handling for a critical exception... */
+2: mfspr r4,SPRN_DBSR
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_TEMPLATE(DebugException, 0x2002, \
+ (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
+ NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
+
+/*
+ * The other Data TLB exceptions bail out to this point
+ * if they can't resolve the lightweight TLB fault.
+ */
+DataAccess:
+ NORMAL_EXCEPTION_PROLOG
+ mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
+ stw r5,_ESR(r11)
+ mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
+ EXC_XFER_LITE(0x300, handle_page_fault)
+
+/* Other PowerPC processors, namely those derived from the 6xx-series
+ * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved.
+ * However, for the 4xx-series processors these are neither defined nor
+ * reserved.
+ */
+
+ /* Damn, I came up one instruction too many to fit into the
+ * exception space :-). Both the instruction and data TLB
+ * miss get to this point to load the TLB.
+ * r10 - TLB_TAG value
+ * r11 - Linux PTE
+ * r12, r9 - available to use
+ * PID - loaded with proper value when we get here
+ * Upon exit, we reload everything and RFI.
+ * Actually, it will fit now, but oh well.....a common place
+ * to load the TLB.
+ */
+tlb_4xx_index:
+ .long 0
+finish_tlb_load:
+ /* load the next available TLB index.
+ */
+ lwz r9, tlb_4xx_index@l(0)
+ addi r9, r9, 1
+ andi. r9, r9, (PPC40X_TLB_SIZE-1)
+ stw r9, tlb_4xx_index@l(0)
+
+6:
+ /*
+ * Clear out the software-only bits in the PTE to generate the
+ * TLB_DATA value. These are the bottom 2 bits of the RPM, the
+ * top 3 bits of the zone field, and M.
+ */
+ li r12, 0x0ce2
+ andc r11, r11, r12
+
+ tlbwe r11, r9, TLB_DATA /* Load TLB LO */
+ tlbwe r10, r9, TLB_TAG /* Load TLB HI */
+
+ /* Done...restore registers and get out of here.
+ */
+#ifdef CONFIG_403GCX
+ lwz r12, 12(r0)
+ lwz r11, 8(r0)
+ mtspr SPRN_PID, r12
+ mtcr r11
+ lwz r9, 4(r0)
+ lwz r12, 0(r0)
+#else
+ mfspr r12, SPRN_SPRG_SCRATCH5
+ mfspr r11, SPRN_SPRG_SCRATCH6
+ mtspr SPRN_PID, r12
+ mtcr r11
+ mfspr r9, SPRN_SPRG_SCRATCH4
+ mfspr r12, SPRN_SPRG_SCRATCH3
+#endif
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ PPC405_ERR77_SYNC
+ rfi /* Should sync shadow TLBs */
+ b . /* prevent prefetch past rfi */
+
+/* extern void giveup_fpu(struct task_struct *prev)
+ *
+ * The PowerPC 4xx family of processors do not have an FPU, so this just
+ * returns.
+ */
+_ENTRY(giveup_fpu)
+ blr
+
+/* This is where the main kernel code starts.
+ */
+start_here:
+
+ /* ptr to current */
+ lis r2,init_task@h
+ ori r2,r2,init_task@l
+
+ /* ptr to phys current thread */
+ tophys(r4,r2)
+ addi r4,r4,THREAD /* init task's THREAD */
+ mtspr SPRN_SPRG_THREAD,r4
+
+ /* stack */
+ lis r1,init_thread_union@ha
+ addi r1,r1,init_thread_union@l
+ li r0,0
+ stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+
+ bl early_init /* We have to do this with MMU on */
+
+/*
+ * Decide what sort of machine this is and initialize the MMU.
+ */
+ li r3,0
+ mr r4,r31
+ bl machine_init
+ bl MMU_init
+
+/* Go back to running unmapped so we can load up new values
+ * and change to using our exception vectors.
+ * On the 4xx, all we have to do is invalidate the TLB to clear
+ * the old 16M byte TLB mappings.
+ */
+ lis r4,2f@h
+ ori r4,r4,2f@l
+ tophys(r4,r4)
+ lis r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@h
+ ori r3,r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@l
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r3
+ rfi
+ b . /* prevent prefetch past rfi */
+
+/* Load up the kernel context */
+2:
+ sync /* Flush to memory before changing TLB */
+ tlbia
+ isync /* Flush shadow TLBs */
+
+ /* set up the PTE pointers for the Abatron bdiGDB.
+ */
+ lis r6, swapper_pg_dir@h
+ ori r6, r6, swapper_pg_dir@l
+ lis r5, abatron_pteptrs@h
+ ori r5, r5, abatron_pteptrs@l
+ stw r5, 0xf0(r0) /* Must match your Abatron config file */
+ tophys(r5,r5)
+ stw r6, 0(r5)
+
+/* Now turn on the MMU for real! */
+ lis r4,MSR_KERNEL@h
+ ori r4,r4,MSR_KERNEL@l
+ lis r3,start_kernel@h
+ ori r3,r3,start_kernel@l
+ mtspr SPRN_SRR0,r3
+ mtspr SPRN_SRR1,r4
+ rfi /* enable MMU and jump to start_kernel */
+ b . /* prevent prefetch past rfi */
+
+/* Set up the initial MMU state so we can do the first level of
+ * kernel initialization. This maps the first 16 MBytes of memory 1:1
+ * virtual to physical and more importantly sets the cache mode.
+ */
+initial_mmu:
+ tlbia /* Invalidate all TLB entries */
+ isync
+
+ /* We should still be executing code at physical address 0x0000xxxx
+ * at this point. However, start_here is at virtual address
+ * 0xC000xxxx. So, set up a TLB mapping to cover this once
+ * translation is enabled.
+ */
+
+ lis r3,KERNELBASE@h /* Load the kernel virtual address */
+ ori r3,r3,KERNELBASE@l
+ tophys(r4,r3) /* Load the kernel physical address */
+
+ iccci r0,r3 /* Invalidate the i-cache before use */
+
+ /* Load the kernel PID.
+ */
+ li r0,0
+ mtspr SPRN_PID,r0
+ sync
+
+ /* Configure and load one entry into TLB slots 63 */
+ clrrwi r4,r4,10 /* Mask off the real page number */
+ ori r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */
+
+ clrrwi r3,r3,10 /* Mask off the effective page number */
+ ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_16M))
+
+ li r0,63 /* TLB slot 63 */
+
+ tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */
+ tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */
+
+#if defined(CONFIG_SERIAL_TEXT_DEBUG) && defined(SERIAL_DEBUG_IO_BASE)
+
+ /* Load a TLB entry for the UART, so that ppc4xx_progress() can use
+ * the UARTs nice and early. We use a 4k real==virtual mapping. */
+
+ lis r3,SERIAL_DEBUG_IO_BASE@h
+ ori r3,r3,SERIAL_DEBUG_IO_BASE@l
+ mr r4,r3
+ clrrwi r4,r4,12
+ ori r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G)
+
+ clrrwi r3,r3,12
+ ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K))
+
+ li r0,0 /* TLB slot 0 */
+ tlbwe r4,r0,TLB_DATA
+ tlbwe r3,r0,TLB_TAG
+#endif /* CONFIG_SERIAL_DEBUG_TEXT && SERIAL_DEBUG_IO_BASE */
+
+ isync
+
+ /* Establish the exception vector base
+ */
+ lis r4,KERNELBASE@h /* EVPR only uses the high 16-bits */
+ tophys(r0,r4) /* Use the physical address */
+ mtspr SPRN_EVPR,r0
+
+ blr
+
+_GLOBAL(abort)
+ mfspr r13,SPRN_DBCR0
+ oris r13,r13,DBCR0_RST_SYSTEM@h
+ mtspr SPRN_DBCR0,r13
+
+_GLOBAL(set_context)
+
+#ifdef CONFIG_BDI_SWITCH
+ /* Context switch the PTE pointer for the Abatron BDI2000.
+ * The PGDIR is the second parameter.
+ */
+ lis r5, KERNELBASE@h
+ lwz r5, 0xf0(r5)
+ stw r4, 0x4(r5)
+#endif
+ sync
+ mtspr SPRN_PID,r3
+ isync /* Need an isync to flush shadow */
+ /* TLBs after changing PID */
+ blr
+
+/* We put a few things here that have to be page-aligned. This stuff
+ * goes at the beginning of the data segment, which is page-aligned.
+ */
+ .data
+ .align 12
+ .globl sdata
+sdata:
+ .globl empty_zero_page
+empty_zero_page:
+ .space 4096
+ .globl swapper_pg_dir
+swapper_pg_dir:
+ .space PGD_TABLE_SIZE
+
+/* Room for two PTE pointers, usually the kernel and current user pointers
+ * to their respective root page table.
+ */
+abatron_pteptrs:
+ .space 8
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
new file mode 100644
index 00000000..7dd2981b
--- /dev/null
+++ b/arch/powerpc/kernel/head_44x.S
@@ -0,0 +1,1285 @@
+/*
+ * Kernel execution entry point code.
+ *
+ * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
+ * Initial PowerPC version.
+ * Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu>
+ * Rewritten for PReP
+ * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ * Low-level exception handers, MMU support, and rewrite.
+ * Copyright (c) 1997 Dan Malek <dmalek@jlc.net>
+ * PowerPC 8xx modifications.
+ * Copyright (c) 1998-1999 TiVo, Inc.
+ * PowerPC 403GCX modifications.
+ * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ * PowerPC 403GCX/405GP modifications.
+ * Copyright 2000 MontaVista Software Inc.
+ * PPC405 modifications
+ * PowerPC 403GCX/405GP modifications.
+ * Author: MontaVista Software, Inc.
+ * frank_rowand@mvista.com or source@mvista.com
+ * debbie_chu@mvista.com
+ * Copyright 2002-2005 MontaVista Software, Inc.
+ * PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+#include <asm/synch.h>
+#include "head_booke.h"
+
+
+/* As with the other PowerPC ports, it is expected that when code
+ * execution begins here, the following registers contain valid, yet
+ * optional, information:
+ *
+ * r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.)
+ * r4 - Starting address of the init RAM disk
+ * r5 - Ending address of the init RAM disk
+ * r6 - Start of kernel command line string (e.g. "mem=128")
+ * r7 - End of kernel command line string
+ *
+ */
+ __HEAD
+_ENTRY(_stext);
+_ENTRY(_start);
+ /*
+ * Reserve a word at a fixed location to store the address
+ * of abatron_pteptrs
+ */
+ nop
+ mr r31,r3 /* save device tree ptr */
+ li r24,0 /* CPU number */
+
+#ifdef CONFIG_RELOCATABLE
+/*
+ * Relocate ourselves to the current runtime address.
+ * This is called only by the Boot CPU.
+ * "relocate" is called with our current runtime virutal
+ * address.
+ * r21 will be loaded with the physical runtime address of _stext
+ */
+ bl 0f /* Get our runtime address */
+0: mflr r21 /* Make it accessible */
+ addis r21,r21,(_stext - 0b)@ha
+ addi r21,r21,(_stext - 0b)@l /* Get our current runtime base */
+
+ /*
+ * We have the runtime (virutal) address of our base.
+ * We calculate our shift of offset from a 256M page.
+ * We could map the 256M page we belong to at PAGE_OFFSET and
+ * get going from there.
+ */
+ lis r4,KERNELBASE@h
+ ori r4,r4,KERNELBASE@l
+ rlwinm r6,r21,0,4,31 /* r6 = PHYS_START % 256M */
+ rlwinm r5,r4,0,4,31 /* r5 = KERNELBASE % 256M */
+ subf r3,r5,r6 /* r3 = r6 - r5 */
+ add r3,r4,r3 /* Required Virutal Address */
+
+ bl relocate
+#endif
+
+ bl init_cpu_state
+
+ /*
+ * This is where the main kernel code starts.
+ */
+
+ /* ptr to current */
+ lis r2,init_task@h
+ ori r2,r2,init_task@l
+
+ /* ptr to current thread */
+ addi r4,r2,THREAD /* init task's THREAD */
+ mtspr SPRN_SPRG_THREAD,r4
+
+ /* stack */
+ lis r1,init_thread_union@h
+ ori r1,r1,init_thread_union@l
+ li r0,0
+ stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+
+ bl early_init
+
+#ifdef CONFIG_RELOCATABLE
+ /*
+ * Relocatable kernel support based on processing of dynamic
+ * relocation entries.
+ *
+ * r25 will contain RPN/ERPN for the start address of memory
+ * r21 will contain the current offset of _stext
+ */
+ lis r3,kernstart_addr@ha
+ la r3,kernstart_addr@l(r3)
+
+ /*
+ * Compute the kernstart_addr.
+ * kernstart_addr => (r6,r8)
+ * kernstart_addr & ~0xfffffff => (r6,r7)
+ */
+ rlwinm r6,r25,0,28,31 /* ERPN. Bits 32-35 of Address */
+ rlwinm r7,r25,0,0,3 /* RPN - assuming 256 MB page size */
+ rlwinm r8,r21,0,4,31 /* r8 = (_stext & 0xfffffff) */
+ or r8,r7,r8 /* Compute the lower 32bit of kernstart_addr */
+
+ /* Store kernstart_addr */
+ stw r6,0(r3) /* higher 32bit */
+ stw r8,4(r3) /* lower 32bit */
+
+ /*
+ * Compute the virt_phys_offset :
+ * virt_phys_offset = stext.run - kernstart_addr
+ *
+ * stext.run = (KERNELBASE & ~0xfffffff) + (kernstart_addr & 0xfffffff)
+ * When we relocate, we have :
+ *
+ * (kernstart_addr & 0xfffffff) = (stext.run & 0xfffffff)
+ *
+ * hence:
+ * virt_phys_offset = (KERNELBASE & ~0xfffffff) - (kernstart_addr & ~0xfffffff)
+ *
+ */
+
+ /* KERNELBASE&~0xfffffff => (r4,r5) */
+ li r4, 0 /* higer 32bit */
+ lis r5,KERNELBASE@h
+ rlwinm r5,r5,0,0,3 /* Align to 256M, lower 32bit */
+
+ /*
+ * 64bit subtraction.
+ */
+ subfc r5,r7,r5
+ subfe r4,r6,r4
+
+ /* Store virt_phys_offset */
+ lis r3,virt_phys_offset@ha
+ la r3,virt_phys_offset@l(r3)
+
+ stw r4,0(r3)
+ stw r5,4(r3)
+
+#elif defined(CONFIG_DYNAMIC_MEMSTART)
+ /*
+ * Mapping based, page aligned dynamic kernel loading.
+ *
+ * r25 will contain RPN/ERPN for the start address of memory
+ *
+ * Add the difference between KERNELBASE and PAGE_OFFSET to the
+ * start of physical memory to get kernstart_addr.
+ */
+ lis r3,kernstart_addr@ha
+ la r3,kernstart_addr@l(r3)
+
+ lis r4,KERNELBASE@h
+ ori r4,r4,KERNELBASE@l
+ lis r5,PAGE_OFFSET@h
+ ori r5,r5,PAGE_OFFSET@l
+ subf r4,r5,r4
+
+ rlwinm r6,r25,0,28,31 /* ERPN */
+ rlwinm r7,r25,0,0,3 /* RPN - assuming 256 MB page size */
+ add r7,r7,r4
+
+ stw r6,0(r3)
+ stw r7,4(r3)
+#endif
+
+/*
+ * Decide what sort of machine this is and initialize the MMU.
+ */
+ li r3,0
+ mr r4,r31
+ bl machine_init
+ bl MMU_init
+
+ /* Setup PTE pointers for the Abatron bdiGDB */
+ lis r6, swapper_pg_dir@h
+ ori r6, r6, swapper_pg_dir@l
+ lis r5, abatron_pteptrs@h
+ ori r5, r5, abatron_pteptrs@l
+ lis r4, KERNELBASE@h
+ ori r4, r4, KERNELBASE@l
+ stw r5, 0(r4) /* Save abatron_pteptrs at a fixed location */
+ stw r6, 0(r5)
+
+ /* Clear the Machine Check Syndrome Register */
+ li r0,0
+ mtspr SPRN_MCSR,r0
+
+ /* Let's move on */
+ lis r4,start_kernel@h
+ ori r4,r4,start_kernel@l
+ lis r3,MSR_KERNEL@h
+ ori r3,r3,MSR_KERNEL@l
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r3
+ rfi /* change context and jump to start_kernel */
+
+/*
+ * Interrupt vector entry code
+ *
+ * The Book E MMUs are always on so we don't need to handle
+ * interrupts in real mode as with previous PPC processors. In
+ * this case we handle interrupts in the kernel virtual address
+ * space.
+ *
+ * Interrupt vectors are dynamically placed relative to the
+ * interrupt prefix as determined by the address of interrupt_base.
+ * The interrupt vectors offsets are programmed using the labels
+ * for each interrupt vector entry.
+ *
+ * Interrupt vectors must be aligned on a 16 byte boundary.
+ * We align on a 32 byte cache line boundary for good measure.
+ */
+
+interrupt_base:
+ /* Critical Input Interrupt */
+ CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception)
+
+ /* Machine Check Interrupt */
+ CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
+ MCHECK_EXCEPTION(0x0210, MachineCheckA, machine_check_exception)
+
+ /* Data Storage Interrupt */
+ DATA_STORAGE_EXCEPTION
+
+ /* Instruction Storage Interrupt */
+ INSTRUCTION_STORAGE_EXCEPTION
+
+ /* External Input Interrupt */
+ EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE)
+
+ /* Alignment Interrupt */
+ ALIGNMENT_EXCEPTION
+
+ /* Program Interrupt */
+ PROGRAM_EXCEPTION
+
+ /* Floating Point Unavailable Interrupt */
+#ifdef CONFIG_PPC_FPU
+ FP_UNAVAILABLE_EXCEPTION
+#else
+ EXCEPTION(0x2010, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
+#endif
+ /* System Call Interrupt */
+ START_EXCEPTION(SystemCall)
+ NORMAL_EXCEPTION_PROLOG
+ EXC_XFER_EE_LITE(0x0c00, DoSyscall)
+
+ /* Auxiliary Processor Unavailable Interrupt */
+ EXCEPTION(0x2020, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
+
+ /* Decrementer Interrupt */
+ DECREMENTER_EXCEPTION
+
+ /* Fixed Internal Timer Interrupt */
+ /* TODO: Add FIT support */
+ EXCEPTION(0x1010, FixedIntervalTimer, unknown_exception, EXC_XFER_EE)
+
+ /* Watchdog Timer Interrupt */
+ /* TODO: Add watchdog support */
+#ifdef CONFIG_BOOKE_WDT
+ CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException)
+#else
+ CRITICAL_EXCEPTION(0x1020, WatchdogTimer, unknown_exception)
+#endif
+
+ /* Data TLB Error Interrupt */
+ START_EXCEPTION(DataTLBError44x)
+ mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
+ mtspr SPRN_SPRG_WSCRATCH1, r11
+ mtspr SPRN_SPRG_WSCRATCH2, r12
+ mtspr SPRN_SPRG_WSCRATCH3, r13
+ mfcr r11
+ mtspr SPRN_SPRG_WSCRATCH4, r11
+ mfspr r10, SPRN_DEAR /* Get faulting address */
+
+ /* If we are faulting a kernel address, we have to use the
+ * kernel page tables.
+ */
+ lis r11, PAGE_OFFSET@h
+ cmplw r10, r11
+ blt+ 3f
+ lis r11, swapper_pg_dir@h
+ ori r11, r11, swapper_pg_dir@l
+
+ mfspr r12,SPRN_MMUCR
+ rlwinm r12,r12,0,0,23 /* Clear TID */
+
+ b 4f
+
+ /* Get the PGD for the current thread */
+3:
+ mfspr r11,SPRN_SPRG_THREAD
+ lwz r11,PGDIR(r11)
+
+ /* Load PID into MMUCR TID */
+ mfspr r12,SPRN_MMUCR
+ mfspr r13,SPRN_PID /* Get PID */
+ rlwimi r12,r13,0,24,31 /* Set TID */
+
+4:
+ mtspr SPRN_MMUCR,r12
+
+ /* Mask of required permission bits. Note that while we
+ * do copy ESR:ST to _PAGE_RW position as trying to write
+ * to an RO page is pretty common, we don't do it with
+ * _PAGE_DIRTY. We could do it, but it's a fairly rare
+ * event so I'd rather take the overhead when it happens
+ * rather than adding an instruction here. We should measure
+ * whether the whole thing is worth it in the first place
+ * as we could avoid loading SPRN_ESR completely in the first
+ * place...
+ *
+ * TODO: Is it worth doing that mfspr & rlwimi in the first
+ * place or can we save a couple of instructions here ?
+ */
+ mfspr r12,SPRN_ESR
+ li r13,_PAGE_PRESENT|_PAGE_ACCESSED
+ rlwimi r13,r12,10,30,30
+
+ /* Load the PTE */
+ /* Compute pgdir/pmd offset */
+ rlwinm r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK_BIT, 29
+ lwzx r11, r12, r11 /* Get pgd/pmd entry */
+ rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */
+ beq 2f /* Bail if no table */
+
+ /* Compute pte address */
+ rlwimi r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK_BIT, 28
+ lwz r11, 0(r12) /* Get high word of pte entry */
+ lwz r12, 4(r12) /* Get low word of pte entry */
+
+ lis r10,tlb_44x_index@ha
+
+ andc. r13,r13,r12 /* Check permission */
+
+ /* Load the next available TLB index */
+ lwz r13,tlb_44x_index@l(r10)
+
+ bne 2f /* Bail if permission mismach */
+
+ /* Increment, rollover, and store TLB index */
+ addi r13,r13,1
+
+ /* Compare with watermark (instruction gets patched) */
+ .globl tlb_44x_patch_hwater_D
+tlb_44x_patch_hwater_D:
+ cmpwi 0,r13,1 /* reserve entries */
+ ble 5f
+ li r13,0
+5:
+ /* Store the next available TLB index */
+ stw r13,tlb_44x_index@l(r10)
+
+ /* Re-load the faulting address */
+ mfspr r10,SPRN_DEAR
+
+ /* Jump to common tlb load */
+ b finish_tlb_load_44x
+
+2:
+ /* The bailout. Restore registers to pre-exception conditions
+ * and call the heavyweights to help us out.
+ */
+ mfspr r11, SPRN_SPRG_RSCRATCH4
+ mtcr r11
+ mfspr r13, SPRN_SPRG_RSCRATCH3
+ mfspr r12, SPRN_SPRG_RSCRATCH2
+ mfspr r11, SPRN_SPRG_RSCRATCH1
+ mfspr r10, SPRN_SPRG_RSCRATCH0
+ b DataStorage
+
+ /* Instruction TLB Error Interrupt */
+ /*
+ * Nearly the same as above, except we get our
+ * information from different registers and bailout
+ * to a different point.
+ */
+ START_EXCEPTION(InstructionTLBError44x)
+ mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
+ mtspr SPRN_SPRG_WSCRATCH1, r11
+ mtspr SPRN_SPRG_WSCRATCH2, r12
+ mtspr SPRN_SPRG_WSCRATCH3, r13
+ mfcr r11
+ mtspr SPRN_SPRG_WSCRATCH4, r11
+ mfspr r10, SPRN_SRR0 /* Get faulting address */
+
+ /* If we are faulting a kernel address, we have to use the
+ * kernel page tables.
+ */
+ lis r11, PAGE_OFFSET@h
+ cmplw r10, r11
+ blt+ 3f
+ lis r11, swapper_pg_dir@h
+ ori r11, r11, swapper_pg_dir@l
+
+ mfspr r12,SPRN_MMUCR
+ rlwinm r12,r12,0,0,23 /* Clear TID */
+
+ b 4f
+
+ /* Get the PGD for the current thread */
+3:
+ mfspr r11,SPRN_SPRG_THREAD
+ lwz r11,PGDIR(r11)
+
+ /* Load PID into MMUCR TID */
+ mfspr r12,SPRN_MMUCR
+ mfspr r13,SPRN_PID /* Get PID */
+ rlwimi r12,r13,0,24,31 /* Set TID */
+
+4:
+ mtspr SPRN_MMUCR,r12
+
+ /* Make up the required permissions */
+ li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+
+ /* Compute pgdir/pmd offset */
+ rlwinm r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK_BIT, 29
+ lwzx r11, r12, r11 /* Get pgd/pmd entry */
+ rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */
+ beq 2f /* Bail if no table */
+
+ /* Compute pte address */
+ rlwimi r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK_BIT, 28
+ lwz r11, 0(r12) /* Get high word of pte entry */
+ lwz r12, 4(r12) /* Get low word of pte entry */
+
+ lis r10,tlb_44x_index@ha
+
+ andc. r13,r13,r12 /* Check permission */
+
+ /* Load the next available TLB index */
+ lwz r13,tlb_44x_index@l(r10)
+
+ bne 2f /* Bail if permission mismach */
+
+ /* Increment, rollover, and store TLB index */
+ addi r13,r13,1
+
+ /* Compare with watermark (instruction gets patched) */
+ .globl tlb_44x_patch_hwater_I
+tlb_44x_patch_hwater_I:
+ cmpwi 0,r13,1 /* reserve entries */
+ ble 5f
+ li r13,0
+5:
+ /* Store the next available TLB index */
+ stw r13,tlb_44x_index@l(r10)
+
+ /* Re-load the faulting address */
+ mfspr r10,SPRN_SRR0
+
+ /* Jump to common TLB load point */
+ b finish_tlb_load_44x
+
+2:
+ /* The bailout. Restore registers to pre-exception conditions
+ * and call the heavyweights to help us out.
+ */
+ mfspr r11, SPRN_SPRG_RSCRATCH4
+ mtcr r11
+ mfspr r13, SPRN_SPRG_RSCRATCH3
+ mfspr r12, SPRN_SPRG_RSCRATCH2
+ mfspr r11, SPRN_SPRG_RSCRATCH1
+ mfspr r10, SPRN_SPRG_RSCRATCH0
+ b InstructionStorage
+
+/*
+ * Both the instruction and data TLB miss get to this
+ * point to load the TLB.
+ * r10 - EA of fault
+ * r11 - PTE high word value
+ * r12 - PTE low word value
+ * r13 - TLB index
+ * MMUCR - loaded with proper value when we get here
+ * Upon exit, we reload everything and RFI.
+ */
+finish_tlb_load_44x:
+ /* Combine RPN & ERPN an write WS 0 */
+ rlwimi r11,r12,0,0,31-PAGE_SHIFT
+ tlbwe r11,r13,PPC44x_TLB_XLAT
+
+ /*
+ * Create WS1. This is the faulting address (EPN),
+ * page size, and valid flag.
+ */
+ li r11,PPC44x_TLB_VALID | PPC44x_TLBE_SIZE
+ /* Insert valid and page size */
+ rlwimi r10,r11,0,PPC44x_PTE_ADD_MASK_BIT,31
+ tlbwe r10,r13,PPC44x_TLB_PAGEID /* Write PAGEID */
+
+ /* And WS 2 */
+ li r10,0xf85 /* Mask to apply from PTE */
+ rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */
+ and r11,r12,r10 /* Mask PTE bits to keep */
+ andi. r10,r12,_PAGE_USER /* User page ? */
+ beq 1f /* nope, leave U bits empty */
+ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
+1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */
+
+ /* Done...restore registers and get out of here.
+ */
+ mfspr r11, SPRN_SPRG_RSCRATCH4
+ mtcr r11
+ mfspr r13, SPRN_SPRG_RSCRATCH3
+ mfspr r12, SPRN_SPRG_RSCRATCH2
+ mfspr r11, SPRN_SPRG_RSCRATCH1
+ mfspr r10, SPRN_SPRG_RSCRATCH0
+ rfi /* Force context change */
+
+/* TLB error interrupts for 476
+ */
+#ifdef CONFIG_PPC_47x
+ START_EXCEPTION(DataTLBError47x)
+ mtspr SPRN_SPRG_WSCRATCH0,r10 /* Save some working registers */
+ mtspr SPRN_SPRG_WSCRATCH1,r11
+ mtspr SPRN_SPRG_WSCRATCH2,r12
+ mtspr SPRN_SPRG_WSCRATCH3,r13
+ mfcr r11
+ mtspr SPRN_SPRG_WSCRATCH4,r11
+ mfspr r10,SPRN_DEAR /* Get faulting address */
+
+ /* If we are faulting a kernel address, we have to use the
+ * kernel page tables.
+ */
+ lis r11,PAGE_OFFSET@h
+ cmplw cr0,r10,r11
+ blt+ 3f
+ lis r11,swapper_pg_dir@h
+ ori r11,r11, swapper_pg_dir@l
+ li r12,0 /* MMUCR = 0 */
+ b 4f
+
+ /* Get the PGD for the current thread and setup MMUCR */
+3: mfspr r11,SPRN_SPRG3
+ lwz r11,PGDIR(r11)
+ mfspr r12,SPRN_PID /* Get PID */
+4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */
+
+ /* Mask of required permission bits. Note that while we
+ * do copy ESR:ST to _PAGE_RW position as trying to write
+ * to an RO page is pretty common, we don't do it with
+ * _PAGE_DIRTY. We could do it, but it's a fairly rare
+ * event so I'd rather take the overhead when it happens
+ * rather than adding an instruction here. We should measure
+ * whether the whole thing is worth it in the first place
+ * as we could avoid loading SPRN_ESR completely in the first
+ * place...
+ *
+ * TODO: Is it worth doing that mfspr & rlwimi in the first
+ * place or can we save a couple of instructions here ?
+ */
+ mfspr r12,SPRN_ESR
+ li r13,_PAGE_PRESENT|_PAGE_ACCESSED
+ rlwimi r13,r12,10,30,30
+
+ /* Load the PTE */
+ /* Compute pgdir/pmd offset */
+ rlwinm r12,r10,PPC44x_PGD_OFF_SHIFT,PPC44x_PGD_OFF_MASK_BIT,29
+ lwzx r11,r12,r11 /* Get pgd/pmd entry */
+
+ /* Word 0 is EPN,V,TS,DSIZ */
+ li r12,PPC47x_TLB0_VALID | PPC47x_TLBE_SIZE
+ rlwimi r10,r12,0,32-PAGE_SHIFT,31 /* Insert valid and page size*/
+ li r12,0
+ tlbwe r10,r12,0
+
+ /* XXX can we do better ? Need to make sure tlbwe has established
+ * latch V bit in MMUCR0 before the PTE is loaded further down */
+#ifdef CONFIG_SMP
+ isync
+#endif
+
+ rlwinm. r12,r11,0,0,20 /* Extract pt base address */
+ /* Compute pte address */
+ rlwimi r12,r10,PPC44x_PTE_ADD_SHIFT,PPC44x_PTE_ADD_MASK_BIT,28
+ beq 2f /* Bail if no table */
+ lwz r11,0(r12) /* Get high word of pte entry */
+
+ /* XXX can we do better ? maybe insert a known 0 bit from r11 into the
+ * bottom of r12 to create a data dependency... We can also use r10
+ * as destination nowadays
+ */
+#ifdef CONFIG_SMP
+ lwsync
+#endif
+ lwz r12,4(r12) /* Get low word of pte entry */
+
+ andc. r13,r13,r12 /* Check permission */
+
+ /* Jump to common tlb load */
+ beq finish_tlb_load_47x
+
+2: /* The bailout. Restore registers to pre-exception conditions
+ * and call the heavyweights to help us out.
+ */
+ mfspr r11,SPRN_SPRG_RSCRATCH4
+ mtcr r11
+ mfspr r13,SPRN_SPRG_RSCRATCH3
+ mfspr r12,SPRN_SPRG_RSCRATCH2
+ mfspr r11,SPRN_SPRG_RSCRATCH1
+ mfspr r10,SPRN_SPRG_RSCRATCH0
+ b DataStorage
+
+ /* Instruction TLB Error Interrupt */
+ /*
+ * Nearly the same as above, except we get our
+ * information from different registers and bailout
+ * to a different point.
+ */
+ START_EXCEPTION(InstructionTLBError47x)
+ mtspr SPRN_SPRG_WSCRATCH0,r10 /* Save some working registers */
+ mtspr SPRN_SPRG_WSCRATCH1,r11
+ mtspr SPRN_SPRG_WSCRATCH2,r12
+ mtspr SPRN_SPRG_WSCRATCH3,r13
+ mfcr r11
+ mtspr SPRN_SPRG_WSCRATCH4,r11
+ mfspr r10,SPRN_SRR0 /* Get faulting address */
+
+ /* If we are faulting a kernel address, we have to use the
+ * kernel page tables.
+ */
+ lis r11,PAGE_OFFSET@h
+ cmplw cr0,r10,r11
+ blt+ 3f
+ lis r11,swapper_pg_dir@h
+ ori r11,r11, swapper_pg_dir@l
+ li r12,0 /* MMUCR = 0 */
+ b 4f
+
+ /* Get the PGD for the current thread and setup MMUCR */
+3: mfspr r11,SPRN_SPRG_THREAD
+ lwz r11,PGDIR(r11)
+ mfspr r12,SPRN_PID /* Get PID */
+4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */
+
+ /* Make up the required permissions */
+ li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+
+ /* Load PTE */
+ /* Compute pgdir/pmd offset */
+ rlwinm r12,r10,PPC44x_PGD_OFF_SHIFT,PPC44x_PGD_OFF_MASK_BIT,29
+ lwzx r11,r12,r11 /* Get pgd/pmd entry */
+
+ /* Word 0 is EPN,V,TS,DSIZ */
+ li r12,PPC47x_TLB0_VALID | PPC47x_TLBE_SIZE
+ rlwimi r10,r12,0,32-PAGE_SHIFT,31 /* Insert valid and page size*/
+ li r12,0
+ tlbwe r10,r12,0
+
+ /* XXX can we do better ? Need to make sure tlbwe has established
+ * latch V bit in MMUCR0 before the PTE is loaded further down */
+#ifdef CONFIG_SMP
+ isync
+#endif
+
+ rlwinm. r12,r11,0,0,20 /* Extract pt base address */
+ /* Compute pte address */
+ rlwimi r12,r10,PPC44x_PTE_ADD_SHIFT,PPC44x_PTE_ADD_MASK_BIT,28
+ beq 2f /* Bail if no table */
+
+ lwz r11,0(r12) /* Get high word of pte entry */
+ /* XXX can we do better ? maybe insert a known 0 bit from r11 into the
+ * bottom of r12 to create a data dependency... We can also use r10
+ * as destination nowadays
+ */
+#ifdef CONFIG_SMP
+ lwsync
+#endif
+ lwz r12,4(r12) /* Get low word of pte entry */
+
+ andc. r13,r13,r12 /* Check permission */
+
+ /* Jump to common TLB load point */
+ beq finish_tlb_load_47x
+
+2: /* The bailout. Restore registers to pre-exception conditions
+ * and call the heavyweights to help us out.
+ */
+ mfspr r11, SPRN_SPRG_RSCRATCH4
+ mtcr r11
+ mfspr r13, SPRN_SPRG_RSCRATCH3
+ mfspr r12, SPRN_SPRG_RSCRATCH2
+ mfspr r11, SPRN_SPRG_RSCRATCH1
+ mfspr r10, SPRN_SPRG_RSCRATCH0
+ b InstructionStorage
+
+/*
+ * Both the instruction and data TLB miss get to this
+ * point to load the TLB.
+ * r10 - free to use
+ * r11 - PTE high word value
+ * r12 - PTE low word value
+ * r13 - free to use
+ * MMUCR - loaded with proper value when we get here
+ * Upon exit, we reload everything and RFI.
+ */
+finish_tlb_load_47x:
+ /* Combine RPN & ERPN an write WS 1 */
+ rlwimi r11,r12,0,0,31-PAGE_SHIFT
+ tlbwe r11,r13,1
+
+ /* And make up word 2 */
+ li r10,0xf85 /* Mask to apply from PTE */
+ rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */
+ and r11,r12,r10 /* Mask PTE bits to keep */
+ andi. r10,r12,_PAGE_USER /* User page ? */
+ beq 1f /* nope, leave U bits empty */
+ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
+1: tlbwe r11,r13,2
+
+ /* Done...restore registers and get out of here.
+ */
+ mfspr r11, SPRN_SPRG_RSCRATCH4
+ mtcr r11
+ mfspr r13, SPRN_SPRG_RSCRATCH3
+ mfspr r12, SPRN_SPRG_RSCRATCH2
+ mfspr r11, SPRN_SPRG_RSCRATCH1
+ mfspr r10, SPRN_SPRG_RSCRATCH0
+ rfi
+
+#endif /* CONFIG_PPC_47x */
+
+ /* Debug Interrupt */
+ /*
+ * This statement needs to exist at the end of the IVPR
+ * definition just in case you end up taking a debug
+ * exception within another exception.
+ */
+ DEBUG_CRIT_EXCEPTION
+
+/*
+ * Global functions
+ */
+
+/*
+ * Adjust the machine check IVOR on 440A cores
+ */
+_GLOBAL(__fixup_440A_mcheck)
+ li r3,MachineCheckA@l
+ mtspr SPRN_IVOR1,r3
+ sync
+ blr
+
+/*
+ * extern void giveup_altivec(struct task_struct *prev)
+ *
+ * The 44x core does not have an AltiVec unit.
+ */
+_GLOBAL(giveup_altivec)
+ blr
+
+/*
+ * extern void giveup_fpu(struct task_struct *prev)
+ *
+ * The 44x core does not have an FPU.
+ */
+#ifndef CONFIG_PPC_FPU
+_GLOBAL(giveup_fpu)
+ blr
+#endif
+
+_GLOBAL(set_context)
+
+#ifdef CONFIG_BDI_SWITCH
+ /* Context switch the PTE pointer for the Abatron BDI2000.
+ * The PGDIR is the second parameter.
+ */
+ lis r5, abatron_pteptrs@h
+ ori r5, r5, abatron_pteptrs@l
+ stw r4, 0x4(r5)
+#endif
+ mtspr SPRN_PID,r3
+ isync /* Force context change */
+ blr
+
+/*
+ * Init CPU state. This is called at boot time or for secondary CPUs
+ * to setup initial TLB entries, setup IVORs, etc...
+ *
+ */
+_GLOBAL(init_cpu_state)
+ mflr r22
+#ifdef CONFIG_PPC_47x
+ /* We use the PVR to differenciate 44x cores from 476 */
+ mfspr r3,SPRN_PVR
+ srwi r3,r3,16
+ cmplwi cr0,r3,PVR_476FPE@h
+ beq head_start_47x
+ cmplwi cr0,r3,PVR_476@h
+ beq head_start_47x
+ cmplwi cr0,r3,PVR_476_ISS@h
+ beq head_start_47x
+#endif /* CONFIG_PPC_47x */
+
+/*
+ * In case the firmware didn't do it, we apply some workarounds
+ * that are good for all 440 core variants here
+ */
+ mfspr r3,SPRN_CCR0
+ rlwinm r3,r3,0,0,27 /* disable icache prefetch */
+ isync
+ mtspr SPRN_CCR0,r3
+ isync
+ sync
+
+/*
+ * Set up the initial MMU state for 44x
+ *
+ * We are still executing code at the virtual address
+ * mappings set by the firmware for the base of RAM.
+ *
+ * We first invalidate all TLB entries but the one
+ * we are running from. We then load the KERNELBASE
+ * mappings so we can begin to use kernel addresses
+ * natively and so the interrupt vector locations are
+ * permanently pinned (necessary since Book E
+ * implementations always have translation enabled).
+ *
+ * TODO: Use the known TLB entry we are running from to
+ * determine which physical region we are located
+ * in. This can be used to determine where in RAM
+ * (on a shared CPU system) or PCI memory space
+ * (on a DRAMless system) we are located.
+ * For now, we assume a perfect world which means
+ * we are located at the base of DRAM (physical 0).
+ */
+
+/*
+ * Search TLB for entry that we are currently using.
+ * Invalidate all entries but the one we are using.
+ */
+ /* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */
+ mfspr r3,SPRN_PID /* Get PID */
+ mfmsr r4 /* Get MSR */
+ andi. r4,r4,MSR_IS@l /* TS=1? */
+ beq wmmucr /* If not, leave STS=0 */
+ oris r3,r3,PPC44x_MMUCR_STS@h /* Set STS=1 */
+wmmucr: mtspr SPRN_MMUCR,r3 /* Put MMUCR */
+ sync
+
+ bl invstr /* Find our address */
+invstr: mflr r5 /* Make it accessible */
+ tlbsx r23,0,r5 /* Find entry we are in */
+ li r4,0 /* Start at TLB entry 0 */
+ li r3,0 /* Set PAGEID inval value */
+1: cmpw r23,r4 /* Is this our entry? */
+ beq skpinv /* If so, skip the inval */
+ tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */
+skpinv: addi r4,r4,1 /* Increment */
+ cmpwi r4,64 /* Are we done? */
+ bne 1b /* If not, repeat */
+ isync /* If so, context change */
+
+/*
+ * Configure and load pinned entry into TLB slot 63.
+ */
+#ifdef CONFIG_NONSTATIC_KERNEL
+ /*
+ * In case of a NONSTATIC_KERNEL we reuse the TLB XLAT
+ * entries of the initial mapping set by the boot loader.
+ * The XLAT entry is stored in r25
+ */
+
+ /* Read the XLAT entry for our current mapping */
+ tlbre r25,r23,PPC44x_TLB_XLAT
+
+ lis r3,KERNELBASE@h
+ ori r3,r3,KERNELBASE@l
+
+ /* Use our current RPN entry */
+ mr r4,r25
+#else
+
+ lis r3,PAGE_OFFSET@h
+ ori r3,r3,PAGE_OFFSET@l
+
+ /* Kernel is at the base of RAM */
+ li r4, 0 /* Load the kernel physical address */
+#endif
+
+ /* Load the kernel PID = 0 */
+ li r0,0
+ mtspr SPRN_PID,r0
+ sync
+
+ /* Initialize MMUCR */
+ li r5,0
+ mtspr SPRN_MMUCR,r5
+ sync
+
+ /* pageid fields */
+ clrrwi r3,r3,10 /* Mask off the effective page number */
+ ori r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_256M
+
+ /* xlat fields */
+ clrrwi r4,r4,10 /* Mask off the real page number */
+ /* ERPN is 0 for first 4GB page */
+
+ /* attrib fields */
+ /* Added guarded bit to protect against speculative loads/stores */
+ li r5,0
+ ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
+
+ li r0,63 /* TLB slot 63 */
+
+ tlbwe r3,r0,PPC44x_TLB_PAGEID /* Load the pageid fields */
+ tlbwe r4,r0,PPC44x_TLB_XLAT /* Load the translation fields */
+ tlbwe r5,r0,PPC44x_TLB_ATTRIB /* Load the attrib/access fields */
+
+ /* Force context change */
+ mfmsr r0
+ mtspr SPRN_SRR1, r0
+ lis r0,3f@h
+ ori r0,r0,3f@l
+ mtspr SPRN_SRR0,r0
+ sync
+ rfi
+
+ /* If necessary, invalidate original entry we used */
+3: cmpwi r23,63
+ beq 4f
+ li r6,0
+ tlbwe r6,r23,PPC44x_TLB_PAGEID
+ isync
+
+4:
+#ifdef CONFIG_PPC_EARLY_DEBUG_44x
+ /* Add UART mapping for early debug. */
+
+ /* pageid fields */
+ lis r3,PPC44x_EARLY_DEBUG_VIRTADDR@h
+ ori r3,r3,PPC44x_TLB_VALID|PPC44x_TLB_TS|PPC44x_TLB_64K
+
+ /* xlat fields */
+ lis r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h
+ ori r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH
+
+ /* attrib fields */
+ li r5,(PPC44x_TLB_SW|PPC44x_TLB_SR|PPC44x_TLB_I|PPC44x_TLB_G)
+ li r0,62 /* TLB slot 0 */
+
+ tlbwe r3,r0,PPC44x_TLB_PAGEID
+ tlbwe r4,r0,PPC44x_TLB_XLAT
+ tlbwe r5,r0,PPC44x_TLB_ATTRIB
+
+ /* Force context change */
+ isync
+#endif /* CONFIG_PPC_EARLY_DEBUG_44x */
+
+ /* Establish the interrupt vector offsets */
+ SET_IVOR(0, CriticalInput);
+ SET_IVOR(1, MachineCheck);
+ SET_IVOR(2, DataStorage);
+ SET_IVOR(3, InstructionStorage);
+ SET_IVOR(4, ExternalInput);
+ SET_IVOR(5, Alignment);
+ SET_IVOR(6, Program);
+ SET_IVOR(7, FloatingPointUnavailable);
+ SET_IVOR(8, SystemCall);
+ SET_IVOR(9, AuxillaryProcessorUnavailable);
+ SET_IVOR(10, Decrementer);
+ SET_IVOR(11, FixedIntervalTimer);
+ SET_IVOR(12, WatchdogTimer);
+ SET_IVOR(13, DataTLBError44x);
+ SET_IVOR(14, InstructionTLBError44x);
+ SET_IVOR(15, DebugCrit);
+
+ b head_start_common
+
+
+#ifdef CONFIG_PPC_47x
+
+#ifdef CONFIG_SMP
+
+/* Entry point for secondary 47x processors */
+_GLOBAL(start_secondary_47x)
+ mr r24,r3 /* CPU number */
+
+ bl init_cpu_state
+
+ /* Now we need to bolt the rest of kernel memory which
+ * is done in C code. We must be careful because our task
+ * struct or our stack can (and will probably) be out
+ * of reach of the initial 256M TLB entry, so we use a
+ * small temporary stack in .bss for that. This works
+ * because only one CPU at a time can be in this code
+ */
+ lis r1,temp_boot_stack@h
+ ori r1,r1,temp_boot_stack@l
+ addi r1,r1,1024-STACK_FRAME_OVERHEAD
+ li r0,0
+ stw r0,0(r1)
+ bl mmu_init_secondary
+
+ /* Now we can get our task struct and real stack pointer */
+
+ /* Get current_thread_info and current */
+ lis r1,secondary_ti@ha
+ lwz r1,secondary_ti@l(r1)
+ lwz r2,TI_TASK(r1)
+
+ /* Current stack pointer */
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ li r0,0
+ stw r0,0(r1)
+
+ /* Kernel stack for exception entry in SPRG3 */
+ addi r4,r2,THREAD /* init task's THREAD */
+ mtspr SPRN_SPRG3,r4
+
+ b start_secondary
+
+#endif /* CONFIG_SMP */
+
+/*
+ * Set up the initial MMU state for 44x
+ *
+ * We are still executing code at the virtual address
+ * mappings set by the firmware for the base of RAM.
+ */
+
+head_start_47x:
+ /* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */
+ mfspr r3,SPRN_PID /* Get PID */
+ mfmsr r4 /* Get MSR */
+ andi. r4,r4,MSR_IS@l /* TS=1? */
+ beq 1f /* If not, leave STS=0 */
+ oris r3,r3,PPC47x_MMUCR_STS@h /* Set STS=1 */
+1: mtspr SPRN_MMUCR,r3 /* Put MMUCR */
+ sync
+
+ /* Find the entry we are running from */
+ bl 1f
+1: mflr r23
+ tlbsx r23,0,r23
+ tlbre r24,r23,0
+ tlbre r25,r23,1
+ tlbre r26,r23,2
+
+/*
+ * Cleanup time
+ */
+
+ /* Initialize MMUCR */
+ li r5,0
+ mtspr SPRN_MMUCR,r5
+ sync
+
+clear_all_utlb_entries:
+
+ #; Set initial values.
+
+ addis r3,0,0x8000
+ addi r4,0,0
+ addi r5,0,0
+ b clear_utlb_entry
+
+ #; Align the loop to speed things up.
+
+ .align 6
+
+clear_utlb_entry:
+
+ tlbwe r4,r3,0
+ tlbwe r5,r3,1
+ tlbwe r5,r3,2
+ addis r3,r3,0x2000
+ cmpwi r3,0
+ bne clear_utlb_entry
+ addis r3,0,0x8000
+ addis r4,r4,0x100
+ cmpwi r4,0
+ bne clear_utlb_entry
+
+ #; Restore original entry.
+
+ oris r23,r23,0x8000 /* specify the way */
+ tlbwe r24,r23,0
+ tlbwe r25,r23,1
+ tlbwe r26,r23,2
+
+/*
+ * Configure and load pinned entry into TLB for the kernel core
+ */
+
+ lis r3,PAGE_OFFSET@h
+ ori r3,r3,PAGE_OFFSET@l
+
+ /* Load the kernel PID = 0 */
+ li r0,0
+ mtspr SPRN_PID,r0
+ sync
+
+ /* Word 0 */
+ clrrwi r3,r3,12 /* Mask off the effective page number */
+ ori r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_256M
+
+ /* Word 1 - use r25. RPN is the same as the original entry */
+
+ /* Word 2 */
+ li r5,0
+ ori r5,r5,PPC47x_TLB2_S_RWX
+#ifdef CONFIG_SMP
+ ori r5,r5,PPC47x_TLB2_M
+#endif
+
+ /* We write to way 0 and bolted 0 */
+ lis r0,0x8800
+ tlbwe r3,r0,0
+ tlbwe r25,r0,1
+ tlbwe r5,r0,2
+
+/*
+ * Configure SSPCR, ISPCR and USPCR for now to search everything, we can fix
+ * them up later
+ */
+ LOAD_REG_IMMEDIATE(r3, 0x9abcdef0)
+ mtspr SPRN_SSPCR,r3
+ mtspr SPRN_USPCR,r3
+ LOAD_REG_IMMEDIATE(r3, 0x12345670)
+ mtspr SPRN_ISPCR,r3
+
+ /* Force context change */
+ mfmsr r0
+ mtspr SPRN_SRR1, r0
+ lis r0,3f@h
+ ori r0,r0,3f@l
+ mtspr SPRN_SRR0,r0
+ sync
+ rfi
+
+ /* Invalidate original entry we used */
+3:
+ rlwinm r24,r24,0,21,19 /* clear the "valid" bit */
+ tlbwe r24,r23,0
+ addi r24,0,0
+ tlbwe r24,r23,1
+ tlbwe r24,r23,2
+ isync /* Clear out the shadow TLB entries */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_44x
+ /* Add UART mapping for early debug. */
+
+ /* Word 0 */
+ lis r3,PPC44x_EARLY_DEBUG_VIRTADDR@h
+ ori r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_TS | PPC47x_TLB0_1M
+
+ /* Word 1 */
+ lis r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h
+ ori r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH
+
+ /* Word 2 */
+ li r5,(PPC47x_TLB2_S_RW | PPC47x_TLB2_IMG)
+
+ /* Bolted in way 0, bolt slot 5, we -hope- we don't hit the same
+ * congruence class as the kernel, we need to make sure of it at
+ * some point
+ */
+ lis r0,0x8d00
+ tlbwe r3,r0,0
+ tlbwe r4,r0,1
+ tlbwe r5,r0,2
+
+ /* Force context change */
+ isync
+#endif /* CONFIG_PPC_EARLY_DEBUG_44x */
+
+ /* Establish the interrupt vector offsets */
+ SET_IVOR(0, CriticalInput);
+ SET_IVOR(1, MachineCheckA);
+ SET_IVOR(2, DataStorage);
+ SET_IVOR(3, InstructionStorage);
+ SET_IVOR(4, ExternalInput);
+ SET_IVOR(5, Alignment);
+ SET_IVOR(6, Program);
+ SET_IVOR(7, FloatingPointUnavailable);
+ SET_IVOR(8, SystemCall);
+ SET_IVOR(9, AuxillaryProcessorUnavailable);
+ SET_IVOR(10, Decrementer);
+ SET_IVOR(11, FixedIntervalTimer);
+ SET_IVOR(12, WatchdogTimer);
+ SET_IVOR(13, DataTLBError47x);
+ SET_IVOR(14, InstructionTLBError47x);
+ SET_IVOR(15, DebugCrit);
+
+ /* We configure icbi to invalidate 128 bytes at a time since the
+ * current 32-bit kernel code isn't too happy with icache != dcache
+ * block size
+ */
+ mfspr r3,SPRN_CCR0
+ oris r3,r3,0x0020
+ mtspr SPRN_CCR0,r3
+ isync
+
+#endif /* CONFIG_PPC_47x */
+
+/*
+ * Here we are back to code that is common between 44x and 47x
+ *
+ * We proceed to further kernel initialization and return to the
+ * main kernel entry
+ */
+head_start_common:
+ /* Establish the interrupt vector base */
+ lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */
+ mtspr SPRN_IVPR,r4
+
+ /*
+ * If the kernel was loaded at a non-zero 256 MB page, we need to
+ * mask off the most significant 4 bits to get the relative address
+ * from the start of physical memory
+ */
+ rlwinm r22,r22,0,4,31
+ addis r22,r22,PAGE_OFFSET@h
+ mtlr r22
+ isync
+ blr
+
+/*
+ * We put a few things here that have to be page-aligned. This stuff
+ * goes at the beginning of the data segment, which is page-aligned.
+ */
+ .data
+ .align PAGE_SHIFT
+ .globl sdata
+sdata:
+ .globl empty_zero_page
+empty_zero_page:
+ .space PAGE_SIZE
+
+/*
+ * To support >32-bit physical addresses, we use an 8KB pgdir.
+ */
+ .globl swapper_pg_dir
+swapper_pg_dir:
+ .space PGD_TABLE_SIZE
+
+/*
+ * Room for two PTE pointers, usually the kernel and current user pointers
+ * to their respective root page table.
+ */
+abatron_pteptrs:
+ .space 8
+
+#ifdef CONFIG_SMP
+ .align 12
+temp_boot_stack:
+ .space 1024
+#endif /* CONFIG_SMP */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
new file mode 100644
index 00000000..58bddee8
--- /dev/null
+++ b/arch/powerpc/kernel/head_64.S
@@ -0,0 +1,789 @@
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ * Adapted for Power Macintosh by Paul Mackerras.
+ * Low-level exception handlers and MMU support
+ * rewritten by Paul Mackerras.
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ * Adapted for 64bit PowerPC by Dave Engebretsen, Peter Bergner, and
+ * Mike Corrigan {engebret|bergner|mikejc}@us.ibm.com
+ *
+ * This file contains the entry point for the 64-bit kernel along
+ * with some early initialization code common to all 64-bit powerpc
+ * variants.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/bug.h>
+#include <asm/cputable.h>
+#include <asm/setup.h>
+#include <asm/hvcall.h>
+#include <asm/thread_info.h>
+#include <asm/firmware.h>
+#include <asm/page_64.h>
+#include <asm/irqflags.h>
+#include <asm/kvm_book3s_asm.h>
+#include <asm/ptrace.h>
+#include <asm/hw_irq.h>
+
+/* The physical memory is laid out such that the secondary processor
+ * spin code sits at 0x0000...0x00ff. On server, the vectors follow
+ * using the layout described in exceptions-64s.S
+ */
+
+/*
+ * Entering into this code we make the following assumptions:
+ *
+ * For pSeries or server processors:
+ * 1. The MMU is off & open firmware is running in real mode.
+ * 2. The kernel is entered at __start
+ * -or- For OPAL entry:
+ * 1. The MMU is off, processor in HV mode, primary CPU enters at 0
+ * with device-tree in gpr3. We also get OPAL base in r8 and
+ * entry in r9 for debugging purposes
+ * 2. Secondary processors enter at 0x60 with PIR in gpr3
+ *
+ * For Book3E processors:
+ * 1. The MMU is on running in AS0 in a state defined in ePAPR
+ * 2. The kernel is entered at __start
+ */
+
+ .text
+ .globl _stext
+_stext:
+_GLOBAL(__start)
+ /* NOP this out unconditionally */
+BEGIN_FTR_SECTION
+ b .__start_initialization_multiplatform
+END_FTR_SECTION(0, 1)
+
+ /* Catch branch to 0 in real mode */
+ trap
+
+ /* Secondary processors spin on this value until it becomes nonzero.
+ * When it does it contains the real address of the descriptor
+ * of the function that the cpu should jump to to continue
+ * initialization.
+ */
+ .globl __secondary_hold_spinloop
+__secondary_hold_spinloop:
+ .llong 0x0
+
+ /* Secondary processors write this value with their cpu # */
+ /* after they enter the spin loop immediately below. */
+ .globl __secondary_hold_acknowledge
+__secondary_hold_acknowledge:
+ .llong 0x0
+
+#ifdef CONFIG_RELOCATABLE
+ /* This flag is set to 1 by a loader if the kernel should run
+ * at the loaded address instead of the linked address. This
+ * is used by kexec-tools to keep the the kdump kernel in the
+ * crash_kernel region. The loader is responsible for
+ * observing the alignment requirement.
+ */
+ /* Do not move this variable as kexec-tools knows about it. */
+ . = 0x5c
+ .globl __run_at_load
+__run_at_load:
+ .long 0x72756e30 /* "run0" -- relocate to 0 by default */
+#endif
+
+ . = 0x60
+/*
+ * The following code is used to hold secondary processors
+ * in a spin loop after they have entered the kernel, but
+ * before the bulk of the kernel has been relocated. This code
+ * is relocated to physical address 0x60 before prom_init is run.
+ * All of it must fit below the first exception vector at 0x100.
+ * Use .globl here not _GLOBAL because we want __secondary_hold
+ * to be the actual text address, not a descriptor.
+ */
+ .globl __secondary_hold
+__secondary_hold:
+#ifndef CONFIG_PPC_BOOK3E
+ mfmsr r24
+ ori r24,r24,MSR_RI
+ mtmsrd r24 /* RI on */
+#endif
+ /* Grab our physical cpu number */
+ mr r24,r3
+
+ /* Tell the master cpu we're here */
+ /* Relocation is off & we are located at an address less */
+ /* than 0x100, so only need to grab low order offset. */
+ std r24,__secondary_hold_acknowledge-_stext(0)
+ sync
+
+ /* All secondary cpus wait here until told to start. */
+100: ld r4,__secondary_hold_spinloop-_stext(0)
+ cmpdi 0,r4,0
+ beq 100b
+
+#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
+ ld r4,0(r4) /* deref function descriptor */
+ mtctr r4
+ mr r3,r24
+ li r4,0
+ /* Make sure that patched code is visible */
+ isync
+ bctr
+#else
+ BUG_OPCODE
+#endif
+
+/* This value is used to mark exception frames on the stack. */
+ .section ".toc","aw"
+exception_marker:
+ .tc ID_72656773_68657265[TC],0x7265677368657265
+ .text
+
+/*
+ * On server, we include the exception vectors code here as it
+ * relies on absolute addressing which is only possible within
+ * this compilation unit
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#include "exceptions-64s.S"
+#endif
+
+_GLOBAL(generic_secondary_thread_init)
+ mr r24,r3
+
+ /* turn on 64-bit mode */
+ bl .enable_64b_mode
+
+ /* get a valid TOC pointer, wherever we're mapped at */
+ bl .relative_toc
+
+#ifdef CONFIG_PPC_BOOK3E
+ /* Book3E initialization */
+ mr r3,r24
+ bl .book3e_secondary_thread_init
+#endif
+ b generic_secondary_common_init
+
+/*
+ * On pSeries and most other platforms, secondary processors spin
+ * in the following code.
+ * At entry, r3 = this processor's number (physical cpu id)
+ *
+ * On Book3E, r4 = 1 to indicate that the initial TLB entry for
+ * this core already exists (setup via some other mechanism such
+ * as SCOM before entry).
+ */
+_GLOBAL(generic_secondary_smp_init)
+ mr r24,r3
+ mr r25,r4
+
+ /* turn on 64-bit mode */
+ bl .enable_64b_mode
+
+ /* get a valid TOC pointer, wherever we're mapped at */
+ bl .relative_toc
+
+#ifdef CONFIG_PPC_BOOK3E
+ /* Book3E initialization */
+ mr r3,r24
+ mr r4,r25
+ bl .book3e_secondary_core_init
+#endif
+
+generic_secondary_common_init:
+ /* Set up a paca value for this processor. Since we have the
+ * physical cpu id in r24, we need to search the pacas to find
+ * which logical id maps to our physical one.
+ */
+ LOAD_REG_ADDR(r13, paca) /* Load paca pointer */
+ ld r13,0(r13) /* Get base vaddr of paca array */
+#ifndef CONFIG_SMP
+ addi r13,r13,PACA_SIZE /* know r13 if used accidentally */
+ b .kexec_wait /* wait for next kernel if !SMP */
+#else
+ LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */
+ lwz r7,0(r7) /* also the max paca allocated */
+ li r5,0 /* logical cpu id */
+1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */
+ cmpw r6,r24 /* Compare to our id */
+ beq 2f
+ addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */
+ addi r5,r5,1
+ cmpw r5,r7 /* Check if more pacas exist */
+ blt 1b
+
+ mr r3,r24 /* not found, copy phys to r3 */
+ b .kexec_wait /* next kernel might do better */
+
+2: SET_PACA(r13)
+#ifdef CONFIG_PPC_BOOK3E
+ addi r12,r13,PACA_EXTLB /* and TLB exc frame in another */
+ mtspr SPRN_SPRG_TLB_EXFRAME,r12
+#endif
+
+ /* From now on, r24 is expected to be logical cpuid */
+ mr r24,r5
+
+ /* See if we need to call a cpu state restore handler */
+ LOAD_REG_ADDR(r23, cur_cpu_spec)
+ ld r23,0(r23)
+ ld r23,CPU_SPEC_RESTORE(r23)
+ cmpdi 0,r23,0
+ beq 3f
+ ld r23,0(r23)
+ mtctr r23
+ bctrl
+
+3: LOAD_REG_ADDR(r3, spinning_secondaries) /* Decrement spinning_secondaries */
+ lwarx r4,0,r3
+ subi r4,r4,1
+ stwcx. r4,0,r3
+ bne 3b
+ isync
+
+4: HMT_LOW
+ lbz r23,PACAPROCSTART(r13) /* Test if this processor should */
+ /* start. */
+ cmpwi 0,r23,0
+ beq 4b /* Loop until told to go */
+
+ sync /* order paca.run and cur_cpu_spec */
+ isync /* In case code patching happened */
+
+ /* Create a temp kernel stack for use before relocation is on. */
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,STACK_FRAME_OVERHEAD
+
+ b __secondary_start
+#endif /* SMP */
+
+/*
+ * Turn the MMU off.
+ * Assumes we're mapped EA == RA if the MMU is on.
+ */
+#ifdef CONFIG_PPC_BOOK3S
+_STATIC(__mmu_off)
+ mfmsr r3
+ andi. r0,r3,MSR_IR|MSR_DR
+ beqlr
+ mflr r4
+ andc r3,r3,r0
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r3
+ sync
+ rfid
+ b . /* prevent speculative execution */
+#endif
+
+
+/*
+ * Here is our main kernel entry point. We support currently 2 kind of entries
+ * depending on the value of r5.
+ *
+ * r5 != NULL -> OF entry, we go to prom_init, "legacy" parameter content
+ * in r3...r7
+ *
+ * r5 == NULL -> kexec style entry. r3 is a physical pointer to the
+ * DT block, r4 is a physical pointer to the kernel itself
+ *
+ */
+_GLOBAL(__start_initialization_multiplatform)
+ /* Make sure we are running in 64 bits mode */
+ bl .enable_64b_mode
+
+ /* Get TOC pointer (current runtime address) */
+ bl .relative_toc
+
+ /* find out where we are now */
+ bcl 20,31,$+4
+0: mflr r26 /* r26 = runtime addr here */
+ addis r26,r26,(_stext - 0b)@ha
+ addi r26,r26,(_stext - 0b)@l /* current runtime base addr */
+
+ /*
+ * Are we booted from a PROM Of-type client-interface ?
+ */
+ cmpldi cr0,r5,0
+ beq 1f
+ b .__boot_from_prom /* yes -> prom */
+1:
+ /* Save parameters */
+ mr r31,r3
+ mr r30,r4
+#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
+ /* Save OPAL entry */
+ mr r28,r8
+ mr r29,r9
+#endif
+
+#ifdef CONFIG_PPC_BOOK3E
+ bl .start_initialization_book3e
+ b .__after_prom_start
+#else
+ /* Setup some critical 970 SPRs before switching MMU off */
+ mfspr r0,SPRN_PVR
+ srwi r0,r0,16
+ cmpwi r0,0x39 /* 970 */
+ beq 1f
+ cmpwi r0,0x3c /* 970FX */
+ beq 1f
+ cmpwi r0,0x44 /* 970MP */
+ beq 1f
+ cmpwi r0,0x45 /* 970GX */
+ bne 2f
+1: bl .__cpu_preinit_ppc970
+2:
+
+ /* Switch off MMU if not already off */
+ bl .__mmu_off
+ b .__after_prom_start
+#endif /* CONFIG_PPC_BOOK3E */
+
+_INIT_STATIC(__boot_from_prom)
+#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
+ /* Save parameters */
+ mr r31,r3
+ mr r30,r4
+ mr r29,r5
+ mr r28,r6
+ mr r27,r7
+
+ /*
+ * Align the stack to 16-byte boundary
+ * Depending on the size and layout of the ELF sections in the initial
+ * boot binary, the stack pointer may be unaligned on PowerMac
+ */
+ rldicr r1,r1,0,59
+
+#ifdef CONFIG_RELOCATABLE
+ /* Relocate code for where we are now */
+ mr r3,r26
+ bl .relocate
+#endif
+
+ /* Restore parameters */
+ mr r3,r31
+ mr r4,r30
+ mr r5,r29
+ mr r6,r28
+ mr r7,r27
+
+ /* Do all of the interaction with OF client interface */
+ mr r8,r26
+ bl .prom_init
+#endif /* #CONFIG_PPC_OF_BOOT_TRAMPOLINE */
+
+ /* We never return. We also hit that trap if trying to boot
+ * from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */
+ trap
+
+_STATIC(__after_prom_start)
+#ifdef CONFIG_RELOCATABLE
+ /* process relocations for the final address of the kernel */
+ lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */
+ sldi r25,r25,32
+ lwz r7,__run_at_load-_stext(r26)
+ cmplwi cr0,r7,1 /* flagged to stay where we are ? */
+ bne 1f
+ add r25,r25,r26
+1: mr r3,r25
+ bl .relocate
+#endif
+
+/*
+ * We need to run with _stext at physical address PHYSICAL_START.
+ * This will leave some code in the first 256B of
+ * real memory, which are reserved for software use.
+ *
+ * Note: This process overwrites the OF exception vectors.
+ */
+ li r3,0 /* target addr */
+#ifdef CONFIG_PPC_BOOK3E
+ tovirt(r3,r3) /* on booke, we already run at PAGE_OFFSET */
+#endif
+ mr. r4,r26 /* In some cases the loader may */
+ beq 9f /* have already put us at zero */
+ li r6,0x100 /* Start offset, the first 0x100 */
+ /* bytes were copied earlier. */
+#ifdef CONFIG_PPC_BOOK3E
+ tovirt(r6,r6) /* on booke, we already run at PAGE_OFFSET */
+#endif
+
+#ifdef CONFIG_CRASH_DUMP
+/*
+ * Check if the kernel has to be running as relocatable kernel based on the
+ * variable __run_at_load, if it is set the kernel is treated as relocatable
+ * kernel, otherwise it will be moved to PHYSICAL_START
+ */
+ lwz r7,__run_at_load-_stext(r26)
+ cmplwi cr0,r7,1
+ bne 3f
+
+ li r5,__end_interrupts - _stext /* just copy interrupts */
+ b 5f
+3:
+#endif
+ lis r5,(copy_to_here - _stext)@ha
+ addi r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
+
+ bl .copy_and_flush /* copy the first n bytes */
+ /* this includes the code being */
+ /* executed here. */
+ addis r8,r3,(4f - _stext)@ha /* Jump to the copy of this code */
+ addi r8,r8,(4f - _stext)@l /* that we just made */
+ mtctr r8
+ bctr
+
+p_end: .llong _end - _stext
+
+4: /* Now copy the rest of the kernel up to _end */
+ addis r5,r26,(p_end - _stext)@ha
+ ld r5,(p_end - _stext)@l(r5) /* get _end */
+5: bl .copy_and_flush /* copy the rest */
+
+9: b .start_here_multiplatform
+
+/*
+ * Copy routine used to copy the kernel to start at physical address 0
+ * and flush and invalidate the caches as needed.
+ * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset
+ * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5.
+ *
+ * Note: this routine *only* clobbers r0, r6 and lr
+ */
+_GLOBAL(copy_and_flush)
+ addi r5,r5,-8
+ addi r6,r6,-8
+4: li r0,8 /* Use the smallest common */
+ /* denominator cache line */
+ /* size. This results in */
+ /* extra cache line flushes */
+ /* but operation is correct. */
+ /* Can't get cache line size */
+ /* from NACA as it is being */
+ /* moved too. */
+
+ mtctr r0 /* put # words/line in ctr */
+3: addi r6,r6,8 /* copy a cache line */
+ ldx r0,r6,r4
+ stdx r0,r6,r3
+ bdnz 3b
+ dcbst r6,r3 /* write it to memory */
+ sync
+ icbi r6,r3 /* flush the icache line */
+ cmpld 0,r6,r5
+ blt 4b
+ sync
+ addi r5,r5,8
+ addi r6,r6,8
+ blr
+
+.align 8
+copy_to_here:
+
+#ifdef CONFIG_SMP
+#ifdef CONFIG_PPC_PMAC
+/*
+ * On PowerMac, secondary processors starts from the reset vector, which
+ * is temporarily turned into a call to one of the functions below.
+ */
+ .section ".text";
+ .align 2 ;
+
+ .globl __secondary_start_pmac_0
+__secondary_start_pmac_0:
+ /* NB the entries for cpus 0, 1, 2 must each occupy 8 bytes. */
+ li r24,0
+ b 1f
+ li r24,1
+ b 1f
+ li r24,2
+ b 1f
+ li r24,3
+1:
+
+_GLOBAL(pmac_secondary_start)
+ /* turn on 64-bit mode */
+ bl .enable_64b_mode
+
+ li r0,0
+ mfspr r3,SPRN_HID4
+ rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */
+ sync
+ mtspr SPRN_HID4,r3
+ isync
+ sync
+ slbia
+
+ /* get TOC pointer (real address) */
+ bl .relative_toc
+
+ /* Copy some CPU settings from CPU 0 */
+ bl .__restore_cpu_ppc970
+
+ /* pSeries do that early though I don't think we really need it */
+ mfmsr r3
+ ori r3,r3,MSR_RI
+ mtmsrd r3 /* RI on */
+
+ /* Set up a paca value for this processor. */
+ LOAD_REG_ADDR(r4,paca) /* Load paca pointer */
+ ld r4,0(r4) /* Get base vaddr of paca array */
+ mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */
+ add r13,r13,r4 /* for this processor. */
+ SET_PACA(r13) /* Save vaddr of paca in an SPRG*/
+
+ /* Mark interrupts soft and hard disabled (they might be enabled
+ * in the PACA when doing hotplug)
+ */
+ li r0,0
+ stb r0,PACASOFTIRQEN(r13)
+ li r0,PACA_IRQ_HARD_DIS
+ stb r0,PACAIRQHAPPENED(r13)
+
+ /* Create a temp kernel stack for use before relocation is on. */
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,STACK_FRAME_OVERHEAD
+
+ b __secondary_start
+
+#endif /* CONFIG_PPC_PMAC */
+
+/*
+ * This function is called after the master CPU has released the
+ * secondary processors. The execution environment is relocation off.
+ * The paca for this processor has the following fields initialized at
+ * this point:
+ * 1. Processor number
+ * 2. Segment table pointer (virtual address)
+ * On entry the following are set:
+ * r1 = stack pointer (real addr of temp stack)
+ * r24 = cpu# (in Linux terms)
+ * r13 = paca virtual address
+ * SPRG_PACA = paca virtual address
+ */
+ .section ".text";
+ .align 2 ;
+
+ .globl __secondary_start
+__secondary_start:
+ /* Set thread priority to MEDIUM */
+ HMT_MEDIUM
+
+ /* Initialize the kernel stack */
+ LOAD_REG_ADDR(r3, current_set)
+ sldi r28,r24,3 /* get current_set[cpu#] */
+ ldx r14,r3,r28
+ addi r14,r14,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ std r14,PACAKSAVE(r13)
+
+ /* Do early setup for that CPU (stab, slb, hash table pointer) */
+ bl .early_setup_secondary
+
+ /*
+ * setup the new stack pointer, but *don't* use this until
+ * translation is on.
+ */
+ mr r1, r14
+
+ /* Clear backchain so we get nice backtraces */
+ li r7,0
+ mtlr r7
+
+ /* Mark interrupts soft and hard disabled (they might be enabled
+ * in the PACA when doing hotplug)
+ */
+ stb r7,PACASOFTIRQEN(r13)
+ li r0,PACA_IRQ_HARD_DIS
+ stb r0,PACAIRQHAPPENED(r13)
+
+ /* enable MMU and jump to start_secondary */
+ LOAD_REG_ADDR(r3, .start_secondary_prolog)
+ LOAD_REG_IMMEDIATE(r4, MSR_KERNEL)
+
+ mtspr SPRN_SRR0,r3
+ mtspr SPRN_SRR1,r4
+ RFI
+ b . /* prevent speculative execution */
+
+/*
+ * Running with relocation on at this point. All we want to do is
+ * zero the stack back-chain pointer and get the TOC virtual address
+ * before going into C code.
+ */
+_GLOBAL(start_secondary_prolog)
+ ld r2,PACATOC(r13)
+ li r3,0
+ std r3,0(r1) /* Zero the stack frame pointer */
+ bl .start_secondary
+ b .
+/*
+ * Reset stack pointer and call start_secondary
+ * to continue with online operation when woken up
+ * from cede in cpu offline.
+ */
+_GLOBAL(start_secondary_resume)
+ ld r1,PACAKSAVE(r13) /* Reload kernel stack pointer */
+ li r3,0
+ std r3,0(r1) /* Zero the stack frame pointer */
+ bl .start_secondary
+ b .
+#endif
+
+/*
+ * This subroutine clobbers r11 and r12
+ */
+_GLOBAL(enable_64b_mode)
+ mfmsr r11 /* grab the current MSR */
+#ifdef CONFIG_PPC_BOOK3E
+ oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */
+ mtmsr r11
+#else /* CONFIG_PPC_BOOK3E */
+ li r12,(MSR_64BIT | MSR_ISF)@highest
+ sldi r12,r12,48
+ or r11,r11,r12
+ mtmsrd r11
+ isync
+#endif
+ blr
+
+/*
+ * This puts the TOC pointer into r2, offset by 0x8000 (as expected
+ * by the toolchain). It computes the correct value for wherever we
+ * are running at the moment, using position-independent code.
+ */
+_GLOBAL(relative_toc)
+ mflr r0
+ bcl 20,31,$+4
+0: mflr r11
+ ld r2,(p_toc - 0b)(r11)
+ add r2,r2,r11
+ mtlr r0
+ blr
+
+p_toc: .llong __toc_start + 0x8000 - 0b
+
+/*
+ * This is where the main kernel code starts.
+ */
+_INIT_STATIC(start_here_multiplatform)
+ /* set up the TOC (real address) */
+ bl .relative_toc
+
+ /* Clear out the BSS. It may have been done in prom_init,
+ * already but that's irrelevant since prom_init will soon
+ * be detached from the kernel completely. Besides, we need
+ * to clear it now for kexec-style entry.
+ */
+ LOAD_REG_ADDR(r11,__bss_stop)
+ LOAD_REG_ADDR(r8,__bss_start)
+ sub r11,r11,r8 /* bss size */
+ addi r11,r11,7 /* round up to an even double word */
+ srdi. r11,r11,3 /* shift right by 3 */
+ beq 4f
+ addi r8,r8,-8
+ li r0,0
+ mtctr r11 /* zero this many doublewords */
+3: stdu r0,8(r8)
+ bdnz 3b
+4:
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
+ /* Setup OPAL entry */
+ std r28,0(r11);
+ std r29,8(r11);
+#endif
+
+#ifndef CONFIG_PPC_BOOK3E
+ mfmsr r6
+ ori r6,r6,MSR_RI
+ mtmsrd r6 /* RI on */
+#endif
+
+#ifdef CONFIG_RELOCATABLE
+ /* Save the physical address we're running at in kernstart_addr */
+ LOAD_REG_ADDR(r4, kernstart_addr)
+ clrldi r0,r25,2
+ std r0,0(r4)
+#endif
+
+ /* The following gets the stack set up with the regs */
+ /* pointing to the real addr of the kernel stack. This is */
+ /* all done to support the C function call below which sets */
+ /* up the htab. This is done because we have relocated the */
+ /* kernel but are still running in real mode. */
+
+ LOAD_REG_ADDR(r3,init_thread_union)
+
+ /* set up a stack pointer */
+ addi r1,r3,THREAD_SIZE
+ li r0,0
+ stdu r0,-STACK_FRAME_OVERHEAD(r1)
+
+ /* Do very early kernel initializations, including initial hash table,
+ * stab and slb setup before we turn on relocation. */
+
+ /* Restore parameters passed from prom_init/kexec */
+ mr r3,r31
+ bl .early_setup /* also sets r13 and SPRG_PACA */
+
+ LOAD_REG_ADDR(r3, .start_here_common)
+ ld r4,PACAKMSR(r13)
+ mtspr SPRN_SRR0,r3
+ mtspr SPRN_SRR1,r4
+ RFI
+ b . /* prevent speculative execution */
+
+ /* This is where all platforms converge execution */
+_INIT_GLOBAL(start_here_common)
+ /* relocation is on at this point */
+ std r1,PACAKSAVE(r13)
+
+ /* Load the TOC (virtual address) */
+ ld r2,PACATOC(r13)
+
+ /* Do more system initializations in virtual mode */
+ bl .setup_system
+
+ /* Mark interrupts soft and hard disabled (they might be enabled
+ * in the PACA when doing hotplug)
+ */
+ li r0,0
+ stb r0,PACASOFTIRQEN(r13)
+ li r0,PACA_IRQ_HARD_DIS
+ stb r0,PACAIRQHAPPENED(r13)
+
+ /* Generic kernel entry */
+ bl .start_kernel
+
+ /* Not reached */
+ BUG_OPCODE
+
+/*
+ * We put a few things here that have to be page-aligned.
+ * This stuff goes at the beginning of the bss, which is page-aligned.
+ */
+ .section ".bss"
+
+ .align PAGE_SHIFT
+
+ .globl empty_zero_page
+empty_zero_page:
+ .space PAGE_SIZE
+
+ .globl swapper_pg_dir
+swapper_pg_dir:
+ .space PGD_TABLE_SIZE
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
new file mode 100644
index 00000000..b2a5860a
--- /dev/null
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -0,0 +1,979 @@
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ * Low-level exception handlers and MMU support
+ * rewritten by Paul Mackerras.
+ * Copyright (C) 1996 Paul Mackerras.
+ * MPC8xx modifications by Dan Malek
+ * Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+ *
+ * This file contains low-level support and setup for PowerPC 8xx
+ * embedded processors, including trap and interrupt dispatch.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/init.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/cache.h>
+#include <asm/pgtable.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+
+/* Macro to make the code more readable. */
+#ifdef CONFIG_8xx_CPU6
+#define DO_8xx_CPU6(val, reg) \
+ li reg, val; \
+ stw reg, 12(r0); \
+ lwz reg, 12(r0);
+#else
+#define DO_8xx_CPU6(val, reg)
+#endif
+ __HEAD
+_ENTRY(_stext);
+_ENTRY(_start);
+
+/* MPC8xx
+ * This port was done on an MBX board with an 860. Right now I only
+ * support an ELF compressed (zImage) boot from EPPC-Bug because the
+ * code there loads up some registers before calling us:
+ * r3: ptr to board info data
+ * r4: initrd_start or if no initrd then 0
+ * r5: initrd_end - unused if r4 is 0
+ * r6: Start of command line string
+ * r7: End of command line string
+ *
+ * I decided to use conditional compilation instead of checking PVR and
+ * adding more processor specific branches around code I don't need.
+ * Since this is an embedded processor, I also appreciate any memory
+ * savings I can get.
+ *
+ * The MPC8xx does not have any BATs, but it supports large page sizes.
+ * We first initialize the MMU to support 8M byte pages, then load one
+ * entry into each of the instruction and data TLBs to map the first
+ * 8M 1:1. I also mapped an additional I/O space 1:1 so we can get to
+ * the "internal" processor registers before MMU_init is called.
+ *
+ * The TLB code currently contains a major hack. Since I use the condition
+ * code register, I have to save and restore it. I am out of registers, so
+ * I just store it in memory location 0 (the TLB handlers are not reentrant).
+ * To avoid making any decisions, I need to use the "segment" valid bit
+ * in the first level table, but that would require many changes to the
+ * Linux page directory/table functions that I don't want to do right now.
+ *
+ * -- Dan
+ */
+ .globl __start
+__start:
+ mr r31,r3 /* save device tree ptr */
+
+ /* We have to turn on the MMU right away so we get cache modes
+ * set correctly.
+ */
+ bl initial_mmu
+
+/* We now have the lower 8 Meg mapped into TLB entries, and the caches
+ * ready to work.
+ */
+
+turn_on_mmu:
+ mfmsr r0
+ ori r0,r0,MSR_DR|MSR_IR
+ mtspr SPRN_SRR1,r0
+ lis r0,start_here@h
+ ori r0,r0,start_here@l
+ mtspr SPRN_SRR0,r0
+ SYNC
+ rfi /* enables MMU */
+
+/*
+ * Exception entry code. This code runs with address translation
+ * turned off, i.e. using physical addresses.
+ * We assume sprg3 has the physical address of the current
+ * task's thread_struct.
+ */
+#define EXCEPTION_PROLOG \
+ mtspr SPRN_SPRG_SCRATCH0,r10; \
+ mtspr SPRN_SPRG_SCRATCH1,r11; \
+ mfcr r10; \
+ EXCEPTION_PROLOG_1; \
+ EXCEPTION_PROLOG_2
+
+#define EXCEPTION_PROLOG_1 \
+ mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \
+ andi. r11,r11,MSR_PR; \
+ tophys(r11,r1); /* use tophys(r1) if kernel */ \
+ beq 1f; \
+ mfspr r11,SPRN_SPRG_THREAD; \
+ lwz r11,THREAD_INFO-THREAD(r11); \
+ addi r11,r11,THREAD_SIZE; \
+ tophys(r11,r11); \
+1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
+
+
+#define EXCEPTION_PROLOG_2 \
+ CLR_TOP32(r11); \
+ stw r10,_CCR(r11); /* save registers */ \
+ stw r12,GPR12(r11); \
+ stw r9,GPR9(r11); \
+ mfspr r10,SPRN_SPRG_SCRATCH0; \
+ stw r10,GPR10(r11); \
+ mfspr r12,SPRN_SPRG_SCRATCH1; \
+ stw r12,GPR11(r11); \
+ mflr r10; \
+ stw r10,_LINK(r11); \
+ mfspr r12,SPRN_SRR0; \
+ mfspr r9,SPRN_SRR1; \
+ stw r1,GPR1(r11); \
+ stw r1,0(r11); \
+ tovirt(r1,r11); /* set new kernel sp */ \
+ li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
+ MTMSRD(r10); /* (except for mach check in rtas) */ \
+ stw r0,GPR0(r11); \
+ SAVE_4GPRS(3, r11); \
+ SAVE_2GPRS(7, r11)
+
+/*
+ * Note: code which follows this uses cr0.eq (set if from kernel),
+ * r11, r12 (SRR0), and r9 (SRR1).
+ *
+ * Note2: once we have set r1 we are in a position to take exceptions
+ * again, and we could thus set MSR:RI at that point.
+ */
+
+/*
+ * Exception vectors.
+ */
+#define EXCEPTION(n, label, hdlr, xfer) \
+ . = n; \
+label: \
+ EXCEPTION_PROLOG; \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ xfer(n, hdlr)
+
+#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \
+ li r10,trap; \
+ stw r10,_TRAP(r11); \
+ li r10,MSR_KERNEL; \
+ copyee(r10, r9); \
+ bl tfer; \
+i##n: \
+ .long hdlr; \
+ .long ret
+
+#define COPY_EE(d, s) rlwimi d,s,0,16,16
+#define NOCOPY(d, s)
+
+#define EXC_XFER_STD(n, hdlr) \
+ EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \
+ ret_from_except_full)
+
+#define EXC_XFER_LITE(n, hdlr) \
+ EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \
+ ret_from_except)
+
+#define EXC_XFER_EE(n, hdlr) \
+ EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \
+ ret_from_except_full)
+
+#define EXC_XFER_EE_LITE(n, hdlr) \
+ EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \
+ ret_from_except)
+
+/* System reset */
+ EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD)
+
+/* Machine check */
+ . = 0x200
+MachineCheck:
+ EXCEPTION_PROLOG
+ mfspr r4,SPRN_DAR
+ stw r4,_DAR(r11)
+ li r5,0x00f0
+ mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */
+ mfspr r5,SPRN_DSISR
+ stw r5,_DSISR(r11)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_STD(0x200, machine_check_exception)
+
+/* Data access exception.
+ * This is "never generated" by the MPC8xx. We jump to it for other
+ * translation errors.
+ */
+ . = 0x300
+DataAccess:
+ EXCEPTION_PROLOG
+ mfspr r10,SPRN_DSISR
+ stw r10,_DSISR(r11)
+ mr r5,r10
+ mfspr r4,SPRN_DAR
+ li r10,0x00f0
+ mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */
+ EXC_XFER_LITE(0x300, handle_page_fault)
+
+/* Instruction access exception.
+ * This is "never generated" by the MPC8xx. We jump to it for other
+ * translation errors.
+ */
+ . = 0x400
+InstructionAccess:
+ EXCEPTION_PROLOG
+ mr r4,r12
+ mr r5,r9
+ EXC_XFER_LITE(0x400, handle_page_fault)
+
+/* External interrupt */
+ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+
+/* Alignment exception */
+ . = 0x600
+Alignment:
+ EXCEPTION_PROLOG
+ mfspr r4,SPRN_DAR
+ stw r4,_DAR(r11)
+ li r5,0x00f0
+ mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */
+ mfspr r5,SPRN_DSISR
+ stw r5,_DSISR(r11)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_EE(0x600, alignment_exception)
+
+/* Program check exception */
+ EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
+
+/* No FPU on MPC8xx. This exception is not supposed to happen.
+*/
+ EXCEPTION(0x800, FPUnavailable, unknown_exception, EXC_XFER_STD)
+
+/* Decrementer */
+ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
+
+ EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE)
+
+/* System call */
+ . = 0xc00
+SystemCall:
+ EXCEPTION_PROLOG
+ EXC_XFER_EE_LITE(0xc00, DoSyscall)
+
+/* Single step - not used on 601 */
+ EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
+ EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_EE)
+
+/* On the MPC8xx, this is a software emulation interrupt. It occurs
+ * for all unimplemented and illegal instructions.
+ */
+ EXCEPTION(0x1000, SoftEmu, SoftwareEmulation, EXC_XFER_STD)
+
+ . = 0x1100
+/*
+ * For the MPC8xx, this is a software tablewalk to load the instruction
+ * TLB. It is modelled after the example in the Motorola manual. The task
+ * switch loads the M_TWB register with the pointer to the first level table.
+ * If we discover there is no second level table (value is zero) or if there
+ * is an invalid pte, we load that into the TLB, which causes another fault
+ * into the TLB Error interrupt where we can handle such problems.
+ * We have to use the MD_xxx registers for the tablewalk because the
+ * equivalent MI_xxx registers only perform the attribute functions.
+ */
+InstructionTLBMiss:
+#ifdef CONFIG_8xx_CPU6
+ stw r3, 8(r0)
+#endif
+ DO_8xx_CPU6(0x3f80, r3)
+ mtspr SPRN_M_TW, r10 /* Save a couple of working registers */
+ mfcr r10
+#ifdef CONFIG_8xx_CPU6
+ stw r10, 0(r0)
+ stw r11, 4(r0)
+#else
+ mtspr SPRN_DAR, r10
+ mtspr SPRN_SPRG2, r11
+#endif
+ mfspr r10, SPRN_SRR0 /* Get effective address of fault */
+#ifdef CONFIG_8xx_CPU15
+ addi r11, r10, 0x1000
+ tlbie r11
+ addi r11, r10, -0x1000
+ tlbie r11
+#endif
+ DO_8xx_CPU6(0x3780, r3)
+ mtspr SPRN_MD_EPN, r10 /* Have to use MD_EPN for walk, MI_EPN can't */
+ mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */
+
+ /* If we are faulting a kernel address, we have to use the
+ * kernel page tables.
+ */
+#ifdef CONFIG_MODULES
+ /* Only modules will cause ITLB Misses as we always
+ * pin the first 8MB of kernel memory */
+ andi. r11, r10, 0x0800 /* Address >= 0x80000000 */
+ beq 3f
+ lis r11, swapper_pg_dir@h
+ ori r11, r11, swapper_pg_dir@l
+ rlwimi r10, r11, 0, 2, 19
+3:
+#endif
+ lwz r11, 0(r10) /* Get the level 1 entry */
+ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */
+ beq 2f /* If zero, don't try to find a pte */
+
+ /* We have a pte table, so load the MI_TWC with the attributes
+ * for this "segment."
+ */
+ ori r11,r11,1 /* Set valid bit */
+ DO_8xx_CPU6(0x2b80, r3)
+ mtspr SPRN_MI_TWC, r11 /* Set segment attributes */
+ DO_8xx_CPU6(0x3b80, r3)
+ mtspr SPRN_MD_TWC, r11 /* Load pte table base address */
+ mfspr r11, SPRN_MD_TWC /* ....and get the pte address */
+ lwz r10, 0(r11) /* Get the pte */
+
+#ifdef CONFIG_SWAP
+ andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT
+ cmpwi cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT
+ bne- cr0, 2f
+#endif
+ /* The Linux PTE won't go exactly into the MMU TLB.
+ * Software indicator bits 21 and 28 must be clear.
+ * Software indicator bits 24, 25, 26, and 27 must be
+ * set. All other Linux PTE bits control the behavior
+ * of the MMU.
+ */
+ li r11, 0x00f0
+ rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */
+ DO_8xx_CPU6(0x2d80, r3)
+ mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
+
+ /* Restore registers */
+#ifndef CONFIG_8xx_CPU6
+ mfspr r10, SPRN_DAR
+ mtcr r10
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+ mfspr r11, SPRN_SPRG2
+#else
+ lwz r11, 0(r0)
+ mtcr r11
+ lwz r11, 4(r0)
+ lwz r3, 8(r0)
+#endif
+ mfspr r10, SPRN_M_TW
+ rfi
+2:
+ mfspr r11, SPRN_SRR1
+ /* clear all error bits as TLB Miss
+ * sets a few unconditionally
+ */
+ rlwinm r11, r11, 0, 0xffff
+ mtspr SPRN_SRR1, r11
+
+ /* Restore registers */
+#ifndef CONFIG_8xx_CPU6
+ mfspr r10, SPRN_DAR
+ mtcr r10
+ li r11, 0x00f0
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+ mfspr r11, SPRN_SPRG2
+#else
+ lwz r11, 0(r0)
+ mtcr r11
+ lwz r11, 4(r0)
+ lwz r3, 8(r0)
+#endif
+ mfspr r10, SPRN_M_TW
+ b InstructionAccess
+
+ . = 0x1200
+DataStoreTLBMiss:
+#ifdef CONFIG_8xx_CPU6
+ stw r3, 8(r0)
+#endif
+ DO_8xx_CPU6(0x3f80, r3)
+ mtspr SPRN_M_TW, r10 /* Save a couple of working registers */
+ mfcr r10
+#ifdef CONFIG_8xx_CPU6
+ stw r10, 0(r0)
+ stw r11, 4(r0)
+#else
+ mtspr SPRN_DAR, r10
+ mtspr SPRN_SPRG2, r11
+#endif
+ mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */
+
+ /* If we are faulting a kernel address, we have to use the
+ * kernel page tables.
+ */
+ andi. r11, r10, 0x0800
+ beq 3f
+ lis r11, swapper_pg_dir@h
+ ori r11, r11, swapper_pg_dir@l
+ rlwimi r10, r11, 0, 2, 19
+3:
+ lwz r11, 0(r10) /* Get the level 1 entry */
+ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */
+ beq 2f /* If zero, don't try to find a pte */
+
+ /* We have a pte table, so load fetch the pte from the table.
+ */
+ ori r11, r11, 1 /* Set valid bit in physical L2 page */
+ DO_8xx_CPU6(0x3b80, r3)
+ mtspr SPRN_MD_TWC, r11 /* Load pte table base address */
+ mfspr r10, SPRN_MD_TWC /* ....and get the pte address */
+ lwz r10, 0(r10) /* Get the pte */
+
+ /* Insert the Guarded flag into the TWC from the Linux PTE.
+ * It is bit 27 of both the Linux PTE and the TWC (at least
+ * I got that right :-). It will be better when we can put
+ * this into the Linux pgd/pmd and load it in the operation
+ * above.
+ */
+ rlwimi r11, r10, 0, 27, 27
+ /* Insert the WriteThru flag into the TWC from the Linux PTE.
+ * It is bit 25 in the Linux PTE and bit 30 in the TWC
+ */
+ rlwimi r11, r10, 32-5, 30, 30
+ DO_8xx_CPU6(0x3b80, r3)
+ mtspr SPRN_MD_TWC, r11
+
+ /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set.
+ * We also need to know if the insn is a load/store, so:
+ * Clear _PAGE_PRESENT and load that which will
+ * trap into DTLB Error with store bit set accordinly.
+ */
+ /* PRESENT=0x1, ACCESSED=0x20
+ * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
+ * r10 = (r10 & ~PRESENT) | r11;
+ */
+#ifdef CONFIG_SWAP
+ rlwinm r11, r10, 32-5, _PAGE_PRESENT
+ and r11, r11, r10
+ rlwimi r10, r11, 0, _PAGE_PRESENT
+#endif
+ /* Honour kernel RO, User NA */
+ /* 0x200 == Extended encoding, bit 22 */
+ rlwimi r10, r10, 32-2, 0x200 /* Copy USER to bit 22, 0x200 */
+ /* r11 = (r10 & _PAGE_RW) >> 1 */
+ rlwinm r11, r10, 32-1, 0x200
+ or r10, r11, r10
+ /* invert RW and 0x200 bits */
+ xori r10, r10, _PAGE_RW | 0x200
+
+ /* The Linux PTE won't go exactly into the MMU TLB.
+ * Software indicator bits 22 and 28 must be clear.
+ * Software indicator bits 24, 25, 26, and 27 must be
+ * set. All other Linux PTE bits control the behavior
+ * of the MMU.
+ */
+2: li r11, 0x00f0
+ rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */
+ DO_8xx_CPU6(0x3d80, r3)
+ mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
+
+ /* Restore registers */
+#ifndef CONFIG_8xx_CPU6
+ mfspr r10, SPRN_DAR
+ mtcr r10
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+ mfspr r11, SPRN_SPRG2
+#else
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+ lwz r11, 0(r0)
+ mtcr r11
+ lwz r11, 4(r0)
+ lwz r3, 8(r0)
+#endif
+ mfspr r10, SPRN_M_TW
+ rfi
+
+/* This is an instruction TLB error on the MPC8xx. This could be due
+ * to many reasons, such as executing guarded memory or illegal instruction
+ * addresses. There is nothing to do but handle a big time error fault.
+ */
+ . = 0x1300
+InstructionTLBError:
+ b InstructionAccess
+
+/* This is the data TLB error on the MPC8xx. This could be due to
+ * many reasons, including a dirty update to a pte. We can catch that
+ * one here, but anything else is an error. First, we track down the
+ * Linux pte. If it is valid, write access is allowed, but the
+ * page dirty bit is not set, we will set it and reload the TLB. For
+ * any other case, we bail out to a higher level function that can
+ * handle it.
+ */
+ . = 0x1400
+DataTLBError:
+#ifdef CONFIG_8xx_CPU6
+ stw r3, 8(r0)
+#endif
+ DO_8xx_CPU6(0x3f80, r3)
+ mtspr SPRN_M_TW, r10 /* Save a couple of working registers */
+ mfcr r10
+ stw r10, 0(r0)
+ stw r11, 4(r0)
+
+ mfspr r10, SPRN_DAR
+ cmpwi cr0, r10, 0x00f0
+ beq- FixupDAR /* must be a buggy dcbX, icbi insn. */
+DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR */
+ mfspr r10, SPRN_M_TW /* Restore registers */
+ lwz r11, 0(r0)
+ mtcr r11
+ lwz r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
+ lwz r3, 8(r0)
+#endif
+ b DataAccess
+
+ EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE)
+
+/* On the MPC8xx, these next four traps are used for development
+ * support of breakpoints and such. Someday I will get around to
+ * using them.
+ */
+ EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE)
+
+ . = 0x2000
+
+/* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
+ * by decoding the registers used by the dcbx instruction and adding them.
+ * DAR is set to the calculated address and r10 also holds the EA on exit.
+ */
+ /* define if you don't want to use self modifying code */
+#define NO_SELF_MODIFYING_CODE
+FixupDAR:/* Entry point for dcbx workaround. */
+ /* fetch instruction from memory. */
+ mfspr r10, SPRN_SRR0
+ andis. r11, r10, 0x8000 /* Address >= 0x80000000 */
+ DO_8xx_CPU6(0x3780, r3)
+ mtspr SPRN_MD_EPN, r10
+ mfspr r11, SPRN_M_TWB /* Get level 1 table entry address */
+ beq- 3f /* Branch if user space */
+ lis r11, (swapper_pg_dir-PAGE_OFFSET)@h
+ ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
+ rlwimi r11, r10, 32-20, 0xffc /* r11 = r11&~0xffc|(r10>>20)&0xffc */
+3: lwz r11, 0(r11) /* Get the level 1 entry */
+ DO_8xx_CPU6(0x3b80, r3)
+ mtspr SPRN_MD_TWC, r11 /* Load pte table base address */
+ mfspr r11, SPRN_MD_TWC /* ....and get the pte address */
+ lwz r11, 0(r11) /* Get the pte */
+ /* concat physical page address(r11) and page offset(r10) */
+ rlwimi r11, r10, 0, 20, 31
+ lwz r11,0(r11)
+/* Check if it really is a dcbx instruction. */
+/* dcbt and dcbtst does not generate DTLB Misses/Errors,
+ * no need to include them here */
+ srwi r10, r11, 26 /* check if major OP code is 31 */
+ cmpwi cr0, r10, 31
+ bne- 141f
+ rlwinm r10, r11, 0, 21, 30
+ cmpwi cr0, r10, 2028 /* Is dcbz? */
+ beq+ 142f
+ cmpwi cr0, r10, 940 /* Is dcbi? */
+ beq+ 142f
+ cmpwi cr0, r10, 108 /* Is dcbst? */
+ beq+ 144f /* Fix up store bit! */
+ cmpwi cr0, r10, 172 /* Is dcbf? */
+ beq+ 142f
+ cmpwi cr0, r10, 1964 /* Is icbi? */
+ beq+ 142f
+141: mfspr r10, SPRN_DAR /* r10 must hold DAR at exit */
+ b DARFixed /* Nope, go back to normal TLB processing */
+
+144: mfspr r10, SPRN_DSISR
+ rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */
+ mtspr SPRN_DSISR, r10
+142: /* continue, it was a dcbx, dcbi instruction. */
+#ifdef CONFIG_8xx_CPU6
+ lwz r3, 8(r0) /* restore r3 from memory */
+#endif
+#ifndef NO_SELF_MODIFYING_CODE
+ andis. r10,r11,0x1f /* test if reg RA is r0 */
+ li r10,modified_instr@l
+ dcbtst r0,r10 /* touch for store */
+ rlwinm r11,r11,0,0,20 /* Zero lower 10 bits */
+ oris r11,r11,640 /* Transform instr. to a "add r10,RA,RB" */
+ ori r11,r11,532
+ stw r11,0(r10) /* store add/and instruction */
+ dcbf 0,r10 /* flush new instr. to memory. */
+ icbi 0,r10 /* invalidate instr. cache line */
+ lwz r11, 4(r0) /* restore r11 from memory */
+ mfspr r10, SPRN_M_TW /* restore r10 from M_TW */
+ isync /* Wait until new instr is loaded from memory */
+modified_instr:
+ .space 4 /* this is where the add instr. is stored */
+ bne+ 143f
+ subf r10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */
+143: mtdar r10 /* store faulting EA in DAR */
+ b DARFixed /* Go back to normal TLB handling */
+#else
+ mfctr r10
+ mtdar r10 /* save ctr reg in DAR */
+ rlwinm r10, r11, 24, 24, 28 /* offset into jump table for reg RB */
+ addi r10, r10, 150f@l /* add start of table */
+ mtctr r10 /* load ctr with jump address */
+ xor r10, r10, r10 /* sum starts at zero */
+ bctr /* jump into table */
+150:
+ add r10, r10, r0 ;b 151f
+ add r10, r10, r1 ;b 151f
+ add r10, r10, r2 ;b 151f
+ add r10, r10, r3 ;b 151f
+ add r10, r10, r4 ;b 151f
+ add r10, r10, r5 ;b 151f
+ add r10, r10, r6 ;b 151f
+ add r10, r10, r7 ;b 151f
+ add r10, r10, r8 ;b 151f
+ add r10, r10, r9 ;b 151f
+ mtctr r11 ;b 154f /* r10 needs special handling */
+ mtctr r11 ;b 153f /* r11 needs special handling */
+ add r10, r10, r12 ;b 151f
+ add r10, r10, r13 ;b 151f
+ add r10, r10, r14 ;b 151f
+ add r10, r10, r15 ;b 151f
+ add r10, r10, r16 ;b 151f
+ add r10, r10, r17 ;b 151f
+ add r10, r10, r18 ;b 151f
+ add r10, r10, r19 ;b 151f
+ add r10, r10, r20 ;b 151f
+ add r10, r10, r21 ;b 151f
+ add r10, r10, r22 ;b 151f
+ add r10, r10, r23 ;b 151f
+ add r10, r10, r24 ;b 151f
+ add r10, r10, r25 ;b 151f
+ add r10, r10, r26 ;b 151f
+ add r10, r10, r27 ;b 151f
+ add r10, r10, r28 ;b 151f
+ add r10, r10, r29 ;b 151f
+ add r10, r10, r30 ;b 151f
+ add r10, r10, r31
+151:
+ rlwinm. r11,r11,19,24,28 /* offset into jump table for reg RA */
+ beq 152f /* if reg RA is zero, don't add it */
+ addi r11, r11, 150b@l /* add start of table */
+ mtctr r11 /* load ctr with jump address */
+ rlwinm r11,r11,0,16,10 /* make sure we don't execute this more than once */
+ bctr /* jump into table */
+152:
+ mfdar r11
+ mtctr r11 /* restore ctr reg from DAR */
+ mtdar r10 /* save fault EA to DAR */
+ b DARFixed /* Go back to normal TLB handling */
+
+ /* special handling for r10,r11 since these are modified already */
+153: lwz r11, 4(r0) /* load r11 from memory */
+ b 155f
+154: mfspr r11, SPRN_M_TW /* load r10 from M_TW */
+155: add r10, r10, r11 /* add it */
+ mfctr r11 /* restore r11 */
+ b 151b
+#endif
+
+ .globl giveup_fpu
+giveup_fpu:
+ blr
+
+/*
+ * This is where the main kernel code starts.
+ */
+start_here:
+ /* ptr to current */
+ lis r2,init_task@h
+ ori r2,r2,init_task@l
+
+ /* ptr to phys current thread */
+ tophys(r4,r2)
+ addi r4,r4,THREAD /* init task's THREAD */
+ mtspr SPRN_SPRG_THREAD,r4
+
+ /* stack */
+ lis r1,init_thread_union@ha
+ addi r1,r1,init_thread_union@l
+ li r0,0
+ stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+
+ bl early_init /* We have to do this with MMU on */
+
+/*
+ * Decide what sort of machine this is and initialize the MMU.
+ */
+ li r3,0
+ mr r4,r31
+ bl machine_init
+ bl MMU_init
+
+/*
+ * Go back to running unmapped so we can load up new values
+ * and change to using our exception vectors.
+ * On the 8xx, all we have to do is invalidate the TLB to clear
+ * the old 8M byte TLB mappings and load the page table base register.
+ */
+ /* The right way to do this would be to track it down through
+ * init's THREAD like the context switch code does, but this is
+ * easier......until someone changes init's static structures.
+ */
+ lis r6, swapper_pg_dir@h
+ ori r6, r6, swapper_pg_dir@l
+ tophys(r6,r6)
+#ifdef CONFIG_8xx_CPU6
+ lis r4, cpu6_errata_word@h
+ ori r4, r4, cpu6_errata_word@l
+ li r3, 0x3980
+ stw r3, 12(r4)
+ lwz r3, 12(r4)
+#endif
+ mtspr SPRN_M_TWB, r6
+ lis r4,2f@h
+ ori r4,r4,2f@l
+ tophys(r4,r4)
+ li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR)
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r3
+ rfi
+/* Load up the kernel context */
+2:
+ SYNC /* Force all PTE updates to finish */
+ tlbia /* Clear all TLB entries */
+ sync /* wait for tlbia/tlbie to finish */
+ TLBSYNC /* ... on all CPUs */
+
+ /* set up the PTE pointers for the Abatron bdiGDB.
+ */
+ tovirt(r6,r6)
+ lis r5, abatron_pteptrs@h
+ ori r5, r5, abatron_pteptrs@l
+ stw r5, 0xf0(r0) /* Must match your Abatron config file */
+ tophys(r5,r5)
+ stw r6, 0(r5)
+
+/* Now turn on the MMU for real! */
+ li r4,MSR_KERNEL
+ lis r3,start_kernel@h
+ ori r3,r3,start_kernel@l
+ mtspr SPRN_SRR0,r3
+ mtspr SPRN_SRR1,r4
+ rfi /* enable MMU and jump to start_kernel */
+
+/* Set up the initial MMU state so we can do the first level of
+ * kernel initialization. This maps the first 8 MBytes of memory 1:1
+ * virtual to physical. Also, set the cache mode since that is defined
+ * by TLB entries and perform any additional mapping (like of the IMMR).
+ * If configured to pin some TLBs, we pin the first 8 Mbytes of kernel,
+ * 24 Mbytes of data, and the 8M IMMR space. Anything not covered by
+ * these mappings is mapped by page tables.
+ */
+initial_mmu:
+ tlbia /* Invalidate all TLB entries */
+/* Always pin the first 8 MB ITLB to prevent ITLB
+ misses while mucking around with SRR0/SRR1 in asm
+*/
+ lis r8, MI_RSV4I@h
+ ori r8, r8, 0x1c00
+
+ mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */
+
+#ifdef CONFIG_PIN_TLB
+ lis r10, (MD_RSV4I | MD_RESETVAL)@h
+ ori r10, r10, 0x1c00
+ mr r8, r10
+#else
+ lis r10, MD_RESETVAL@h
+#endif
+#ifndef CONFIG_8xx_COPYBACK
+ oris r10, r10, MD_WTDEF@h
+#endif
+ mtspr SPRN_MD_CTR, r10 /* Set data TLB control */
+
+ /* Now map the lower 8 Meg into the TLBs. For this quick hack,
+ * we can load the instruction and data TLB registers with the
+ * same values.
+ */
+ lis r8, KERNELBASE@h /* Create vaddr for TLB */
+ ori r8, r8, MI_EVALID /* Mark it valid */
+ mtspr SPRN_MI_EPN, r8
+ mtspr SPRN_MD_EPN, r8
+ li r8, MI_PS8MEG /* Set 8M byte page */
+ ori r8, r8, MI_SVALID /* Make it valid */
+ mtspr SPRN_MI_TWC, r8
+ mtspr SPRN_MD_TWC, r8
+ li r8, MI_BOOTINIT /* Create RPN for address 0 */
+ mtspr SPRN_MI_RPN, r8 /* Store TLB entry */
+ mtspr SPRN_MD_RPN, r8
+ lis r8, MI_Kp@h /* Set the protection mode */
+ mtspr SPRN_MI_AP, r8
+ mtspr SPRN_MD_AP, r8
+
+ /* Map another 8 MByte at the IMMR to get the processor
+ * internal registers (among other things).
+ */
+#ifdef CONFIG_PIN_TLB
+ addi r10, r10, 0x0100
+ mtspr SPRN_MD_CTR, r10
+#endif
+ mfspr r9, 638 /* Get current IMMR */
+ andis. r9, r9, 0xff80 /* Get 8Mbyte boundary */
+
+ mr r8, r9 /* Create vaddr for TLB */
+ ori r8, r8, MD_EVALID /* Mark it valid */
+ mtspr SPRN_MD_EPN, r8
+ li r8, MD_PS8MEG /* Set 8M byte page */
+ ori r8, r8, MD_SVALID /* Make it valid */
+ mtspr SPRN_MD_TWC, r8
+ mr r8, r9 /* Create paddr for TLB */
+ ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */
+ mtspr SPRN_MD_RPN, r8
+
+#ifdef CONFIG_PIN_TLB
+ /* Map two more 8M kernel data pages.
+ */
+ addi r10, r10, 0x0100
+ mtspr SPRN_MD_CTR, r10
+
+ lis r8, KERNELBASE@h /* Create vaddr for TLB */
+ addis r8, r8, 0x0080 /* Add 8M */
+ ori r8, r8, MI_EVALID /* Mark it valid */
+ mtspr SPRN_MD_EPN, r8
+ li r9, MI_PS8MEG /* Set 8M byte page */
+ ori r9, r9, MI_SVALID /* Make it valid */
+ mtspr SPRN_MD_TWC, r9
+ li r11, MI_BOOTINIT /* Create RPN for address 0 */
+ addis r11, r11, 0x0080 /* Add 8M */
+ mtspr SPRN_MD_RPN, r11
+
+ addis r8, r8, 0x0080 /* Add 8M */
+ mtspr SPRN_MD_EPN, r8
+ mtspr SPRN_MD_TWC, r9
+ addis r11, r11, 0x0080 /* Add 8M */
+ mtspr SPRN_MD_RPN, r11
+#endif
+
+ /* Since the cache is enabled according to the information we
+ * just loaded into the TLB, invalidate and enable the caches here.
+ * We should probably check/set other modes....later.
+ */
+ lis r8, IDC_INVALL@h
+ mtspr SPRN_IC_CST, r8
+ mtspr SPRN_DC_CST, r8
+ lis r8, IDC_ENABLE@h
+ mtspr SPRN_IC_CST, r8
+#ifdef CONFIG_8xx_COPYBACK
+ mtspr SPRN_DC_CST, r8
+#else
+ /* For a debug option, I left this here to easily enable
+ * the write through cache mode
+ */
+ lis r8, DC_SFWT@h
+ mtspr SPRN_DC_CST, r8
+ lis r8, IDC_ENABLE@h
+ mtspr SPRN_DC_CST, r8
+#endif
+ blr
+
+
+/*
+ * Set up to use a given MMU context.
+ * r3 is context number, r4 is PGD pointer.
+ *
+ * We place the physical address of the new task page directory loaded
+ * into the MMU base register, and set the ASID compare register with
+ * the new "context."
+ */
+_GLOBAL(set_context)
+
+#ifdef CONFIG_BDI_SWITCH
+ /* Context switch the PTE pointer for the Abatron BDI2000.
+ * The PGDIR is passed as second argument.
+ */
+ lis r5, KERNELBASE@h
+ lwz r5, 0xf0(r5)
+ stw r4, 0x4(r5)
+#endif
+
+#ifdef CONFIG_8xx_CPU6
+ lis r6, cpu6_errata_word@h
+ ori r6, r6, cpu6_errata_word@l
+ tophys (r4, r4)
+ li r7, 0x3980
+ stw r7, 12(r6)
+ lwz r7, 12(r6)
+ mtspr SPRN_M_TWB, r4 /* Update MMU base address */
+ li r7, 0x3380
+ stw r7, 12(r6)
+ lwz r7, 12(r6)
+ mtspr SPRN_M_CASID, r3 /* Update context */
+#else
+ mtspr SPRN_M_CASID,r3 /* Update context */
+ tophys (r4, r4)
+ mtspr SPRN_M_TWB, r4 /* and pgd */
+#endif
+ SYNC
+ blr
+
+#ifdef CONFIG_8xx_CPU6
+/* It's here because it is unique to the 8xx.
+ * It is important we get called with interrupts disabled. I used to
+ * do that, but it appears that all code that calls this already had
+ * interrupt disabled.
+ */
+ .globl set_dec_cpu6
+set_dec_cpu6:
+ lis r7, cpu6_errata_word@h
+ ori r7, r7, cpu6_errata_word@l
+ li r4, 0x2c00
+ stw r4, 8(r7)
+ lwz r4, 8(r7)
+ mtspr 22, r3 /* Update Decrementer */
+ SYNC
+ blr
+#endif
+
+/*
+ * We put a few things here that have to be page-aligned.
+ * This stuff goes at the beginning of the data segment,
+ * which is page-aligned.
+ */
+ .data
+ .globl sdata
+sdata:
+ .globl empty_zero_page
+empty_zero_page:
+ .space 4096
+
+ .globl swapper_pg_dir
+swapper_pg_dir:
+ .space 4096
+
+/* Room for two PTE table poiners, usually the kernel and current user
+ * pointer to their respective root page table (pgdir).
+ */
+abatron_pteptrs:
+ .space 8
+
+#ifdef CONFIG_8xx_CPU6
+ .globl cpu6_errata_word
+cpu6_errata_word:
+ .space 16
+#endif
+
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
new file mode 100644
index 00000000..0e417538
--- /dev/null
+++ b/arch/powerpc/kernel/head_booke.h
@@ -0,0 +1,427 @@
+#ifndef __HEAD_BOOKE_H__
+#define __HEAD_BOOKE_H__
+
+#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
+/*
+ * Macros used for common Book-e exception handling
+ */
+
+#define SET_IVOR(vector_number, vector_label) \
+ li r26,vector_label@l; \
+ mtspr SPRN_IVOR##vector_number,r26; \
+ sync
+
+#if (THREAD_SHIFT < 15)
+#define ALLOC_STACK_FRAME(reg, val) \
+ addi reg,reg,val
+#else
+#define ALLOC_STACK_FRAME(reg, val) \
+ addis reg,reg,val@ha; \
+ addi reg,reg,val@l
+#endif
+
+/*
+ * Macro used to get to thread save registers.
+ * Note that entries 0-3 are used for the prolog code, and the remaining
+ * entries are available for specific exception use in the event a handler
+ * requires more than 4 scratch registers.
+ */
+#define THREAD_NORMSAVE(offset) (THREAD_NORMSAVES + (offset * 4))
+
+#define NORMAL_EXCEPTION_PROLOG \
+ mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \
+ mfspr r10, SPRN_SPRG_THREAD; \
+ stw r11, THREAD_NORMSAVE(0)(r10); \
+ stw r13, THREAD_NORMSAVE(2)(r10); \
+ mfcr r13; /* save CR in r13 for now */\
+ mfspr r11,SPRN_SRR1; /* check whether user or kernel */\
+ andi. r11,r11,MSR_PR; \
+ mr r11, r1; \
+ beq 1f; \
+ /* if from user, start at top of this thread's kernel stack */ \
+ lwz r11, THREAD_INFO-THREAD(r10); \
+ ALLOC_STACK_FRAME(r11, THREAD_SIZE); \
+1 : subi r11, r11, INT_FRAME_SIZE; /* Allocate exception frame */ \
+ stw r13, _CCR(r11); /* save various registers */ \
+ stw r12,GPR12(r11); \
+ stw r9,GPR9(r11); \
+ mfspr r13, SPRN_SPRG_RSCRATCH0; \
+ stw r13, GPR10(r11); \
+ lwz r12, THREAD_NORMSAVE(0)(r10); \
+ stw r12,GPR11(r11); \
+ lwz r13, THREAD_NORMSAVE(2)(r10); /* restore r13 */ \
+ mflr r10; \
+ stw r10,_LINK(r11); \
+ mfspr r12,SPRN_SRR0; \
+ stw r1, GPR1(r11); \
+ mfspr r9,SPRN_SRR1; \
+ stw r1, 0(r11); \
+ mr r1, r11; \
+ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
+ stw r0,GPR0(r11); \
+ lis r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \
+ addi r10, r10, STACK_FRAME_REGS_MARKER@l; \
+ stw r10, 8(r11); \
+ SAVE_4GPRS(3, r11); \
+ SAVE_2GPRS(7, r11)
+
+/* To handle the additional exception priority levels on 40x and Book-E
+ * processors we allocate a stack per additional priority level.
+ *
+ * On 40x critical is the only additional level
+ * On 44x/e500 we have critical and machine check
+ * On e200 we have critical and debug (machine check occurs via critical)
+ *
+ * Additionally we reserve a SPRG for each priority level so we can free up a
+ * GPR to use as the base for indirect access to the exception stacks. This
+ * is necessary since the MMU is always on, for Book-E parts, and the stacks
+ * are offset from KERNELBASE.
+ *
+ * There is some space optimization to be had here if desired. However
+ * to allow for a common kernel with support for debug exceptions either
+ * going to critical or their own debug level we aren't currently
+ * providing configurations that micro-optimize space usage.
+ */
+
+#define MC_STACK_BASE mcheckirq_ctx
+#define CRIT_STACK_BASE critirq_ctx
+
+/* only on e500mc/e200 */
+#define DBG_STACK_BASE dbgirq_ctx
+
+#define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE)
+
+#ifdef CONFIG_SMP
+#define BOOKE_LOAD_EXC_LEVEL_STACK(level) \
+ mfspr r8,SPRN_PIR; \
+ slwi r8,r8,2; \
+ addis r8,r8,level##_STACK_BASE@ha; \
+ lwz r8,level##_STACK_BASE@l(r8); \
+ addi r8,r8,EXC_LVL_FRAME_OVERHEAD;
+#else
+#define BOOKE_LOAD_EXC_LEVEL_STACK(level) \
+ lis r8,level##_STACK_BASE@ha; \
+ lwz r8,level##_STACK_BASE@l(r8); \
+ addi r8,r8,EXC_LVL_FRAME_OVERHEAD;
+#endif
+
+/*
+ * Exception prolog for critical/machine check exceptions. This is a
+ * little different from the normal exception prolog above since a
+ * critical/machine check exception can potentially occur at any point
+ * during normal exception processing. Thus we cannot use the same SPRG
+ * registers as the normal prolog above. Instead we use a portion of the
+ * critical/machine check exception stack at low physical addresses.
+ */
+#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, exc_level_srr0, exc_level_srr1) \
+ mtspr SPRN_SPRG_WSCRATCH_##exc_level,r8; \
+ BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \
+ stw r9,GPR9(r8); /* save various registers */\
+ mfcr r9; /* save CR in r9 for now */\
+ stw r10,GPR10(r8); \
+ stw r11,GPR11(r8); \
+ stw r9,_CCR(r8); /* save CR on stack */\
+ mfspr r10,exc_level_srr1; /* check whether user or kernel */\
+ andi. r10,r10,MSR_PR; \
+ mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
+ lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
+ addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\
+ beq 1f; \
+ /* COMING FROM USER MODE */ \
+ stw r9,_CCR(r11); /* save CR */\
+ lwz r10,GPR10(r8); /* copy regs from exception stack */\
+ lwz r9,GPR9(r8); \
+ stw r10,GPR10(r11); \
+ lwz r10,GPR11(r8); \
+ stw r9,GPR9(r11); \
+ stw r10,GPR11(r11); \
+ b 2f; \
+ /* COMING FROM PRIV MODE */ \
+1: lwz r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r11); \
+ lwz r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r11); \
+ stw r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r8); \
+ stw r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r8); \
+ lwz r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r11); \
+ stw r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r8); \
+ mr r11,r8; \
+2: mfspr r8,SPRN_SPRG_RSCRATCH_##exc_level; \
+ stw r12,GPR12(r11); /* save various registers */\
+ mflr r10; \
+ stw r10,_LINK(r11); \
+ mfspr r12,SPRN_DEAR; /* save DEAR and ESR in the frame */\
+ stw r12,_DEAR(r11); /* since they may have had stuff */\
+ mfspr r9,SPRN_ESR; /* in them at the point where the */\
+ stw r9,_ESR(r11); /* exception was taken */\
+ mfspr r12,exc_level_srr0; \
+ stw r1,GPR1(r11); \
+ mfspr r9,exc_level_srr1; \
+ stw r1,0(r11); \
+ mr r1,r11; \
+ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
+ stw r0,GPR0(r11); \
+ SAVE_4GPRS(3, r11); \
+ SAVE_2GPRS(7, r11)
+
+#define CRITICAL_EXCEPTION_PROLOG \
+ EXC_LEVEL_EXCEPTION_PROLOG(CRIT, SPRN_CSRR0, SPRN_CSRR1)
+#define DEBUG_EXCEPTION_PROLOG \
+ EXC_LEVEL_EXCEPTION_PROLOG(DBG, SPRN_DSRR0, SPRN_DSRR1)
+#define MCHECK_EXCEPTION_PROLOG \
+ EXC_LEVEL_EXCEPTION_PROLOG(MC, SPRN_MCSRR0, SPRN_MCSRR1)
+
+/*
+ * Exception vectors.
+ */
+#define START_EXCEPTION(label) \
+ .align 5; \
+label:
+
+#define FINISH_EXCEPTION(func) \
+ bl transfer_to_handler_full; \
+ .long func; \
+ .long ret_from_except_full
+
+#define EXCEPTION(n, label, hdlr, xfer) \
+ START_EXCEPTION(label); \
+ NORMAL_EXCEPTION_PROLOG; \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ xfer(n, hdlr)
+
+#define CRITICAL_EXCEPTION(n, label, hdlr) \
+ START_EXCEPTION(label); \
+ CRITICAL_EXCEPTION_PROLOG; \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
+ NOCOPY, crit_transfer_to_handler, \
+ ret_from_crit_exc)
+
+#define MCHECK_EXCEPTION(n, label, hdlr) \
+ START_EXCEPTION(label); \
+ MCHECK_EXCEPTION_PROLOG; \
+ mfspr r5,SPRN_ESR; \
+ stw r5,_ESR(r11); \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
+ NOCOPY, mcheck_transfer_to_handler, \
+ ret_from_mcheck_exc)
+
+#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \
+ li r10,trap; \
+ stw r10,_TRAP(r11); \
+ lis r10,msr@h; \
+ ori r10,r10,msr@l; \
+ copyee(r10, r9); \
+ bl tfer; \
+ .long hdlr; \
+ .long ret
+
+#define COPY_EE(d, s) rlwimi d,s,0,16,16
+#define NOCOPY(d, s)
+
+#define EXC_XFER_STD(n, hdlr) \
+ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \
+ ret_from_except_full)
+
+#define EXC_XFER_LITE(n, hdlr) \
+ EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
+ ret_from_except)
+
+#define EXC_XFER_EE(n, hdlr) \
+ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
+ ret_from_except_full)
+
+#define EXC_XFER_EE_LITE(n, hdlr) \
+ EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
+ ret_from_except)
+
+/* Check for a single step debug exception while in an exception
+ * handler before state has been saved. This is to catch the case
+ * where an instruction that we are trying to single step causes
+ * an exception (eg ITLB/DTLB miss) and thus the first instruction of
+ * the exception handler generates a single step debug exception.
+ *
+ * If we get a debug trap on the first instruction of an exception handler,
+ * we reset the MSR_DE in the _exception handler's_ MSR (the debug trap is
+ * a critical exception, so we are using SPRN_CSRR1 to manipulate the MSR).
+ * The exception handler was handling a non-critical interrupt, so it will
+ * save (and later restore) the MSR via SPRN_CSRR1, which will still have
+ * the MSR_DE bit set.
+ */
+#define DEBUG_DEBUG_EXCEPTION \
+ START_EXCEPTION(DebugDebug); \
+ DEBUG_EXCEPTION_PROLOG; \
+ \
+ /* \
+ * If there is a single step or branch-taken exception in an \
+ * exception entry sequence, it was probably meant to apply to \
+ * the code where the exception occurred (since exception entry \
+ * doesn't turn off DE automatically). We simulate the effect \
+ * of turning off DE on entry to an exception handler by turning \
+ * off DE in the DSRR1 value and clearing the debug status. \
+ */ \
+ mfspr r10,SPRN_DBSR; /* check single-step/branch taken */ \
+ andis. r10,r10,(DBSR_IC|DBSR_BT)@h; \
+ beq+ 2f; \
+ \
+ lis r10,KERNELBASE@h; /* check if exception in vectors */ \
+ ori r10,r10,KERNELBASE@l; \
+ cmplw r12,r10; \
+ blt+ 2f; /* addr below exception vectors */ \
+ \
+ lis r10,DebugDebug@h; \
+ ori r10,r10,DebugDebug@l; \
+ cmplw r12,r10; \
+ bgt+ 2f; /* addr above exception vectors */ \
+ \
+ /* here it looks like we got an inappropriate debug exception. */ \
+1: rlwinm r9,r9,0,~MSR_DE; /* clear DE in the CDRR1 value */ \
+ lis r10,(DBSR_IC|DBSR_BT)@h; /* clear the IC event */ \
+ mtspr SPRN_DBSR,r10; \
+ /* restore state and get out */ \
+ lwz r10,_CCR(r11); \
+ lwz r0,GPR0(r11); \
+ lwz r1,GPR1(r11); \
+ mtcrf 0x80,r10; \
+ mtspr SPRN_DSRR0,r12; \
+ mtspr SPRN_DSRR1,r9; \
+ lwz r9,GPR9(r11); \
+ lwz r12,GPR12(r11); \
+ mtspr SPRN_SPRG_WSCRATCH_DBG,r8; \
+ BOOKE_LOAD_EXC_LEVEL_STACK(DBG); /* r8 points to the debug stack */ \
+ lwz r10,GPR10(r8); \
+ lwz r11,GPR11(r8); \
+ mfspr r8,SPRN_SPRG_RSCRATCH_DBG; \
+ \
+ PPC_RFDI; \
+ b .; \
+ \
+ /* continue normal handling for a debug exception... */ \
+2: mfspr r4,SPRN_DBSR; \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc)
+
+#define DEBUG_CRIT_EXCEPTION \
+ START_EXCEPTION(DebugCrit); \
+ CRITICAL_EXCEPTION_PROLOG; \
+ \
+ /* \
+ * If there is a single step or branch-taken exception in an \
+ * exception entry sequence, it was probably meant to apply to \
+ * the code where the exception occurred (since exception entry \
+ * doesn't turn off DE automatically). We simulate the effect \
+ * of turning off DE on entry to an exception handler by turning \
+ * off DE in the CSRR1 value and clearing the debug status. \
+ */ \
+ mfspr r10,SPRN_DBSR; /* check single-step/branch taken */ \
+ andis. r10,r10,(DBSR_IC|DBSR_BT)@h; \
+ beq+ 2f; \
+ \
+ lis r10,KERNELBASE@h; /* check if exception in vectors */ \
+ ori r10,r10,KERNELBASE@l; \
+ cmplw r12,r10; \
+ blt+ 2f; /* addr below exception vectors */ \
+ \
+ lis r10,DebugCrit@h; \
+ ori r10,r10,DebugCrit@l; \
+ cmplw r12,r10; \
+ bgt+ 2f; /* addr above exception vectors */ \
+ \
+ /* here it looks like we got an inappropriate debug exception. */ \
+1: rlwinm r9,r9,0,~MSR_DE; /* clear DE in the CSRR1 value */ \
+ lis r10,(DBSR_IC|DBSR_BT)@h; /* clear the IC event */ \
+ mtspr SPRN_DBSR,r10; \
+ /* restore state and get out */ \
+ lwz r10,_CCR(r11); \
+ lwz r0,GPR0(r11); \
+ lwz r1,GPR1(r11); \
+ mtcrf 0x80,r10; \
+ mtspr SPRN_CSRR0,r12; \
+ mtspr SPRN_CSRR1,r9; \
+ lwz r9,GPR9(r11); \
+ lwz r12,GPR12(r11); \
+ mtspr SPRN_SPRG_WSCRATCH_CRIT,r8; \
+ BOOKE_LOAD_EXC_LEVEL_STACK(CRIT); /* r8 points to the debug stack */ \
+ lwz r10,GPR10(r8); \
+ lwz r11,GPR11(r8); \
+ mfspr r8,SPRN_SPRG_RSCRATCH_CRIT; \
+ \
+ rfci; \
+ b .; \
+ \
+ /* continue normal handling for a critical exception... */ \
+2: mfspr r4,SPRN_DBSR; \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
+
+#define DATA_STORAGE_EXCEPTION \
+ START_EXCEPTION(DataStorage) \
+ NORMAL_EXCEPTION_PROLOG; \
+ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
+ stw r5,_ESR(r11); \
+ mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \
+ EXC_XFER_LITE(0x0300, handle_page_fault)
+
+#define INSTRUCTION_STORAGE_EXCEPTION \
+ START_EXCEPTION(InstructionStorage) \
+ NORMAL_EXCEPTION_PROLOG; \
+ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
+ stw r5,_ESR(r11); \
+ mr r4,r12; /* Pass SRR0 as arg2 */ \
+ li r5,0; /* Pass zero as arg3 */ \
+ EXC_XFER_LITE(0x0400, handle_page_fault)
+
+#define ALIGNMENT_EXCEPTION \
+ START_EXCEPTION(Alignment) \
+ NORMAL_EXCEPTION_PROLOG; \
+ mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \
+ stw r4,_DEAR(r11); \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ EXC_XFER_EE(0x0600, alignment_exception)
+
+#define PROGRAM_EXCEPTION \
+ START_EXCEPTION(Program) \
+ NORMAL_EXCEPTION_PROLOG; \
+ mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \
+ stw r4,_ESR(r11); \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ EXC_XFER_STD(0x0700, program_check_exception)
+
+#define DECREMENTER_EXCEPTION \
+ START_EXCEPTION(Decrementer) \
+ NORMAL_EXCEPTION_PROLOG; \
+ lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \
+ mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ EXC_XFER_LITE(0x0900, timer_interrupt)
+
+#define FP_UNAVAILABLE_EXCEPTION \
+ START_EXCEPTION(FloatingPointUnavailable) \
+ NORMAL_EXCEPTION_PROLOG; \
+ beq 1f; \
+ bl load_up_fpu; /* if from user, just load it up */ \
+ b fast_exception_return; \
+1: addi r3,r1,STACK_FRAME_OVERHEAD; \
+ EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception)
+
+#ifndef __ASSEMBLY__
+struct exception_regs {
+ unsigned long mas0;
+ unsigned long mas1;
+ unsigned long mas2;
+ unsigned long mas3;
+ unsigned long mas6;
+ unsigned long mas7;
+ unsigned long srr0;
+ unsigned long srr1;
+ unsigned long csrr0;
+ unsigned long csrr1;
+ unsigned long dsrr0;
+ unsigned long dsrr1;
+ unsigned long saved_ksp_limit;
+};
+
+/* ensure this structure is always sized to a multiple of the stack alignment */
+#define STACK_EXC_LVL_FRAME_SIZE _ALIGN_UP(sizeof (struct exception_regs), 16)
+
+#endif /* __ASSEMBLY__ */
+#endif /* __HEAD_BOOKE_H__ */
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
new file mode 100644
index 00000000..28e62598
--- /dev/null
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -0,0 +1,1084 @@
+/*
+ * Kernel execution entry point code.
+ *
+ * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
+ * Initial PowerPC version.
+ * Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu>
+ * Rewritten for PReP
+ * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ * Low-level exception handers, MMU support, and rewrite.
+ * Copyright (c) 1997 Dan Malek <dmalek@jlc.net>
+ * PowerPC 8xx modifications.
+ * Copyright (c) 1998-1999 TiVo, Inc.
+ * PowerPC 403GCX modifications.
+ * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ * PowerPC 403GCX/405GP modifications.
+ * Copyright 2000 MontaVista Software Inc.
+ * PPC405 modifications
+ * PowerPC 403GCX/405GP modifications.
+ * Author: MontaVista Software, Inc.
+ * frank_rowand@mvista.com or source@mvista.com
+ * debbie_chu@mvista.com
+ * Copyright 2002-2004 MontaVista Software, Inc.
+ * PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
+ * Copyright 2004 Freescale Semiconductor, Inc
+ * PowerPC e500 modifications, Kumar Gala <galak@kernel.crashing.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+#include <asm/ptrace.h>
+#include "head_booke.h"
+
+/* As with the other PowerPC ports, it is expected that when code
+ * execution begins here, the following registers contain valid, yet
+ * optional, information:
+ *
+ * r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.)
+ * r4 - Starting address of the init RAM disk
+ * r5 - Ending address of the init RAM disk
+ * r6 - Start of kernel command line string (e.g. "mem=128")
+ * r7 - End of kernel command line string
+ *
+ */
+ __HEAD
+_ENTRY(_stext);
+_ENTRY(_start);
+ /*
+ * Reserve a word at a fixed location to store the address
+ * of abatron_pteptrs
+ */
+ nop
+
+ /* Translate device tree address to physical, save in r30/r31 */
+ mfmsr r16
+ mfspr r17,SPRN_PID
+ rlwinm r17,r17,16,0x3fff0000 /* turn PID into MAS6[SPID] */
+ rlwimi r17,r16,28,0x00000001 /* turn MSR[DS] into MAS6[SAS] */
+ mtspr SPRN_MAS6,r17
+
+ tlbsx 0,r3 /* must succeed */
+
+ mfspr r16,SPRN_MAS1
+ mfspr r20,SPRN_MAS3
+ rlwinm r17,r16,25,0x1f /* r17 = log2(page size) */
+ li r18,1024
+ slw r18,r18,r17 /* r18 = page size */
+ addi r18,r18,-1
+ and r19,r3,r18 /* r19 = page offset */
+ andc r31,r20,r18 /* r31 = page base */
+ or r31,r31,r19 /* r31 = devtree phys addr */
+ mfspr r30,SPRN_MAS7
+
+ li r25,0 /* phys kernel start (low) */
+ li r24,0 /* CPU number */
+ li r23,0 /* phys kernel start (high) */
+
+/* We try to not make any assumptions about how the boot loader
+ * setup or used the TLBs. We invalidate all mappings from the
+ * boot loader and load a single entry in TLB1[0] to map the
+ * first 64M of kernel memory. Any boot info passed from the
+ * bootloader needs to live in this first 64M.
+ *
+ * Requirement on bootloader:
+ * - The page we're executing in needs to reside in TLB1 and
+ * have IPROT=1. If not an invalidate broadcast could
+ * evict the entry we're currently executing in.
+ *
+ * r3 = Index of TLB1 were executing in
+ * r4 = Current MSR[IS]
+ * r5 = Index of TLB1 temp mapping
+ *
+ * Later in mapin_ram we will correctly map lowmem, and resize TLB1[0]
+ * if needed
+ */
+
+_ENTRY(__early_start)
+
+#define ENTRY_MAPPING_BOOT_SETUP
+#include "fsl_booke_entry_mapping.S"
+#undef ENTRY_MAPPING_BOOT_SETUP
+
+ /* Establish the interrupt vector offsets */
+ SET_IVOR(0, CriticalInput);
+ SET_IVOR(1, MachineCheck);
+ SET_IVOR(2, DataStorage);
+ SET_IVOR(3, InstructionStorage);
+ SET_IVOR(4, ExternalInput);
+ SET_IVOR(5, Alignment);
+ SET_IVOR(6, Program);
+ SET_IVOR(7, FloatingPointUnavailable);
+ SET_IVOR(8, SystemCall);
+ SET_IVOR(9, AuxillaryProcessorUnavailable);
+ SET_IVOR(10, Decrementer);
+ SET_IVOR(11, FixedIntervalTimer);
+ SET_IVOR(12, WatchdogTimer);
+ SET_IVOR(13, DataTLBError);
+ SET_IVOR(14, InstructionTLBError);
+ SET_IVOR(15, DebugCrit);
+
+ /* Establish the interrupt vector base */
+ lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */
+ mtspr SPRN_IVPR,r4
+
+ /* Setup the defaults for TLB entries */
+ li r2,(MAS4_TSIZED(BOOK3E_PAGESZ_4K))@l
+#ifdef CONFIG_E200
+ oris r2,r2,MAS4_TLBSELD(1)@h
+#endif
+ mtspr SPRN_MAS4, r2
+
+#if 0
+ /* Enable DOZE */
+ mfspr r2,SPRN_HID0
+ oris r2,r2,HID0_DOZE@h
+ mtspr SPRN_HID0, r2
+#endif
+
+#if !defined(CONFIG_BDI_SWITCH)
+ /*
+ * The Abatron BDI JTAG debugger does not tolerate others
+ * mucking with the debug registers.
+ */
+ lis r2,DBCR0_IDM@h
+ mtspr SPRN_DBCR0,r2
+ isync
+ /* clear any residual debug events */
+ li r2,-1
+ mtspr SPRN_DBSR,r2
+#endif
+
+#ifdef CONFIG_SMP
+ /* Check to see if we're the second processor, and jump
+ * to the secondary_start code if so
+ */
+ lis r24, boot_cpuid@h
+ ori r24, r24, boot_cpuid@l
+ lwz r24, 0(r24)
+ cmpwi r24, -1
+ mfspr r24,SPRN_PIR
+ bne __secondary_start
+#endif
+
+ /*
+ * This is where the main kernel code starts.
+ */
+
+ /* ptr to current */
+ lis r2,init_task@h
+ ori r2,r2,init_task@l
+
+ /* ptr to current thread */
+ addi r4,r2,THREAD /* init task's THREAD */
+ mtspr SPRN_SPRG_THREAD,r4
+
+ /* stack */
+ lis r1,init_thread_union@h
+ ori r1,r1,init_thread_union@l
+ li r0,0
+ stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+
+ rlwinm r22,r1,0,0,31-THREAD_SHIFT /* current thread_info */
+ stw r24, TI_CPU(r22)
+
+ bl early_init
+
+#ifdef CONFIG_DYNAMIC_MEMSTART
+ lis r3,kernstart_addr@ha
+ la r3,kernstart_addr@l(r3)
+#ifdef CONFIG_PHYS_64BIT
+ stw r23,0(r3)
+ stw r25,4(r3)
+#else
+ stw r25,0(r3)
+#endif
+#endif
+
+/*
+ * Decide what sort of machine this is and initialize the MMU.
+ */
+ mr r3,r30
+ mr r4,r31
+ bl machine_init
+ bl MMU_init
+
+ /* Setup PTE pointers for the Abatron bdiGDB */
+ lis r6, swapper_pg_dir@h
+ ori r6, r6, swapper_pg_dir@l
+ lis r5, abatron_pteptrs@h
+ ori r5, r5, abatron_pteptrs@l
+ lis r4, KERNELBASE@h
+ ori r4, r4, KERNELBASE@l
+ stw r5, 0(r4) /* Save abatron_pteptrs at a fixed location */
+ stw r6, 0(r5)
+
+ /* Let's move on */
+ lis r4,start_kernel@h
+ ori r4,r4,start_kernel@l
+ lis r3,MSR_KERNEL@h
+ ori r3,r3,MSR_KERNEL@l
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r3
+ rfi /* change context and jump to start_kernel */
+
+/* Macros to hide the PTE size differences
+ *
+ * FIND_PTE -- walks the page tables given EA & pgdir pointer
+ * r10 -- EA of fault
+ * r11 -- PGDIR pointer
+ * r12 -- free
+ * label 2: is the bailout case
+ *
+ * if we find the pte (fall through):
+ * r11 is low pte word
+ * r12 is pointer to the pte
+ * r10 is the pshift from the PGD, if we're a hugepage
+ */
+#ifdef CONFIG_PTE_64BIT
+#ifdef CONFIG_HUGETLB_PAGE
+#define FIND_PTE \
+ rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \
+ lwzx r11, r12, r11; /* Get pgd/pmd entry */ \
+ rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \
+ blt 1000f; /* Normal non-huge page */ \
+ beq 2f; /* Bail if no table */ \
+ oris r11, r11, PD_HUGE@h; /* Put back address bit */ \
+ andi. r10, r11, HUGEPD_SHIFT_MASK@l; /* extract size field */ \
+ xor r12, r10, r11; /* drop size bits from pointer */ \
+ b 1001f; \
+1000: rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \
+ li r10, 0; /* clear r10 */ \
+1001: lwz r11, 4(r12); /* Get pte entry */
+#else
+#define FIND_PTE \
+ rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \
+ lwzx r11, r12, r11; /* Get pgd/pmd entry */ \
+ rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \
+ beq 2f; /* Bail if no table */ \
+ rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \
+ lwz r11, 4(r12); /* Get pte entry */
+#endif /* HUGEPAGE */
+#else /* !PTE_64BIT */
+#define FIND_PTE \
+ rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \
+ lwz r11, 0(r11); /* Get L1 entry */ \
+ rlwinm. r12, r11, 0, 0, 19; /* Extract L2 (pte) base address */ \
+ beq 2f; /* Bail if no table */ \
+ rlwimi r12, r10, 22, 20, 29; /* Compute PTE address */ \
+ lwz r11, 0(r12); /* Get Linux PTE */
+#endif
+
+/*
+ * Interrupt vector entry code
+ *
+ * The Book E MMUs are always on so we don't need to handle
+ * interrupts in real mode as with previous PPC processors. In
+ * this case we handle interrupts in the kernel virtual address
+ * space.
+ *
+ * Interrupt vectors are dynamically placed relative to the
+ * interrupt prefix as determined by the address of interrupt_base.
+ * The interrupt vectors offsets are programmed using the labels
+ * for each interrupt vector entry.
+ *
+ * Interrupt vectors must be aligned on a 16 byte boundary.
+ * We align on a 32 byte cache line boundary for good measure.
+ */
+
+interrupt_base:
+ /* Critical Input Interrupt */
+ CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception)
+
+ /* Machine Check Interrupt */
+#ifdef CONFIG_E200
+ /* no RFMCI, MCSRRs on E200 */
+ CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
+#else
+ MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
+#endif
+
+ /* Data Storage Interrupt */
+ START_EXCEPTION(DataStorage)
+ NORMAL_EXCEPTION_PROLOG
+ mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
+ stw r5,_ESR(r11)
+ mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
+ andis. r10,r5,(ESR_ILK|ESR_DLK)@h
+ bne 1f
+ EXC_XFER_LITE(0x0300, handle_page_fault)
+1:
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_EE_LITE(0x0300, CacheLockingException)
+
+ /* Instruction Storage Interrupt */
+ INSTRUCTION_STORAGE_EXCEPTION
+
+ /* External Input Interrupt */
+ EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE)
+
+ /* Alignment Interrupt */
+ ALIGNMENT_EXCEPTION
+
+ /* Program Interrupt */
+ PROGRAM_EXCEPTION
+
+ /* Floating Point Unavailable Interrupt */
+#ifdef CONFIG_PPC_FPU
+ FP_UNAVAILABLE_EXCEPTION
+#else
+#ifdef CONFIG_E200
+ /* E200 treats 'normal' floating point instructions as FP Unavail exception */
+ EXCEPTION(0x0800, FloatingPointUnavailable, program_check_exception, EXC_XFER_EE)
+#else
+ EXCEPTION(0x0800, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
+#endif
+#endif
+
+ /* System Call Interrupt */
+ START_EXCEPTION(SystemCall)
+ NORMAL_EXCEPTION_PROLOG
+ EXC_XFER_EE_LITE(0x0c00, DoSyscall)
+
+ /* Auxiliary Processor Unavailable Interrupt */
+ EXCEPTION(0x2900, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
+
+ /* Decrementer Interrupt */
+ DECREMENTER_EXCEPTION
+
+ /* Fixed Internal Timer Interrupt */
+ /* TODO: Add FIT support */
+ EXCEPTION(0x3100, FixedIntervalTimer, unknown_exception, EXC_XFER_EE)
+
+ /* Watchdog Timer Interrupt */
+#ifdef CONFIG_BOOKE_WDT
+ CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException)
+#else
+ CRITICAL_EXCEPTION(0x3200, WatchdogTimer, unknown_exception)
+#endif
+
+ /* Data TLB Error Interrupt */
+ START_EXCEPTION(DataTLBError)
+ mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
+ mfspr r10, SPRN_SPRG_THREAD
+ stw r11, THREAD_NORMSAVE(0)(r10)
+ stw r12, THREAD_NORMSAVE(1)(r10)
+ stw r13, THREAD_NORMSAVE(2)(r10)
+ mfcr r13
+ stw r13, THREAD_NORMSAVE(3)(r10)
+ mfspr r10, SPRN_DEAR /* Get faulting address */
+
+ /* If we are faulting a kernel address, we have to use the
+ * kernel page tables.
+ */
+ lis r11, PAGE_OFFSET@h
+ cmplw 5, r10, r11
+ blt 5, 3f
+ lis r11, swapper_pg_dir@h
+ ori r11, r11, swapper_pg_dir@l
+
+ mfspr r12,SPRN_MAS1 /* Set TID to 0 */
+ rlwinm r12,r12,0,16,1
+ mtspr SPRN_MAS1,r12
+
+ b 4f
+
+ /* Get the PGD for the current thread */
+3:
+ mfspr r11,SPRN_SPRG_THREAD
+ lwz r11,PGDIR(r11)
+
+4:
+ /* Mask of required permission bits. Note that while we
+ * do copy ESR:ST to _PAGE_RW position as trying to write
+ * to an RO page is pretty common, we don't do it with
+ * _PAGE_DIRTY. We could do it, but it's a fairly rare
+ * event so I'd rather take the overhead when it happens
+ * rather than adding an instruction here. We should measure
+ * whether the whole thing is worth it in the first place
+ * as we could avoid loading SPRN_ESR completely in the first
+ * place...
+ *
+ * TODO: Is it worth doing that mfspr & rlwimi in the first
+ * place or can we save a couple of instructions here ?
+ */
+ mfspr r12,SPRN_ESR
+#ifdef CONFIG_PTE_64BIT
+ li r13,_PAGE_PRESENT
+ oris r13,r13,_PAGE_ACCESSED@h
+#else
+ li r13,_PAGE_PRESENT|_PAGE_ACCESSED
+#endif
+ rlwimi r13,r12,11,29,29
+
+ FIND_PTE
+ andc. r13,r13,r11 /* Check permission */
+
+#ifdef CONFIG_PTE_64BIT
+#ifdef CONFIG_SMP
+ subf r13,r11,r12 /* create false data dep */
+ lwzx r13,r11,r13 /* Get upper pte bits */
+#else
+ lwz r13,0(r12) /* Get upper pte bits */
+#endif
+#endif
+
+ bne 2f /* Bail if permission/valid mismach */
+
+ /* Jump to common tlb load */
+ b finish_tlb_load
+2:
+ /* The bailout. Restore registers to pre-exception conditions
+ * and call the heavyweights to help us out.
+ */
+ mfspr r10, SPRN_SPRG_THREAD
+ lwz r11, THREAD_NORMSAVE(3)(r10)
+ mtcr r11
+ lwz r13, THREAD_NORMSAVE(2)(r10)
+ lwz r12, THREAD_NORMSAVE(1)(r10)
+ lwz r11, THREAD_NORMSAVE(0)(r10)
+ mfspr r10, SPRN_SPRG_RSCRATCH0
+ b DataStorage
+
+ /* Instruction TLB Error Interrupt */
+ /*
+ * Nearly the same as above, except we get our
+ * information from different registers and bailout
+ * to a different point.
+ */
+ START_EXCEPTION(InstructionTLBError)
+ mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
+ mfspr r10, SPRN_SPRG_THREAD
+ stw r11, THREAD_NORMSAVE(0)(r10)
+ stw r12, THREAD_NORMSAVE(1)(r10)
+ stw r13, THREAD_NORMSAVE(2)(r10)
+ mfcr r13
+ stw r13, THREAD_NORMSAVE(3)(r10)
+ mfspr r10, SPRN_SRR0 /* Get faulting address */
+
+ /* If we are faulting a kernel address, we have to use the
+ * kernel page tables.
+ */
+ lis r11, PAGE_OFFSET@h
+ cmplw 5, r10, r11
+ blt 5, 3f
+ lis r11, swapper_pg_dir@h
+ ori r11, r11, swapper_pg_dir@l
+
+ mfspr r12,SPRN_MAS1 /* Set TID to 0 */
+ rlwinm r12,r12,0,16,1
+ mtspr SPRN_MAS1,r12
+
+ /* Make up the required permissions for kernel code */
+#ifdef CONFIG_PTE_64BIT
+ li r13,_PAGE_PRESENT | _PAGE_BAP_SX
+ oris r13,r13,_PAGE_ACCESSED@h
+#else
+ li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+#endif
+ b 4f
+
+ /* Get the PGD for the current thread */
+3:
+ mfspr r11,SPRN_SPRG_THREAD
+ lwz r11,PGDIR(r11)
+
+ /* Make up the required permissions for user code */
+#ifdef CONFIG_PTE_64BIT
+ li r13,_PAGE_PRESENT | _PAGE_BAP_UX
+ oris r13,r13,_PAGE_ACCESSED@h
+#else
+ li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+#endif
+
+4:
+ FIND_PTE
+ andc. r13,r13,r11 /* Check permission */
+
+#ifdef CONFIG_PTE_64BIT
+#ifdef CONFIG_SMP
+ subf r13,r11,r12 /* create false data dep */
+ lwzx r13,r11,r13 /* Get upper pte bits */
+#else
+ lwz r13,0(r12) /* Get upper pte bits */
+#endif
+#endif
+
+ bne 2f /* Bail if permission mismach */
+
+ /* Jump to common TLB load point */
+ b finish_tlb_load
+
+2:
+ /* The bailout. Restore registers to pre-exception conditions
+ * and call the heavyweights to help us out.
+ */
+ mfspr r10, SPRN_SPRG_THREAD
+ lwz r11, THREAD_NORMSAVE(3)(r10)
+ mtcr r11
+ lwz r13, THREAD_NORMSAVE(2)(r10)
+ lwz r12, THREAD_NORMSAVE(1)(r10)
+ lwz r11, THREAD_NORMSAVE(0)(r10)
+ mfspr r10, SPRN_SPRG_RSCRATCH0
+ b InstructionStorage
+
+#ifdef CONFIG_SPE
+ /* SPE Unavailable */
+ START_EXCEPTION(SPEUnavailable)
+ NORMAL_EXCEPTION_PROLOG
+ bne load_up_spe
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_EE_LITE(0x2010, KernelSPE)
+#else
+ EXCEPTION(0x2020, SPEUnavailable, unknown_exception, EXC_XFER_EE)
+#endif /* CONFIG_SPE */
+
+ /* SPE Floating Point Data */
+#ifdef CONFIG_SPE
+ EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE);
+
+ /* SPE Floating Point Round */
+ EXCEPTION(0x2050, SPEFloatingPointRound, SPEFloatingPointRoundException, EXC_XFER_EE)
+#else
+ EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2050, SPEFloatingPointRound, unknown_exception, EXC_XFER_EE)
+#endif /* CONFIG_SPE */
+
+ /* Performance Monitor */
+ EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD)
+
+ EXCEPTION(0x2070, Doorbell, doorbell_exception, EXC_XFER_STD)
+
+ CRITICAL_EXCEPTION(0x2080, CriticalDoorbell, unknown_exception)
+
+ /* Debug Interrupt */
+ DEBUG_DEBUG_EXCEPTION
+ DEBUG_CRIT_EXCEPTION
+
+/*
+ * Local functions
+ */
+
+/*
+ * Both the instruction and data TLB miss get to this
+ * point to load the TLB.
+ * r10 - tsize encoding (if HUGETLB_PAGE) or available to use
+ * r11 - TLB (info from Linux PTE)
+ * r12 - available to use
+ * r13 - upper bits of PTE (if PTE_64BIT) or available to use
+ * CR5 - results of addr >= PAGE_OFFSET
+ * MAS0, MAS1 - loaded with proper value when we get here
+ * MAS2, MAS3 - will need additional info from Linux PTE
+ * Upon exit, we reload everything and RFI.
+ */
+finish_tlb_load:
+#ifdef CONFIG_HUGETLB_PAGE
+ cmpwi 6, r10, 0 /* check for huge page */
+ beq 6, finish_tlb_load_cont /* !huge */
+
+ /* Alas, we need more scratch registers for hugepages */
+ mfspr r12, SPRN_SPRG_THREAD
+ stw r14, THREAD_NORMSAVE(4)(r12)
+ stw r15, THREAD_NORMSAVE(5)(r12)
+ stw r16, THREAD_NORMSAVE(6)(r12)
+ stw r17, THREAD_NORMSAVE(7)(r12)
+
+ /* Get the next_tlbcam_idx percpu var */
+#ifdef CONFIG_SMP
+ lwz r12, THREAD_INFO-THREAD(r12)
+ lwz r15, TI_CPU(r12)
+ lis r14, __per_cpu_offset@h
+ ori r14, r14, __per_cpu_offset@l
+ rlwinm r15, r15, 2, 0, 29
+ lwzx r16, r14, r15
+#else
+ li r16, 0
+#endif
+ lis r17, next_tlbcam_idx@h
+ ori r17, r17, next_tlbcam_idx@l
+ add r17, r17, r16 /* r17 = *next_tlbcam_idx */
+ lwz r15, 0(r17) /* r15 = next_tlbcam_idx */
+
+ lis r14, MAS0_TLBSEL(1)@h /* select TLB1 (TLBCAM) */
+ rlwimi r14, r15, 16, 4, 15 /* next_tlbcam_idx entry */
+ mtspr SPRN_MAS0, r14
+
+ /* Extract TLB1CFG(NENTRY) */
+ mfspr r16, SPRN_TLB1CFG
+ andi. r16, r16, 0xfff
+
+ /* Update next_tlbcam_idx, wrapping when necessary */
+ addi r15, r15, 1
+ cmpw r15, r16
+ blt 100f
+ lis r14, tlbcam_index@h
+ ori r14, r14, tlbcam_index@l
+ lwz r15, 0(r14)
+100: stw r15, 0(r17)
+
+ /*
+ * Calc MAS1_TSIZE from r10 (which has pshift encoded)
+ * tlb_enc = (pshift - 10).
+ */
+ subi r15, r10, 10
+ mfspr r16, SPRN_MAS1
+ rlwimi r16, r15, 7, 20, 24
+ mtspr SPRN_MAS1, r16
+
+ /* copy the pshift for use later */
+ mr r14, r10
+
+ /* fall through */
+
+#endif /* CONFIG_HUGETLB_PAGE */
+
+ /*
+ * We set execute, because we don't have the granularity to
+ * properly set this at the page level (Linux problem).
+ * Many of these bits are software only. Bits we don't set
+ * here we (properly should) assume have the appropriate value.
+ */
+finish_tlb_load_cont:
+#ifdef CONFIG_PTE_64BIT
+ rlwinm r12, r11, 32-2, 26, 31 /* Move in perm bits */
+ andi. r10, r11, _PAGE_DIRTY
+ bne 1f
+ li r10, MAS3_SW | MAS3_UW
+ andc r12, r12, r10
+1: rlwimi r12, r13, 20, 0, 11 /* grab RPN[32:43] */
+ rlwimi r12, r11, 20, 12, 19 /* grab RPN[44:51] */
+2: mtspr SPRN_MAS3, r12
+BEGIN_MMU_FTR_SECTION
+ srwi r10, r13, 12 /* grab RPN[12:31] */
+ mtspr SPRN_MAS7, r10
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
+#else
+ li r10, (_PAGE_EXEC | _PAGE_PRESENT)
+ mr r13, r11
+ rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */
+ and r12, r11, r10
+ andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */
+ slwi r10, r12, 1
+ or r10, r10, r12
+ iseleq r12, r12, r10
+ rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */
+ mtspr SPRN_MAS3, r13
+#endif
+
+ mfspr r12, SPRN_MAS2
+#ifdef CONFIG_PTE_64BIT
+ rlwimi r12, r11, 32-19, 27, 31 /* extract WIMGE from pte */
+#else
+ rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */
+#endif
+#ifdef CONFIG_HUGETLB_PAGE
+ beq 6, 3f /* don't mask if page isn't huge */
+ li r13, 1
+ slw r13, r13, r14
+ subi r13, r13, 1
+ rlwinm r13, r13, 0, 0, 19 /* bottom bits used for WIMGE/etc */
+ andc r12, r12, r13 /* mask off ea bits within the page */
+#endif
+3: mtspr SPRN_MAS2, r12
+
+#ifdef CONFIG_E200
+ /* Round robin TLB1 entries assignment */
+ mfspr r12, SPRN_MAS0
+
+ /* Extract TLB1CFG(NENTRY) */
+ mfspr r11, SPRN_TLB1CFG
+ andi. r11, r11, 0xfff
+
+ /* Extract MAS0(NV) */
+ andi. r13, r12, 0xfff
+ addi r13, r13, 1
+ cmpw 0, r13, r11
+ addi r12, r12, 1
+
+ /* check if we need to wrap */
+ blt 7f
+
+ /* wrap back to first free tlbcam entry */
+ lis r13, tlbcam_index@ha
+ lwz r13, tlbcam_index@l(r13)
+ rlwimi r12, r13, 0, 20, 31
+7:
+ mtspr SPRN_MAS0,r12
+#endif /* CONFIG_E200 */
+
+tlb_write_entry:
+ tlbwe
+
+ /* Done...restore registers and get out of here. */
+ mfspr r10, SPRN_SPRG_THREAD
+#ifdef CONFIG_HUGETLB_PAGE
+ beq 6, 8f /* skip restore for 4k page faults */
+ lwz r14, THREAD_NORMSAVE(4)(r10)
+ lwz r15, THREAD_NORMSAVE(5)(r10)
+ lwz r16, THREAD_NORMSAVE(6)(r10)
+ lwz r17, THREAD_NORMSAVE(7)(r10)
+#endif
+8: lwz r11, THREAD_NORMSAVE(3)(r10)
+ mtcr r11
+ lwz r13, THREAD_NORMSAVE(2)(r10)
+ lwz r12, THREAD_NORMSAVE(1)(r10)
+ lwz r11, THREAD_NORMSAVE(0)(r10)
+ mfspr r10, SPRN_SPRG_RSCRATCH0
+ rfi /* Force context change */
+
+#ifdef CONFIG_SPE
+/* Note that the SPE support is closely modeled after the AltiVec
+ * support. Changes to one are likely to be applicable to the
+ * other! */
+load_up_spe:
+/*
+ * Disable SPE for the task which had SPE previously,
+ * and save its SPE registers in its thread_struct.
+ * Enables SPE for use in the kernel on return.
+ * On SMP we know the SPE units are free, since we give it up every
+ * switch. -- Kumar
+ */
+ mfmsr r5
+ oris r5,r5,MSR_SPE@h
+ mtmsr r5 /* enable use of SPE now */
+ isync
+/*
+ * For SMP, we don't do lazy SPE switching because it just gets too
+ * horrendously complex, especially when a task switches from one CPU
+ * to another. Instead we call giveup_spe in switch_to.
+ */
+#ifndef CONFIG_SMP
+ lis r3,last_task_used_spe@ha
+ lwz r4,last_task_used_spe@l(r3)
+ cmpi 0,r4,0
+ beq 1f
+ addi r4,r4,THREAD /* want THREAD of last_task_used_spe */
+ SAVE_32EVRS(0,r10,r4,THREAD_EVR0)
+ evxor evr10, evr10, evr10 /* clear out evr10 */
+ evmwumiaa evr10, evr10, evr10 /* evr10 <- ACC = 0 * 0 + ACC */
+ li r5,THREAD_ACC
+ evstddx evr10, r4, r5 /* save off accumulator */
+ lwz r5,PT_REGS(r4)
+ lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ lis r10,MSR_SPE@h
+ andc r4,r4,r10 /* disable SPE for previous task */
+ stw r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#endif /* !CONFIG_SMP */
+ /* enable use of SPE after return */
+ oris r9,r9,MSR_SPE@h
+ mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
+ li r4,1
+ li r10,THREAD_ACC
+ stw r4,THREAD_USED_SPE(r5)
+ evlddx evr4,r10,r5
+ evmra evr4,evr4
+ REST_32EVRS(0,r10,r5,THREAD_EVR0)
+#ifndef CONFIG_SMP
+ subi r4,r5,THREAD
+ stw r4,last_task_used_spe@l(r3)
+#endif /* !CONFIG_SMP */
+ /* restore registers and return */
+2: REST_4GPRS(3, r11)
+ lwz r10,_CCR(r11)
+ REST_GPR(1, r11)
+ mtcr r10
+ lwz r10,_LINK(r11)
+ mtlr r10
+ REST_GPR(10, r11)
+ mtspr SPRN_SRR1,r9
+ mtspr SPRN_SRR0,r12
+ REST_GPR(9, r11)
+ REST_GPR(12, r11)
+ lwz r11,GPR11(r11)
+ rfi
+
+/*
+ * SPE unavailable trap from kernel - print a message, but let
+ * the task use SPE in the kernel until it returns to user mode.
+ */
+KernelSPE:
+ lwz r3,_MSR(r1)
+ oris r3,r3,MSR_SPE@h
+ stw r3,_MSR(r1) /* enable use of SPE after return */
+#ifdef CONFIG_PRINTK
+ lis r3,87f@h
+ ori r3,r3,87f@l
+ mr r4,r2 /* current */
+ lwz r5,_NIP(r1)
+ bl printk
+#endif
+ b ret_from_except
+#ifdef CONFIG_PRINTK
+87: .string "SPE used in kernel (task=%p, pc=%x) \n"
+#endif
+ .align 4,0
+
+#endif /* CONFIG_SPE */
+
+/*
+ * Global functions
+ */
+
+/* Adjust or setup IVORs for e200 */
+_GLOBAL(__setup_e200_ivors)
+ li r3,DebugDebug@l
+ mtspr SPRN_IVOR15,r3
+ li r3,SPEUnavailable@l
+ mtspr SPRN_IVOR32,r3
+ li r3,SPEFloatingPointData@l
+ mtspr SPRN_IVOR33,r3
+ li r3,SPEFloatingPointRound@l
+ mtspr SPRN_IVOR34,r3
+ sync
+ blr
+
+/* Adjust or setup IVORs for e500v1/v2 */
+_GLOBAL(__setup_e500_ivors)
+ li r3,DebugCrit@l
+ mtspr SPRN_IVOR15,r3
+ li r3,SPEUnavailable@l
+ mtspr SPRN_IVOR32,r3
+ li r3,SPEFloatingPointData@l
+ mtspr SPRN_IVOR33,r3
+ li r3,SPEFloatingPointRound@l
+ mtspr SPRN_IVOR34,r3
+ li r3,PerformanceMonitor@l
+ mtspr SPRN_IVOR35,r3
+ sync
+ blr
+
+/* Adjust or setup IVORs for e500mc */
+_GLOBAL(__setup_e500mc_ivors)
+ li r3,DebugDebug@l
+ mtspr SPRN_IVOR15,r3
+ li r3,PerformanceMonitor@l
+ mtspr SPRN_IVOR35,r3
+ li r3,Doorbell@l
+ mtspr SPRN_IVOR36,r3
+ li r3,CriticalDoorbell@l
+ mtspr SPRN_IVOR37,r3
+ sync
+ blr
+
+/*
+ * extern void giveup_altivec(struct task_struct *prev)
+ *
+ * The e500 core does not have an AltiVec unit.
+ */
+_GLOBAL(giveup_altivec)
+ blr
+
+#ifdef CONFIG_SPE
+/*
+ * extern void giveup_spe(struct task_struct *prev)
+ *
+ */
+_GLOBAL(giveup_spe)
+ mfmsr r5
+ oris r5,r5,MSR_SPE@h
+ mtmsr r5 /* enable use of SPE now */
+ isync
+ cmpi 0,r3,0
+ beqlr- /* if no previous owner, done */
+ addi r3,r3,THREAD /* want THREAD of task */
+ lwz r5,PT_REGS(r3)
+ cmpi 0,r5,0
+ SAVE_32EVRS(0, r4, r3, THREAD_EVR0)
+ evxor evr6, evr6, evr6 /* clear out evr6 */
+ evmwumiaa evr6, evr6, evr6 /* evr6 <- ACC = 0 * 0 + ACC */
+ li r4,THREAD_ACC
+ evstddx evr6, r4, r3 /* save off accumulator */
+ beq 1f
+ lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ lis r3,MSR_SPE@h
+ andc r4,r4,r3 /* disable SPE for previous task */
+ stw r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#ifndef CONFIG_SMP
+ li r5,0
+ lis r4,last_task_used_spe@ha
+ stw r5,last_task_used_spe@l(r4)
+#endif /* !CONFIG_SMP */
+ blr
+#endif /* CONFIG_SPE */
+
+/*
+ * extern void giveup_fpu(struct task_struct *prev)
+ *
+ * Not all FSL Book-E cores have an FPU
+ */
+#ifndef CONFIG_PPC_FPU
+_GLOBAL(giveup_fpu)
+ blr
+#endif
+
+/*
+ * extern void abort(void)
+ *
+ * At present, this routine just applies a system reset.
+ */
+_GLOBAL(abort)
+ li r13,0
+ mtspr SPRN_DBCR0,r13 /* disable all debug events */
+ isync
+ mfmsr r13
+ ori r13,r13,MSR_DE@l /* Enable Debug Events */
+ mtmsr r13
+ isync
+ mfspr r13,SPRN_DBCR0
+ lis r13,(DBCR0_IDM|DBCR0_RST_CHIP)@h
+ mtspr SPRN_DBCR0,r13
+ isync
+
+_GLOBAL(set_context)
+
+#ifdef CONFIG_BDI_SWITCH
+ /* Context switch the PTE pointer for the Abatron BDI2000.
+ * The PGDIR is the second parameter.
+ */
+ lis r5, abatron_pteptrs@h
+ ori r5, r5, abatron_pteptrs@l
+ stw r4, 0x4(r5)
+#endif
+ mtspr SPRN_PID,r3
+ isync /* Force context change */
+ blr
+
+_GLOBAL(flush_dcache_L1)
+ mfspr r3,SPRN_L1CFG0
+
+ rlwinm r5,r3,9,3 /* Extract cache block size */
+ twlgti r5,1 /* Only 32 and 64 byte cache blocks
+ * are currently defined.
+ */
+ li r4,32
+ subfic r6,r5,2 /* r6 = log2(1KiB / cache block size) -
+ * log2(number of ways)
+ */
+ slw r5,r4,r5 /* r5 = cache block size */
+
+ rlwinm r7,r3,0,0xff /* Extract number of KiB in the cache */
+ mulli r7,r7,13 /* An 8-way cache will require 13
+ * loads per set.
+ */
+ slw r7,r7,r6
+
+ /* save off HID0 and set DCFA */
+ mfspr r8,SPRN_HID0
+ ori r9,r8,HID0_DCFA@l
+ mtspr SPRN_HID0,r9
+ isync
+
+ lis r4,KERNELBASE@h
+ mtctr r7
+
+1: lwz r3,0(r4) /* Load... */
+ add r4,r4,r5
+ bdnz 1b
+
+ msync
+ lis r4,KERNELBASE@h
+ mtctr r7
+
+1: dcbf 0,r4 /* ...and flush. */
+ add r4,r4,r5
+ bdnz 1b
+
+ /* restore HID0 */
+ mtspr SPRN_HID0,r8
+ isync
+
+ blr
+
+#ifdef CONFIG_SMP
+/* When we get here, r24 needs to hold the CPU # */
+ .globl __secondary_start
+__secondary_start:
+ lis r3,__secondary_hold_acknowledge@h
+ ori r3,r3,__secondary_hold_acknowledge@l
+ stw r24,0(r3)
+
+ li r3,0
+ mr r4,r24 /* Why? */
+ bl call_setup_cpu
+
+ lis r3,tlbcam_index@ha
+ lwz r3,tlbcam_index@l(r3)
+ mtctr r3
+ li r26,0 /* r26 safe? */
+
+ /* Load each CAM entry */
+1: mr r3,r26
+ bl loadcam_entry
+ addi r26,r26,1
+ bdnz 1b
+
+ /* get current_thread_info and current */
+ lis r1,secondary_ti@ha
+ lwz r1,secondary_ti@l(r1)
+ lwz r2,TI_TASK(r1)
+
+ /* stack */
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ li r0,0
+ stw r0,0(r1)
+
+ /* ptr to current thread */
+ addi r4,r2,THREAD /* address of our thread_struct */
+ mtspr SPRN_SPRG_THREAD,r4
+
+ /* Setup the defaults for TLB entries */
+ li r4,(MAS4_TSIZED(BOOK3E_PAGESZ_4K))@l
+ mtspr SPRN_MAS4,r4
+
+ /* Jump to start_secondary */
+ lis r4,MSR_KERNEL@h
+ ori r4,r4,MSR_KERNEL@l
+ lis r3,start_secondary@h
+ ori r3,r3,start_secondary@l
+ mtspr SPRN_SRR0,r3
+ mtspr SPRN_SRR1,r4
+ sync
+ rfi
+ sync
+
+ .globl __secondary_hold_acknowledge
+__secondary_hold_acknowledge:
+ .long -1
+#endif
+
+/*
+ * We put a few things here that have to be page-aligned. This stuff
+ * goes at the beginning of the data segment, which is page-aligned.
+ */
+ .data
+ .align 12
+ .globl sdata
+sdata:
+ .globl empty_zero_page
+empty_zero_page:
+ .space 4096
+ .globl swapper_pg_dir
+swapper_pg_dir:
+ .space PGD_TABLE_SIZE
+
+/*
+ * Room for two PTE pointers, usually the kernel and current user pointers
+ * to their respective root page table.
+ */
+abatron_pteptrs:
+ .space 8
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
new file mode 100644
index 00000000..2bc0584b
--- /dev/null
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -0,0 +1,363 @@
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers. Derived from
+ * "arch/x86/kernel/hw_breakpoint.c"
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2010 IBM Corporation
+ * Author: K.Prasad <prasad@linux.vnet.ibm.com>
+ *
+ */
+
+#include <linux/hw_breakpoint.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
+#include <linux/percpu.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/hw_breakpoint.h>
+#include <asm/processor.h>
+#include <asm/sstep.h>
+#include <asm/uaccess.h>
+
+/*
+ * Stores the breakpoints currently in use on each breakpoint address
+ * register for every cpu
+ */
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg);
+
+/*
+ * Returns total number of data or instruction breakpoints available.
+ */
+int hw_breakpoint_slots(int type)
+{
+ if (type == TYPE_DATA)
+ return HBP_NUM;
+ return 0; /* no instruction breakpoints available */
+}
+
+/*
+ * Install a perf counter breakpoint.
+ *
+ * We seek a free debug address register and use it for this
+ * breakpoint.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+int arch_install_hw_breakpoint(struct perf_event *bp)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ struct perf_event **slot = &__get_cpu_var(bp_per_reg);
+
+ *slot = bp;
+
+ /*
+ * Do not install DABR values if the instruction must be single-stepped.
+ * If so, DABR will be populated in single_step_dabr_instruction().
+ */
+ if (current->thread.last_hit_ubp != bp)
+ set_dabr(info->address | info->type | DABR_TRANSLATION);
+
+ return 0;
+}
+
+/*
+ * Uninstall the breakpoint contained in the given counter.
+ *
+ * First we search the debug address register it uses and then we disable
+ * it.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+{
+ struct perf_event **slot = &__get_cpu_var(bp_per_reg);
+
+ if (*slot != bp) {
+ WARN_ONCE(1, "Can't find the breakpoint");
+ return;
+ }
+
+ *slot = NULL;
+ set_dabr(0);
+}
+
+/*
+ * Perform cleanup of arch-specific counters during unregistration
+ * of the perf-event
+ */
+void arch_unregister_hw_breakpoint(struct perf_event *bp)
+{
+ /*
+ * If the breakpoint is unregistered between a hw_breakpoint_handler()
+ * and the single_step_dabr_instruction(), then cleanup the breakpoint
+ * restoration variables to prevent dangling pointers.
+ */
+ if (bp->ctx->task)
+ bp->ctx->task->thread.last_hit_ubp = NULL;
+}
+
+/*
+ * Check for virtual address in kernel space.
+ */
+int arch_check_bp_in_kernelspace(struct perf_event *bp)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+ return is_kernel_addr(info->address);
+}
+
+int arch_bp_generic_fields(int type, int *gen_bp_type)
+{
+ switch (type) {
+ case DABR_DATA_READ:
+ *gen_bp_type = HW_BREAKPOINT_R;
+ break;
+ case DABR_DATA_WRITE:
+ *gen_bp_type = HW_BREAKPOINT_W;
+ break;
+ case (DABR_DATA_WRITE | DABR_DATA_READ):
+ *gen_bp_type = (HW_BREAKPOINT_W | HW_BREAKPOINT_R);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
+ * Validate the arch-specific HW Breakpoint register settings
+ */
+int arch_validate_hwbkpt_settings(struct perf_event *bp)
+{
+ int ret = -EINVAL;
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+ if (!bp)
+ return ret;
+
+ switch (bp->attr.bp_type) {
+ case HW_BREAKPOINT_R:
+ info->type = DABR_DATA_READ;
+ break;
+ case HW_BREAKPOINT_W:
+ info->type = DABR_DATA_WRITE;
+ break;
+ case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
+ info->type = (DABR_DATA_READ | DABR_DATA_WRITE);
+ break;
+ default:
+ return ret;
+ }
+
+ info->address = bp->attr.bp_addr;
+ info->len = bp->attr.bp_len;
+
+ /*
+ * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8)
+ * and breakpoint addresses are aligned to nearest double-word
+ * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the
+ * 'symbolsize' should satisfy the check below.
+ */
+ if (info->len >
+ (HW_BREAKPOINT_LEN - (info->address & HW_BREAKPOINT_ALIGN)))
+ return -EINVAL;
+ return 0;
+}
+
+/*
+ * Restores the breakpoint on the debug registers.
+ * Invoke this function if it is known that the execution context is
+ * about to change to cause loss of MSR_SE settings.
+ */
+void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
+{
+ struct arch_hw_breakpoint *info;
+
+ if (likely(!tsk->thread.last_hit_ubp))
+ return;
+
+ info = counter_arch_bp(tsk->thread.last_hit_ubp);
+ regs->msr &= ~MSR_SE;
+ set_dabr(info->address | info->type | DABR_TRANSLATION);
+ tsk->thread.last_hit_ubp = NULL;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_handler(struct die_args *args)
+{
+ int rc = NOTIFY_STOP;
+ struct perf_event *bp;
+ struct pt_regs *regs = args->regs;
+ int stepped = 1;
+ struct arch_hw_breakpoint *info;
+ unsigned int instr;
+ unsigned long dar = regs->dar;
+
+ /* Disable breakpoints during exception handling */
+ set_dabr(0);
+
+ /*
+ * The counter may be concurrently released but that can only
+ * occur from a call_rcu() path. We can then safely fetch
+ * the breakpoint, use its callback, touch its counter
+ * while we are in an rcu_read_lock() path.
+ */
+ rcu_read_lock();
+
+ bp = __get_cpu_var(bp_per_reg);
+ if (!bp)
+ goto out;
+ info = counter_arch_bp(bp);
+
+ /*
+ * Return early after invoking user-callback function without restoring
+ * DABR if the breakpoint is from ptrace which always operates in
+ * one-shot mode. The ptrace-ed process will receive the SIGTRAP signal
+ * generated in do_dabr().
+ */
+ if (bp->overflow_handler == ptrace_triggered) {
+ perf_bp_event(bp, regs);
+ rc = NOTIFY_DONE;
+ goto out;
+ }
+
+ /*
+ * Verify if dar lies within the address range occupied by the symbol
+ * being watched to filter extraneous exceptions. If it doesn't,
+ * we still need to single-step the instruction, but we don't
+ * generate an event.
+ */
+ info->extraneous_interrupt = !((bp->attr.bp_addr <= dar) &&
+ (dar - bp->attr.bp_addr < bp->attr.bp_len));
+
+ /* Do not emulate user-space instructions, instead single-step them */
+ if (user_mode(regs)) {
+ bp->ctx->task->thread.last_hit_ubp = bp;
+ regs->msr |= MSR_SE;
+ goto out;
+ }
+
+ stepped = 0;
+ instr = 0;
+ if (!__get_user_inatomic(instr, (unsigned int *) regs->nip))
+ stepped = emulate_step(regs, instr);
+
+ /*
+ * emulate_step() could not execute it. We've failed in reliably
+ * handling the hw-breakpoint. Unregister it and throw a warning
+ * message to let the user know about it.
+ */
+ if (!stepped) {
+ WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
+ "0x%lx will be disabled.", info->address);
+ perf_event_disable(bp);
+ goto out;
+ }
+ /*
+ * As a policy, the callback is invoked in a 'trigger-after-execute'
+ * fashion
+ */
+ if (!info->extraneous_interrupt)
+ perf_bp_event(bp, regs);
+
+ set_dabr(info->address | info->type | DABR_TRANSLATION);
+out:
+ rcu_read_unlock();
+ return rc;
+}
+
+/*
+ * Handle single-step exceptions following a DABR hit.
+ */
+int __kprobes single_step_dabr_instruction(struct die_args *args)
+{
+ struct pt_regs *regs = args->regs;
+ struct perf_event *bp = NULL;
+ struct arch_hw_breakpoint *bp_info;
+
+ bp = current->thread.last_hit_ubp;
+ /*
+ * Check if we are single-stepping as a result of a
+ * previous HW Breakpoint exception
+ */
+ if (!bp)
+ return NOTIFY_DONE;
+
+ bp_info = counter_arch_bp(bp);
+
+ /*
+ * We shall invoke the user-defined callback function in the single
+ * stepping handler to confirm to 'trigger-after-execute' semantics
+ */
+ if (!bp_info->extraneous_interrupt)
+ perf_bp_event(bp, regs);
+
+ set_dabr(bp_info->address | bp_info->type | DABR_TRANSLATION);
+ current->thread.last_hit_ubp = NULL;
+
+ /*
+ * If the process was being single-stepped by ptrace, let the
+ * other single-step actions occur (e.g. generate SIGTRAP).
+ */
+ if (test_thread_flag(TIF_SINGLESTEP))
+ return NOTIFY_DONE;
+
+ return NOTIFY_STOP;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_exceptions_notify(
+ struct notifier_block *unused, unsigned long val, void *data)
+{
+ int ret = NOTIFY_DONE;
+
+ switch (val) {
+ case DIE_DABR_MATCH:
+ ret = hw_breakpoint_handler(data);
+ break;
+ case DIE_SSTEP:
+ ret = single_step_dabr_instruction(data);
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Release the user breakpoints used by ptrace
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+ struct thread_struct *t = &tsk->thread;
+
+ unregister_hw_breakpoint(t->ptrace_bps[0]);
+ t->ptrace_bps[0] = NULL;
+}
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+ /* TODO */
+}
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
new file mode 100644
index 00000000..b01d14ee
--- /dev/null
+++ b/arch/powerpc/kernel/ibmebus.c
@@ -0,0 +1,760 @@
+/*
+ * IBM PowerPC IBM eBus Infrastructure Support.
+ *
+ * Copyright (c) 2005 IBM Corporation
+ * Joachim Fenkes <fenkes@de.ibm.com>
+ * Heiko J Schick <schickhj@de.ibm.com>
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/console.h>
+#include <linux/kobject.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/of_platform.h>
+#include <asm/ibmebus.h>
+#include <asm/abs_addr.h>
+
+static struct device ibmebus_bus_device = { /* fake "parent" device */
+ .init_name = "ibmebus",
+};
+
+struct bus_type ibmebus_bus_type;
+
+/* These devices will automatically be added to the bus during init */
+static struct of_device_id __initdata ibmebus_matches[] = {
+ { .compatible = "IBM,lhca" },
+ { .compatible = "IBM,lhea" },
+ {},
+};
+
+static void *ibmebus_alloc_coherent(struct device *dev,
+ size_t size,
+ dma_addr_t *dma_handle,
+ gfp_t flag,
+ struct dma_attrs *attrs)
+{
+ void *mem;
+
+ mem = kmalloc(size, flag);
+ *dma_handle = (dma_addr_t)mem;
+
+ return mem;
+}
+
+static void ibmebus_free_coherent(struct device *dev,
+ size_t size, void *vaddr,
+ dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+ kfree(vaddr);
+}
+
+static dma_addr_t ibmebus_map_page(struct device *dev,
+ struct page *page,
+ unsigned long offset,
+ size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ return (dma_addr_t)(page_address(page) + offset);
+}
+
+static void ibmebus_unmap_page(struct device *dev,
+ dma_addr_t dma_addr,
+ size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ return;
+}
+
+static int ibmebus_map_sg(struct device *dev,
+ struct scatterlist *sgl,
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nents, i) {
+ sg->dma_address = (dma_addr_t) sg_virt(sg);
+ sg->dma_length = sg->length;
+ }
+
+ return nents;
+}
+
+static void ibmebus_unmap_sg(struct device *dev,
+ struct scatterlist *sg,
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ return;
+}
+
+static int ibmebus_dma_supported(struct device *dev, u64 mask)
+{
+ return mask == DMA_BIT_MASK(64);
+}
+
+static u64 ibmebus_dma_get_required_mask(struct device *dev)
+{
+ return DMA_BIT_MASK(64);
+}
+
+static struct dma_map_ops ibmebus_dma_ops = {
+ .alloc = ibmebus_alloc_coherent,
+ .free = ibmebus_free_coherent,
+ .map_sg = ibmebus_map_sg,
+ .unmap_sg = ibmebus_unmap_sg,
+ .dma_supported = ibmebus_dma_supported,
+ .get_required_mask = ibmebus_dma_get_required_mask,
+ .map_page = ibmebus_map_page,
+ .unmap_page = ibmebus_unmap_page,
+};
+
+static int ibmebus_match_path(struct device *dev, void *data)
+{
+ struct device_node *dn = to_platform_device(dev)->dev.of_node;
+ return (dn->full_name &&
+ (strcasecmp((char *)data, dn->full_name) == 0));
+}
+
+static int ibmebus_match_node(struct device *dev, void *data)
+{
+ return to_platform_device(dev)->dev.of_node == data;
+}
+
+static int ibmebus_create_device(struct device_node *dn)
+{
+ struct platform_device *dev;
+ int ret;
+
+ dev = of_device_alloc(dn, NULL, &ibmebus_bus_device);
+ if (!dev)
+ return -ENOMEM;
+
+ dev->dev.bus = &ibmebus_bus_type;
+ dev->dev.archdata.dma_ops = &ibmebus_dma_ops;
+
+ ret = of_device_add(dev);
+ if (ret)
+ platform_device_put(dev);
+ return ret;
+}
+
+static int ibmebus_create_devices(const struct of_device_id *matches)
+{
+ struct device_node *root, *child;
+ int ret = 0;
+
+ root = of_find_node_by_path("/");
+
+ for_each_child_of_node(root, child) {
+ if (!of_match_node(matches, child))
+ continue;
+
+ if (bus_find_device(&ibmebus_bus_type, NULL, child,
+ ibmebus_match_node))
+ continue;
+
+ ret = ibmebus_create_device(child);
+ if (ret) {
+ printk(KERN_ERR "%s: failed to create device (%i)",
+ __func__, ret);
+ of_node_put(child);
+ break;
+ }
+ }
+
+ of_node_put(root);
+ return ret;
+}
+
+int ibmebus_register_driver(struct of_platform_driver *drv)
+{
+ /* If the driver uses devices that ibmebus doesn't know, add them */
+ ibmebus_create_devices(drv->driver.of_match_table);
+
+ drv->driver.bus = &ibmebus_bus_type;
+ return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL(ibmebus_register_driver);
+
+void ibmebus_unregister_driver(struct of_platform_driver *drv)
+{
+ driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL(ibmebus_unregister_driver);
+
+int ibmebus_request_irq(u32 ist, irq_handler_t handler,
+ unsigned long irq_flags, const char *devname,
+ void *dev_id)
+{
+ unsigned int irq = irq_create_mapping(NULL, ist);
+
+ if (irq == NO_IRQ)
+ return -EINVAL;
+
+ return request_irq(irq, handler, irq_flags, devname, dev_id);
+}
+EXPORT_SYMBOL(ibmebus_request_irq);
+
+void ibmebus_free_irq(u32 ist, void *dev_id)
+{
+ unsigned int irq = irq_find_mapping(NULL, ist);
+
+ free_irq(irq, dev_id);
+ irq_dispose_mapping(irq);
+}
+EXPORT_SYMBOL(ibmebus_free_irq);
+
+static char *ibmebus_chomp(const char *in, size_t count)
+{
+ char *out = kmalloc(count + 1, GFP_KERNEL);
+
+ if (!out)
+ return NULL;
+
+ memcpy(out, in, count);
+ out[count] = '\0';
+ if (out[count - 1] == '\n')
+ out[count - 1] = '\0';
+
+ return out;
+}
+
+static ssize_t ibmebus_store_probe(struct bus_type *bus,
+ const char *buf, size_t count)
+{
+ struct device_node *dn = NULL;
+ char *path;
+ ssize_t rc = 0;
+
+ path = ibmebus_chomp(buf, count);
+ if (!path)
+ return -ENOMEM;
+
+ if (bus_find_device(&ibmebus_bus_type, NULL, path,
+ ibmebus_match_path)) {
+ printk(KERN_WARNING "%s: %s has already been probed\n",
+ __func__, path);
+ rc = -EEXIST;
+ goto out;
+ }
+
+ if ((dn = of_find_node_by_path(path))) {
+ rc = ibmebus_create_device(dn);
+ of_node_put(dn);
+ } else {
+ printk(KERN_WARNING "%s: no such device node: %s\n",
+ __func__, path);
+ rc = -ENODEV;
+ }
+
+out:
+ kfree(path);
+ if (rc)
+ return rc;
+ return count;
+}
+
+static ssize_t ibmebus_store_remove(struct bus_type *bus,
+ const char *buf, size_t count)
+{
+ struct device *dev;
+ char *path;
+
+ path = ibmebus_chomp(buf, count);
+ if (!path)
+ return -ENOMEM;
+
+ if ((dev = bus_find_device(&ibmebus_bus_type, NULL, path,
+ ibmebus_match_path))) {
+ of_device_unregister(to_platform_device(dev));
+
+ kfree(path);
+ return count;
+ } else {
+ printk(KERN_WARNING "%s: %s not on the bus\n",
+ __func__, path);
+
+ kfree(path);
+ return -ENODEV;
+ }
+}
+
+
+static struct bus_attribute ibmebus_bus_attrs[] = {
+ __ATTR(probe, S_IWUSR, NULL, ibmebus_store_probe),
+ __ATTR(remove, S_IWUSR, NULL, ibmebus_store_remove),
+ __ATTR_NULL
+};
+
+static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv)
+{
+ const struct of_device_id *matches = drv->of_match_table;
+
+ if (!matches)
+ return 0;
+
+ return of_match_device(matches, dev) != NULL;
+}
+
+static int ibmebus_bus_device_probe(struct device *dev)
+{
+ int error = -ENODEV;
+ struct of_platform_driver *drv;
+ struct platform_device *of_dev;
+ const struct of_device_id *match;
+
+ drv = to_of_platform_driver(dev->driver);
+ of_dev = to_platform_device(dev);
+
+ if (!drv->probe)
+ return error;
+
+ of_dev_get(of_dev);
+
+ match = of_match_device(drv->driver.of_match_table, dev);
+ if (match)
+ error = drv->probe(of_dev, match);
+ if (error)
+ of_dev_put(of_dev);
+
+ return error;
+}
+
+static int ibmebus_bus_device_remove(struct device *dev)
+{
+ struct platform_device *of_dev = to_platform_device(dev);
+ struct of_platform_driver *drv = to_of_platform_driver(dev->driver);
+
+ if (dev->driver && drv->remove)
+ drv->remove(of_dev);
+ return 0;
+}
+
+static void ibmebus_bus_device_shutdown(struct device *dev)
+{
+ struct platform_device *of_dev = to_platform_device(dev);
+ struct of_platform_driver *drv = to_of_platform_driver(dev->driver);
+
+ if (dev->driver && drv->shutdown)
+ drv->shutdown(of_dev);
+}
+
+/*
+ * ibmebus_bus_device_attrs
+ */
+static ssize_t devspec_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *ofdev;
+
+ ofdev = to_platform_device(dev);
+ return sprintf(buf, "%s\n", ofdev->dev.of_node->full_name);
+}
+
+static ssize_t name_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *ofdev;
+
+ ofdev = to_platform_device(dev);
+ return sprintf(buf, "%s\n", ofdev->dev.of_node->name);
+}
+
+static ssize_t modalias_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ ssize_t len = of_device_get_modalias(dev, buf, PAGE_SIZE - 2);
+ buf[len] = '\n';
+ buf[len+1] = 0;
+ return len+1;
+}
+
+struct device_attribute ibmebus_bus_device_attrs[] = {
+ __ATTR_RO(devspec),
+ __ATTR_RO(name),
+ __ATTR_RO(modalias),
+ __ATTR_NULL
+};
+
+#ifdef CONFIG_PM_SLEEP
+static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg)
+{
+ struct platform_device *of_dev = to_platform_device(dev);
+ struct of_platform_driver *drv = to_of_platform_driver(dev->driver);
+ int ret = 0;
+
+ if (dev->driver && drv->suspend)
+ ret = drv->suspend(of_dev, mesg);
+ return ret;
+}
+
+static int ibmebus_bus_legacy_resume(struct device *dev)
+{
+ struct platform_device *of_dev = to_platform_device(dev);
+ struct of_platform_driver *drv = to_of_platform_driver(dev->driver);
+ int ret = 0;
+
+ if (dev->driver && drv->resume)
+ ret = drv->resume(of_dev);
+ return ret;
+}
+
+static int ibmebus_bus_pm_prepare(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (drv && drv->pm && drv->pm->prepare)
+ ret = drv->pm->prepare(dev);
+
+ return ret;
+}
+
+static void ibmebus_bus_pm_complete(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+
+ if (drv && drv->pm && drv->pm->complete)
+ drv->pm->complete(dev);
+}
+
+#ifdef CONFIG_SUSPEND
+
+static int ibmebus_bus_pm_suspend(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->suspend)
+ ret = drv->pm->suspend(dev);
+ } else {
+ ret = ibmebus_bus_legacy_suspend(dev, PMSG_SUSPEND);
+ }
+
+ return ret;
+}
+
+static int ibmebus_bus_pm_suspend_noirq(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->suspend_noirq)
+ ret = drv->pm->suspend_noirq(dev);
+ }
+
+ return ret;
+}
+
+static int ibmebus_bus_pm_resume(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->resume)
+ ret = drv->pm->resume(dev);
+ } else {
+ ret = ibmebus_bus_legacy_resume(dev);
+ }
+
+ return ret;
+}
+
+static int ibmebus_bus_pm_resume_noirq(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->resume_noirq)
+ ret = drv->pm->resume_noirq(dev);
+ }
+
+ return ret;
+}
+
+#else /* !CONFIG_SUSPEND */
+
+#define ibmebus_bus_pm_suspend NULL
+#define ibmebus_bus_pm_resume NULL
+#define ibmebus_bus_pm_suspend_noirq NULL
+#define ibmebus_bus_pm_resume_noirq NULL
+
+#endif /* !CONFIG_SUSPEND */
+
+#ifdef CONFIG_HIBERNATE_CALLBACKS
+
+static int ibmebus_bus_pm_freeze(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->freeze)
+ ret = drv->pm->freeze(dev);
+ } else {
+ ret = ibmebus_bus_legacy_suspend(dev, PMSG_FREEZE);
+ }
+
+ return ret;
+}
+
+static int ibmebus_bus_pm_freeze_noirq(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->freeze_noirq)
+ ret = drv->pm->freeze_noirq(dev);
+ }
+
+ return ret;
+}
+
+static int ibmebus_bus_pm_thaw(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->thaw)
+ ret = drv->pm->thaw(dev);
+ } else {
+ ret = ibmebus_bus_legacy_resume(dev);
+ }
+
+ return ret;
+}
+
+static int ibmebus_bus_pm_thaw_noirq(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->thaw_noirq)
+ ret = drv->pm->thaw_noirq(dev);
+ }
+
+ return ret;
+}
+
+static int ibmebus_bus_pm_poweroff(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->poweroff)
+ ret = drv->pm->poweroff(dev);
+ } else {
+ ret = ibmebus_bus_legacy_suspend(dev, PMSG_HIBERNATE);
+ }
+
+ return ret;
+}
+
+static int ibmebus_bus_pm_poweroff_noirq(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->poweroff_noirq)
+ ret = drv->pm->poweroff_noirq(dev);
+ }
+
+ return ret;
+}
+
+static int ibmebus_bus_pm_restore(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->restore)
+ ret = drv->pm->restore(dev);
+ } else {
+ ret = ibmebus_bus_legacy_resume(dev);
+ }
+
+ return ret;
+}
+
+static int ibmebus_bus_pm_restore_noirq(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->restore_noirq)
+ ret = drv->pm->restore_noirq(dev);
+ }
+
+ return ret;
+}
+
+#else /* !CONFIG_HIBERNATE_CALLBACKS */
+
+#define ibmebus_bus_pm_freeze NULL
+#define ibmebus_bus_pm_thaw NULL
+#define ibmebus_bus_pm_poweroff NULL
+#define ibmebus_bus_pm_restore NULL
+#define ibmebus_bus_pm_freeze_noirq NULL
+#define ibmebus_bus_pm_thaw_noirq NULL
+#define ibmebus_bus_pm_poweroff_noirq NULL
+#define ibmebus_bus_pm_restore_noirq NULL
+
+#endif /* !CONFIG_HIBERNATE_CALLBACKS */
+
+static struct dev_pm_ops ibmebus_bus_dev_pm_ops = {
+ .prepare = ibmebus_bus_pm_prepare,
+ .complete = ibmebus_bus_pm_complete,
+ .suspend = ibmebus_bus_pm_suspend,
+ .resume = ibmebus_bus_pm_resume,
+ .freeze = ibmebus_bus_pm_freeze,
+ .thaw = ibmebus_bus_pm_thaw,
+ .poweroff = ibmebus_bus_pm_poweroff,
+ .restore = ibmebus_bus_pm_restore,
+ .suspend_noirq = ibmebus_bus_pm_suspend_noirq,
+ .resume_noirq = ibmebus_bus_pm_resume_noirq,
+ .freeze_noirq = ibmebus_bus_pm_freeze_noirq,
+ .thaw_noirq = ibmebus_bus_pm_thaw_noirq,
+ .poweroff_noirq = ibmebus_bus_pm_poweroff_noirq,
+ .restore_noirq = ibmebus_bus_pm_restore_noirq,
+};
+
+#define IBMEBUS_BUS_PM_OPS_PTR (&ibmebus_bus_dev_pm_ops)
+
+#else /* !CONFIG_PM_SLEEP */
+
+#define IBMEBUS_BUS_PM_OPS_PTR NULL
+
+#endif /* !CONFIG_PM_SLEEP */
+
+struct bus_type ibmebus_bus_type = {
+ .name = "ibmebus",
+ .uevent = of_device_uevent_modalias,
+ .bus_attrs = ibmebus_bus_attrs,
+ .match = ibmebus_bus_bus_match,
+ .probe = ibmebus_bus_device_probe,
+ .remove = ibmebus_bus_device_remove,
+ .shutdown = ibmebus_bus_device_shutdown,
+ .dev_attrs = ibmebus_bus_device_attrs,
+ .pm = IBMEBUS_BUS_PM_OPS_PTR,
+};
+EXPORT_SYMBOL(ibmebus_bus_type);
+
+static int __init ibmebus_bus_init(void)
+{
+ int err;
+
+ printk(KERN_INFO "IBM eBus Device Driver\n");
+
+ err = bus_register(&ibmebus_bus_type);
+ if (err) {
+ printk(KERN_ERR "%s: failed to register IBM eBus.\n",
+ __func__);
+ return err;
+ }
+
+ err = device_register(&ibmebus_bus_device);
+ if (err) {
+ printk(KERN_WARNING "%s: device_register returned %i\n",
+ __func__, err);
+ bus_unregister(&ibmebus_bus_type);
+
+ return err;
+ }
+
+ err = ibmebus_create_devices(ibmebus_matches);
+ if (err) {
+ device_unregister(&ibmebus_bus_device);
+ bus_unregister(&ibmebus_bus_type);
+ return err;
+ }
+
+ return 0;
+}
+postcore_initcall(ibmebus_bus_init);
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
new file mode 100644
index 00000000..04d79093
--- /dev/null
+++ b/arch/powerpc/kernel/idle.c
@@ -0,0 +1,168 @@
+/*
+ * Idle daemon for PowerPC. Idle daemon will handle any action
+ * that needs to be taken when the system becomes idle.
+ *
+ * Originally written by Cort Dougan (cort@cs.nmt.edu).
+ * Subsequent 32-bit hacking by Tom Rini, Armin Kuster,
+ * Paul Mackerras and others.
+ *
+ * iSeries supported added by Mike Corrigan <mikejc@us.ibm.com>
+ *
+ * Additional shared processor, SMT, and firmware support
+ * Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ *
+ * 32-bit and 64-bit versions merged by Paul Mackerras <paulus@samba.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/cpu.h>
+#include <linux/sysctl.h>
+#include <linux/tick.h>
+
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/runlatch.h>
+#include <asm/smp.h>
+
+#ifdef CONFIG_HOTPLUG_CPU
+#define cpu_should_die() cpu_is_offline(smp_processor_id())
+#else
+#define cpu_should_die() 0
+#endif
+
+unsigned long cpuidle_disable = IDLE_NO_OVERRIDE;
+EXPORT_SYMBOL(cpuidle_disable);
+
+static int __init powersave_off(char *arg)
+{
+ ppc_md.power_save = NULL;
+ cpuidle_disable = IDLE_POWERSAVE_OFF;
+ return 0;
+}
+__setup("powersave=off", powersave_off);
+
+/*
+ * The body of the idle task.
+ */
+void cpu_idle(void)
+{
+ if (ppc_md.idle_loop)
+ ppc_md.idle_loop(); /* doesn't return */
+
+ set_thread_flag(TIF_POLLING_NRFLAG);
+ while (1) {
+ tick_nohz_idle_enter();
+ rcu_idle_enter();
+
+ while (!need_resched() && !cpu_should_die()) {
+ ppc64_runlatch_off();
+
+ if (ppc_md.power_save) {
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ /*
+ * smp_mb is so clearing of TIF_POLLING_NRFLAG
+ * is ordered w.r.t. need_resched() test.
+ */
+ smp_mb();
+ local_irq_disable();
+
+ /* Don't trace irqs off for idle */
+ stop_critical_timings();
+
+ /* check again after disabling irqs */
+ if (!need_resched() && !cpu_should_die())
+ ppc_md.power_save();
+
+ start_critical_timings();
+
+ /* Some power_save functions return with
+ * interrupts enabled, some don't.
+ */
+ if (irqs_disabled())
+ local_irq_enable();
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
+ } else {
+ /*
+ * Go into low thread priority and possibly
+ * low power mode.
+ */
+ HMT_low();
+ HMT_very_low();
+ }
+ }
+
+ HMT_medium();
+ ppc64_runlatch_on();
+ rcu_idle_exit();
+ tick_nohz_idle_exit();
+ if (cpu_should_die()) {
+ sched_preempt_enable_no_resched();
+ cpu_die();
+ }
+ schedule_preempt_disabled();
+ }
+}
+
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs come out of the old
+ * idle loop and start using the new idle loop.
+ * Required while changing idle handler on SMP systems.
+ * Caller must have changed idle handler to the new value before the call.
+ * This window may be larger on shared systems.
+ */
+void cpu_idle_wait(void)
+{
+ smp_mb();
+ /* kick all the CPUs so that they exit out of pm_idle */
+ smp_call_function(do_nothing, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
+int powersave_nap;
+
+#ifdef CONFIG_SYSCTL
+/*
+ * Register the sysctl to set/clear powersave_nap.
+ */
+static ctl_table powersave_nap_ctl_table[]={
+ {
+ .procname = "powersave-nap",
+ .data = &powersave_nap,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {}
+};
+static ctl_table powersave_nap_sysctl_root[] = {
+ {
+ .procname = "kernel",
+ .mode = 0555,
+ .child = powersave_nap_ctl_table,
+ },
+ {}
+};
+
+static int __init
+register_powersave_nap_sysctl(void)
+{
+ register_sysctl_table(powersave_nap_sysctl_root);
+
+ return 0;
+}
+__initcall(register_powersave_nap_sysctl);
+#endif
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
new file mode 100644
index 00000000..15c611de
--- /dev/null
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -0,0 +1,197 @@
+/*
+ * This file contains the power_save function for 6xx & 7xxx CPUs
+ * rewritten in assembler
+ *
+ * Warning ! This code assumes that if your machine has a 750fx
+ * it will have PLL 1 set to low speed mode (used during NAP/DOZE).
+ * if this is not the case some additional changes will have to
+ * be done to check a runtime var (a bit like powersave-nap)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+ .text
+
+/*
+ * Init idle, called at early CPU setup time from head.S for each CPU
+ * Make sure no rest of NAP mode remains in HID0, save default
+ * values for some CPU specific registers. Called with r24
+ * containing CPU number and r3 reloc offset
+ */
+_GLOBAL(init_idle_6xx)
+BEGIN_FTR_SECTION
+ mfspr r4,SPRN_HID0
+ rlwinm r4,r4,0,10,8 /* Clear NAP */
+ mtspr SPRN_HID0, r4
+ b 1f
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
+ blr
+1:
+ slwi r5,r24,2
+ add r5,r5,r3
+BEGIN_FTR_SECTION
+ mfspr r4,SPRN_MSSCR0
+ addis r6,r5, nap_save_msscr0@ha
+ stw r4,nap_save_msscr0@l(r6)
+END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR)
+BEGIN_FTR_SECTION
+ mfspr r4,SPRN_HID1
+ addis r6,r5,nap_save_hid1@ha
+ stw r4,nap_save_hid1@l(r6)
+END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX)
+ blr
+
+/*
+ * Here is the power_save_6xx function. This could eventually be
+ * split into several functions & changing the function pointer
+ * depending on the various features.
+ */
+_GLOBAL(ppc6xx_idle)
+ /* Check if we can nap or doze, put HID0 mask in r3
+ */
+ lis r3, 0
+BEGIN_FTR_SECTION
+ lis r3,HID0_DOZE@h
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE)
+BEGIN_FTR_SECTION
+ /* We must dynamically check for the NAP feature as it
+ * can be cleared by CPU init after the fixups are done
+ */
+ lis r4,cur_cpu_spec@ha
+ lwz r4,cur_cpu_spec@l(r4)
+ lwz r4,CPU_SPEC_FEATURES(r4)
+ andi. r0,r4,CPU_FTR_CAN_NAP
+ beq 1f
+ /* Now check if user or arch enabled NAP mode */
+ lis r4,powersave_nap@ha
+ lwz r4,powersave_nap@l(r4)
+ cmpwi 0,r4,0
+ beq 1f
+ lis r3,HID0_NAP@h
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
+ cmpwi 0,r3,0
+ beqlr
+
+ /* Some pre-nap cleanups needed on some CPUs */
+ andis. r0,r3,HID0_NAP@h
+ beq 2f
+BEGIN_FTR_SECTION
+ /* Disable L2 prefetch on some 745x and try to ensure
+ * L2 prefetch engines are idle. As explained by errata
+ * text, we can't be sure they are, we just hope very hard
+ * that well be enough (sic !). At least I noticed Apple
+ * doesn't even bother doing the dcbf's here...
+ */
+ mfspr r4,SPRN_MSSCR0
+ rlwinm r4,r4,0,0,29
+ sync
+ mtspr SPRN_MSSCR0,r4
+ sync
+ isync
+ lis r4,KERNELBASE@h
+ dcbf 0,r4
+ dcbf 0,r4
+ dcbf 0,r4
+ dcbf 0,r4
+END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR)
+2:
+BEGIN_FTR_SECTION
+ /* Go to low speed mode on some 750FX */
+ lis r4,powersave_lowspeed@ha
+ lwz r4,powersave_lowspeed@l(r4)
+ cmpwi 0,r4,0
+ beq 1f
+ mfspr r4,SPRN_HID1
+ oris r4,r4,0x0001
+ mtspr SPRN_HID1,r4
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX)
+
+ /* Go to NAP or DOZE now */
+ mfspr r4,SPRN_HID0
+ lis r5,(HID0_NAP|HID0_SLEEP)@h
+BEGIN_FTR_SECTION
+ oris r5,r5,HID0_DOZE@h
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE)
+ andc r4,r4,r5
+ or r4,r4,r3
+BEGIN_FTR_SECTION
+ oris r4,r4,HID0_DPM@h /* that should be done once for all */
+END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM)
+ mtspr SPRN_HID0,r4
+BEGIN_FTR_SECTION
+ DSSALL
+ sync
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ rlwinm r9,r1,0,0,31-THREAD_SHIFT /* current thread_info */
+ lwz r8,TI_LOCAL_FLAGS(r9) /* set napping bit */
+ ori r8,r8,_TLF_NAPPING /* so when we take an exception */
+ stw r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */
+ mfmsr r7
+ ori r7,r7,MSR_EE
+ oris r7,r7,MSR_POW@h
+1: sync
+ mtmsr r7
+ isync
+ b 1b
+
+/*
+ * Return from NAP/DOZE mode, restore some CPU specific registers,
+ * we are called with DR/IR still off and r2 containing physical
+ * address of current. R11 points to the exception frame (physical
+ * address). We have to preserve r10.
+ */
+_GLOBAL(power_save_ppc32_restore)
+ lwz r9,_LINK(r11) /* interrupted in ppc6xx_idle: */
+ stw r9,_NIP(r11) /* make it do a blr */
+
+#ifdef CONFIG_SMP
+ rlwinm r12,r11,0,0,31-THREAD_SHIFT
+ lwz r11,TI_CPU(r12) /* get cpu number * 4 */
+ slwi r11,r11,2
+#else
+ li r11,0
+#endif
+ /* Todo make sure all these are in the same page
+ * and load r11 (@ha part + CPU offset) only once
+ */
+BEGIN_FTR_SECTION
+ mfspr r9,SPRN_HID0
+ andis. r9,r9,HID0_NAP@h
+ beq 1f
+ addis r9,r11,(nap_save_msscr0-KERNELBASE)@ha
+ lwz r9,nap_save_msscr0@l(r9)
+ mtspr SPRN_MSSCR0, r9
+ sync
+ isync
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR)
+BEGIN_FTR_SECTION
+ addis r9,r11,(nap_save_hid1-KERNELBASE)@ha
+ lwz r9,nap_save_hid1@l(r9)
+ mtspr SPRN_HID1, r9
+END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX)
+ b transfer_to_handler_cont
+
+ .data
+
+_GLOBAL(nap_save_msscr0)
+ .space 4*NR_CPUS
+
+_GLOBAL(nap_save_hid1)
+ .space 4*NR_CPUS
+
+_GLOBAL(powersave_lowspeed)
+ .long 0
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S
new file mode 100644
index 00000000..ff007b59
--- /dev/null
+++ b/arch/powerpc/kernel/idle_book3e.S
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2010 IBM Corp, Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ *
+ * Generic idle routine for Book3E processors
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ppc-opcode.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+
+/* 64-bit version only for now */
+#ifdef CONFIG_PPC64
+
+_GLOBAL(book3e_idle)
+ /* Save LR for later */
+ mflr r0
+ std r0,16(r1)
+
+ /* Hard disable interrupts */
+ wrteei 0
+
+ /* Now check if an interrupt came in while we were soft disabled
+ * since we may otherwise lose it (doorbells etc...).
+ */
+ lbz r3,PACAIRQHAPPENED(r13)
+ cmpwi cr0,r3,0
+ bnelr
+
+ /* Now we are going to mark ourselves as soft and hard enabled in
+ * order to be able to take interrupts while asleep. We inform lockdep
+ * of that. We don't actually turn interrupts on just yet tho.
+ */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ stdu r1,-128(r1)
+ bl .trace_hardirqs_on
+ addi r1,r1,128
+#endif
+ li r0,1
+ stb r0,PACASOFTIRQEN(r13)
+
+ /* Interrupts will make use return to LR, so get something we want
+ * in there
+ */
+ bl 1f
+
+ /* And return (interrupts are on) */
+ ld r0,16(r1)
+ mtlr r0
+ blr
+
+1: /* Let's set the _TLF_NAPPING flag so interrupts make us return
+ * to the right spot
+ */
+ clrrdi r11,r1,THREAD_SHIFT
+ ld r10,TI_LOCAL_FLAGS(r11)
+ ori r10,r10,_TLF_NAPPING
+ std r10,TI_LOCAL_FLAGS(r11)
+
+ /* We can now re-enable hard interrupts and go to sleep */
+ wrteei 1
+1: PPC_WAIT(0)
+ b 1b
+
+#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S
new file mode 100644
index 00000000..4f0ab85f
--- /dev/null
+++ b/arch/powerpc/kernel/idle_e500.S
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ * Dave Liu <daveliu@freescale.com>
+ * copy from idle_6xx.S and modify for e500 based processor,
+ * implement the power_save function in idle.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+ .text
+
+_GLOBAL(e500_idle)
+ rlwinm r3,r1,0,0,31-THREAD_SHIFT /* current thread_info */
+ lwz r4,TI_LOCAL_FLAGS(r3) /* set napping bit */
+ ori r4,r4,_TLF_NAPPING /* so when we take an exception */
+ stw r4,TI_LOCAL_FLAGS(r3) /* it will return to our caller */
+
+#ifdef CONFIG_PPC_E500MC
+ wrteei 1
+1: wait
+
+ /*
+ * Guard against spurious wakeups (e.g. from a hypervisor) --
+ * any real interrupt will cause us to return to LR due to
+ * _TLF_NAPPING.
+ */
+ b 1b
+#else
+ /* Check if we can nap or doze, put HID0 mask in r3 */
+ lis r3,0
+BEGIN_FTR_SECTION
+ lis r3,HID0_DOZE@h
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE)
+
+BEGIN_FTR_SECTION
+ /* Now check if user enabled NAP mode */
+ lis r4,powersave_nap@ha
+ lwz r4,powersave_nap@l(r4)
+ cmpwi 0,r4,0
+ beq 1f
+ stwu r1,-16(r1)
+ mflr r0
+ stw r0,20(r1)
+ bl flush_dcache_L1
+ lwz r0,20(r1)
+ addi r1,r1,16
+ mtlr r0
+ lis r3,HID0_NAP@h
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
+BEGIN_FTR_SECTION
+ msync
+ li r7,L2CSR0_L2FL@l
+ mtspr SPRN_L2CSR0,r7
+2:
+ mfspr r7,SPRN_L2CSR0
+ andi. r4,r7,L2CSR0_L2FL@l
+ bne 2b
+END_FTR_SECTION_IFSET(CPU_FTR_L2CSR|CPU_FTR_CAN_NAP)
+1:
+ /* Go to NAP or DOZE now */
+ mfspr r4,SPRN_HID0
+ rlwinm r4,r4,0,~(HID0_DOZE|HID0_NAP|HID0_SLEEP)
+ or r4,r4,r3
+ isync
+ mtspr SPRN_HID0,r4
+ isync
+
+ mfmsr r7
+ oris r7,r7,MSR_WE@h
+ ori r7,r7,MSR_EE
+ msync
+ mtmsr r7
+ isync
+2: b 2b
+#endif /* !E500MC */
+
+/*
+ * Return from NAP/DOZE mode, restore some CPU specific registers,
+ * r2 containing physical address of current.
+ * r11 points to the exception frame (physical address).
+ * We have to preserve r10.
+ */
+_GLOBAL(power_save_ppc32_restore)
+ lwz r9,_LINK(r11) /* interrupted in e500_idle */
+ stw r9,_NIP(r11) /* make it do a blr */
+
+#ifdef CONFIG_SMP
+ rlwinm r12,r1,0,0,31-THREAD_SHIFT
+ lwz r11,TI_CPU(r12) /* get cpu number * 4 */
+ slwi r11,r11,2
+#else
+ li r11,0
+#endif
+
+ b transfer_to_handler_cont
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
new file mode 100644
index 00000000..2c71b0fc
--- /dev/null
+++ b/arch/powerpc/kernel/idle_power4.S
@@ -0,0 +1,73 @@
+/*
+ * This file contains the power_save function for 970-family CPUs.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/irqflags.h>
+
+#undef DEBUG
+
+ .text
+
+_GLOBAL(power4_idle)
+BEGIN_FTR_SECTION
+ blr
+END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
+ /* Now check if user or arch enabled NAP mode */
+ LOAD_REG_ADDRBASE(r3,powersave_nap)
+ lwz r4,ADDROFF(powersave_nap)(r3)
+ cmpwi 0,r4,0
+ beqlr
+
+ /* Hard disable interrupts */
+ mfmsr r7
+ rldicl r0,r7,48,1
+ rotldi r0,r0,16
+ mtmsrd r0,1
+
+ /* Check if something happened while soft-disabled */
+ lbz r0,PACAIRQHAPPENED(r13)
+ cmpwi cr0,r0,0
+ bnelr
+
+ /* Soft-enable interrupts */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-128(r1)
+ bl .trace_hardirqs_on
+ addi r1,r1,128
+ ld r0,16(r1)
+ mtlr r0
+ mfmsr r7
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+ li r0,1
+ stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */
+BEGIN_FTR_SECTION
+ DSSALL
+ sync
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ clrrdi r9,r1,THREAD_SHIFT /* current thread_info */
+ ld r8,TI_LOCAL_FLAGS(r9) /* set napping bit */
+ ori r8,r8,_TLF_NAPPING /* so when we take an exception */
+ std r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */
+ ori r7,r7,MSR_EE
+ oris r7,r7,MSR_POW@h
+1: sync
+ isync
+ mtmsrd r7
+ isync
+ b 1b
+
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
new file mode 100644
index 00000000..0cdc9a39
--- /dev/null
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -0,0 +1,116 @@
+/*
+ * This file contains the power_save function for Power7 CPUs.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ppc-opcode.h>
+#include <asm/hw_irq.h>
+
+#undef DEBUG
+
+ .text
+
+_GLOBAL(power7_idle)
+ /* Now check if user or arch enabled NAP mode */
+ LOAD_REG_ADDRBASE(r3,powersave_nap)
+ lwz r4,ADDROFF(powersave_nap)(r3)
+ cmpwi 0,r4,0
+ beqlr
+
+ /* NAP is a state loss, we create a regs frame on the
+ * stack, fill it up with the state we care about and
+ * stick a pointer to it in PACAR1. We really only
+ * need to save PC, some CR bits and the NV GPRs,
+ * but for now an interrupt frame will do.
+ */
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-INT_FRAME_SIZE(r1)
+ std r0,_LINK(r1)
+ std r0,_NIP(r1)
+
+#ifndef CONFIG_SMP
+ /* Make sure FPU, VSX etc... are flushed as we may lose
+ * state when going to nap mode
+ */
+ bl .discard_lazy_cpu_state
+#endif /* CONFIG_SMP */
+
+ /* Hard disable interrupts */
+ mfmsr r9
+ rldicl r9,r9,48,1
+ rotldi r9,r9,16
+ mtmsrd r9,1 /* hard-disable interrupts */
+
+ /* Check if something happened while soft-disabled */
+ lbz r0,PACAIRQHAPPENED(r13)
+ cmpwi cr0,r0,0
+ beq 1f
+ addi r1,r1,INT_FRAME_SIZE
+ ld r0,16(r1)
+ mtlr r0
+ blr
+
+1: /* We mark irqs hard disabled as this is the state we'll
+ * be in when returning and we need to tell arch_local_irq_restore()
+ * about it
+ */
+ li r0,PACA_IRQ_HARD_DIS
+ stb r0,PACAIRQHAPPENED(r13)
+
+ /* We haven't lost state ... yet */
+ li r0,0
+ stb r0,PACA_NAPSTATELOST(r13)
+
+ /* Continue saving state */
+ SAVE_GPR(2, r1)
+ SAVE_NVGPRS(r1)
+ mfcr r3
+ std r3,_CCR(r1)
+ std r9,_MSR(r1)
+ std r1,PACAR1(r13)
+
+ /* Magic NAP mode enter sequence */
+ std r0,0(r1)
+ ptesync
+ ld r0,0(r1)
+1: cmp cr0,r0,r0
+ bne 1b
+ PPC_NAP
+ b .
+
+_GLOBAL(power7_wakeup_loss)
+ ld r1,PACAR1(r13)
+ REST_NVGPRS(r1)
+ REST_GPR(2, r1)
+ ld r3,_CCR(r1)
+ ld r4,_MSR(r1)
+ ld r5,_NIP(r1)
+ addi r1,r1,INT_FRAME_SIZE
+ mtcr r3
+ mtspr SPRN_SRR1,r4
+ mtspr SPRN_SRR0,r5
+ rfid
+
+_GLOBAL(power7_wakeup_noloss)
+ lbz r0,PACA_NAPSTATELOST(r13)
+ cmpwi r0,0
+ bne .power7_wakeup_loss
+ ld r1,PACAR1(r13)
+ ld r4,_MSR(r1)
+ ld r5,_NIP(r1)
+ addi r1,r1,INT_FRAME_SIZE
+ mtspr SPRN_SRR1,r4
+ mtspr SPRN_SRR0,r5
+ rfid
diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c
new file mode 100644
index 00000000..d076d465
--- /dev/null
+++ b/arch/powerpc/kernel/init_task.c
@@ -0,0 +1,29 @@
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/fs.h>
+#include <linux/mqueue.h>
+#include <asm/uaccess.h>
+
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+/*
+ * Initial thread structure.
+ *
+ * We need to make sure that this is 16384-byte aligned due to the
+ * way process stacks are handled. This is done by having a special
+ * "init_task" linker map entry..
+ */
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+
+EXPORT_SYMBOL(init_task);
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
new file mode 100644
index 00000000..12d329bc
--- /dev/null
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -0,0 +1,189 @@
+/*
+ * Support PCI IO workaround
+ *
+ * Copyright (C) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ * IBM, Corp.
+ * (C) Copyright 2007-2008 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/sched.h> /* for init_mm */
+
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/pgtable.h>
+#include <asm/ppc-pci.h>
+#include <asm/io-workarounds.h>
+
+#define IOWA_MAX_BUS 8
+
+static struct iowa_bus iowa_busses[IOWA_MAX_BUS];
+static unsigned int iowa_bus_count;
+
+static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr)
+{
+ int i, j;
+ struct resource *res;
+ unsigned long vstart, vend;
+
+ for (i = 0; i < iowa_bus_count; i++) {
+ struct iowa_bus *bus = &iowa_busses[i];
+ struct pci_controller *phb = bus->phb;
+
+ if (vaddr) {
+ vstart = (unsigned long)phb->io_base_virt;
+ vend = vstart + phb->pci_io_size - 1;
+ if ((vaddr >= vstart) && (vaddr <= vend))
+ return bus;
+ }
+
+ if (paddr)
+ for (j = 0; j < 3; j++) {
+ res = &phb->mem_resources[j];
+ if (paddr >= res->start && paddr <= res->end)
+ return bus;
+ }
+ }
+
+ return NULL;
+}
+
+struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
+{
+ struct iowa_bus *bus;
+ int token;
+
+ token = PCI_GET_ADDR_TOKEN(addr);
+
+ if (token && token <= iowa_bus_count)
+ bus = &iowa_busses[token - 1];
+ else {
+ unsigned long vaddr, paddr;
+ pte_t *ptep;
+
+ vaddr = (unsigned long)PCI_FIX_ADDR(addr);
+ if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
+ return NULL;
+
+ ptep = find_linux_pte(init_mm.pgd, vaddr);
+ if (ptep == NULL)
+ paddr = 0;
+ else
+ paddr = pte_pfn(*ptep) << PAGE_SHIFT;
+ bus = iowa_pci_find(vaddr, paddr);
+
+ if (bus == NULL)
+ return NULL;
+ }
+
+ return bus;
+}
+
+struct iowa_bus *iowa_pio_find_bus(unsigned long port)
+{
+ unsigned long vaddr = (unsigned long)pci_io_base + port;
+ return iowa_pci_find(vaddr, 0);
+}
+
+
+#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \
+static ret iowa_##name at \
+{ \
+ struct iowa_bus *bus; \
+ bus = iowa_##space##_find_bus(aa); \
+ if (bus && bus->ops && bus->ops->name) \
+ return bus->ops->name al; \
+ return __do_##name al; \
+}
+
+#define DEF_PCI_AC_NORET(name, at, al, space, aa) \
+static void iowa_##name at \
+{ \
+ struct iowa_bus *bus; \
+ bus = iowa_##space##_find_bus(aa); \
+ if (bus && bus->ops && bus->ops->name) { \
+ bus->ops->name al; \
+ return; \
+ } \
+ __do_##name al; \
+}
+
+#include <asm/io-defs.h>
+
+#undef DEF_PCI_AC_RET
+#undef DEF_PCI_AC_NORET
+
+static const struct ppc_pci_io __devinitconst iowa_pci_io = {
+
+#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) .name = iowa_##name,
+#define DEF_PCI_AC_NORET(name, at, al, space, aa) .name = iowa_##name,
+
+#include <asm/io-defs.h>
+
+#undef DEF_PCI_AC_RET
+#undef DEF_PCI_AC_NORET
+
+};
+
+static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
+ unsigned long flags, void *caller)
+{
+ struct iowa_bus *bus;
+ void __iomem *res = __ioremap_caller(addr, size, flags, caller);
+ int busno;
+
+ bus = iowa_pci_find(0, (unsigned long)addr);
+ if (bus != NULL) {
+ busno = bus - iowa_busses;
+ PCI_SET_ADDR_TOKEN(res, busno + 1);
+ }
+ return res;
+}
+
+/* Enable IO workaround */
+static void __devinit io_workaround_init(void)
+{
+ static int io_workaround_inited;
+
+ if (io_workaround_inited)
+ return;
+ ppc_pci_io = iowa_pci_io;
+ ppc_md.ioremap = iowa_ioremap;
+ io_workaround_inited = 1;
+}
+
+/* Register new bus to support workaround */
+void __devinit iowa_register_bus(struct pci_controller *phb,
+ struct ppc_pci_io *ops,
+ int (*initfunc)(struct iowa_bus *, void *), void *data)
+{
+ struct iowa_bus *bus;
+ struct device_node *np = phb->dn;
+
+ io_workaround_init();
+
+ if (iowa_bus_count >= IOWA_MAX_BUS) {
+ pr_err("IOWA:Too many pci bridges, "
+ "workarounds disabled for %s\n", np->full_name);
+ return;
+ }
+
+ bus = &iowa_busses[iowa_bus_count];
+ bus->phb = phb;
+ bus->ops = ops;
+ bus->private = data;
+
+ if (initfunc)
+ if ((*initfunc)(bus, data))
+ return;
+
+ iowa_bus_count++;
+
+ pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name);
+}
+
diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c
new file mode 100644
index 00000000..886381f3
--- /dev/null
+++ b/arch/powerpc/kernel/io.c
@@ -0,0 +1,207 @@
+/*
+ * I/O string operations
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ * Copyright (C) 2006 IBM Corporation
+ *
+ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras.
+ *
+ * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
+ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
+ *
+ * Rewritten in C by Stephen Rothwell.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
+
+#include <asm/io.h>
+#include <asm/firmware.h>
+#include <asm/bug.h>
+
+void _insb(const volatile u8 __iomem *port, void *buf, long count)
+{
+ u8 *tbuf = buf;
+ u8 tmp;
+
+ if (unlikely(count <= 0))
+ return;
+ asm volatile("sync");
+ do {
+ tmp = *port;
+ eieio();
+ *tbuf++ = tmp;
+ } while (--count != 0);
+ asm volatile("twi 0,%0,0; isync" : : "r" (tmp));
+}
+EXPORT_SYMBOL(_insb);
+
+void _outsb(volatile u8 __iomem *port, const void *buf, long count)
+{
+ const u8 *tbuf = buf;
+
+ if (unlikely(count <= 0))
+ return;
+ asm volatile("sync");
+ do {
+ *port = *tbuf++;
+ } while (--count != 0);
+ asm volatile("sync");
+}
+EXPORT_SYMBOL(_outsb);
+
+void _insw_ns(const volatile u16 __iomem *port, void *buf, long count)
+{
+ u16 *tbuf = buf;
+ u16 tmp;
+
+ if (unlikely(count <= 0))
+ return;
+ asm volatile("sync");
+ do {
+ tmp = *port;
+ eieio();
+ *tbuf++ = tmp;
+ } while (--count != 0);
+ asm volatile("twi 0,%0,0; isync" : : "r" (tmp));
+}
+EXPORT_SYMBOL(_insw_ns);
+
+void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count)
+{
+ const u16 *tbuf = buf;
+
+ if (unlikely(count <= 0))
+ return;
+ asm volatile("sync");
+ do {
+ *port = *tbuf++;
+ } while (--count != 0);
+ asm volatile("sync");
+}
+EXPORT_SYMBOL(_outsw_ns);
+
+void _insl_ns(const volatile u32 __iomem *port, void *buf, long count)
+{
+ u32 *tbuf = buf;
+ u32 tmp;
+
+ if (unlikely(count <= 0))
+ return;
+ asm volatile("sync");
+ do {
+ tmp = *port;
+ eieio();
+ *tbuf++ = tmp;
+ } while (--count != 0);
+ asm volatile("twi 0,%0,0; isync" : : "r" (tmp));
+}
+EXPORT_SYMBOL(_insl_ns);
+
+void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count)
+{
+ const u32 *tbuf = buf;
+
+ if (unlikely(count <= 0))
+ return;
+ asm volatile("sync");
+ do {
+ *port = *tbuf++;
+ } while (--count != 0);
+ asm volatile("sync");
+}
+EXPORT_SYMBOL(_outsl_ns);
+
+#define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0)
+
+notrace void
+_memset_io(volatile void __iomem *addr, int c, unsigned long n)
+{
+ void *p = (void __force *)addr;
+ u32 lc = c;
+ lc |= lc << 8;
+ lc |= lc << 16;
+
+ __asm__ __volatile__ ("sync" : : : "memory");
+ while(n && !IO_CHECK_ALIGN(p, 4)) {
+ *((volatile u8 *)p) = c;
+ p++;
+ n--;
+ }
+ while(n >= 4) {
+ *((volatile u32 *)p) = lc;
+ p += 4;
+ n -= 4;
+ }
+ while(n) {
+ *((volatile u8 *)p) = c;
+ p++;
+ n--;
+ }
+ __asm__ __volatile__ ("sync" : : : "memory");
+}
+EXPORT_SYMBOL(_memset_io);
+
+void _memcpy_fromio(void *dest, const volatile void __iomem *src,
+ unsigned long n)
+{
+ void *vsrc = (void __force *) src;
+
+ __asm__ __volatile__ ("sync" : : : "memory");
+ while(n && (!IO_CHECK_ALIGN(vsrc, 4) || !IO_CHECK_ALIGN(dest, 4))) {
+ *((u8 *)dest) = *((volatile u8 *)vsrc);
+ eieio();
+ vsrc++;
+ dest++;
+ n--;
+ }
+ while(n >= 4) {
+ *((u32 *)dest) = *((volatile u32 *)vsrc);
+ eieio();
+ vsrc += 4;
+ dest += 4;
+ n -= 4;
+ }
+ while(n) {
+ *((u8 *)dest) = *((volatile u8 *)vsrc);
+ eieio();
+ vsrc++;
+ dest++;
+ n--;
+ }
+ __asm__ __volatile__ ("sync" : : : "memory");
+}
+EXPORT_SYMBOL(_memcpy_fromio);
+
+void _memcpy_toio(volatile void __iomem *dest, const void *src, unsigned long n)
+{
+ void *vdest = (void __force *) dest;
+
+ __asm__ __volatile__ ("sync" : : : "memory");
+ while(n && (!IO_CHECK_ALIGN(vdest, 4) || !IO_CHECK_ALIGN(src, 4))) {
+ *((volatile u8 *)vdest) = *((u8 *)src);
+ src++;
+ vdest++;
+ n--;
+ }
+ while(n >= 4) {
+ *((volatile u32 *)vdest) = *((volatile u32 *)src);
+ src += 4;
+ vdest += 4;
+ n-=4;
+ }
+ while(n) {
+ *((volatile u8 *)vdest) = *((u8 *)src);
+ src++;
+ vdest++;
+ n--;
+ }
+ __asm__ __volatile__ ("sync" : : : "memory");
+}
+EXPORT_SYMBOL(_memcpy_toio);
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
new file mode 100644
index 00000000..97a3715a
--- /dev/null
+++ b/arch/powerpc/kernel/iomap.c
@@ -0,0 +1,132 @@
+/*
+ * ppc64 "iomap" interface implementation.
+ *
+ * (C) Copyright 2004 Linus Torvalds
+ */
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+
+/*
+ * Here comes the ppc64 implementation of the IOMAP
+ * interfaces.
+ */
+unsigned int ioread8(void __iomem *addr)
+{
+ return readb(addr);
+}
+unsigned int ioread16(void __iomem *addr)
+{
+ return readw(addr);
+}
+unsigned int ioread16be(void __iomem *addr)
+{
+ return in_be16(addr);
+}
+unsigned int ioread32(void __iomem *addr)
+{
+ return readl(addr);
+}
+unsigned int ioread32be(void __iomem *addr)
+{
+ return in_be32(addr);
+}
+EXPORT_SYMBOL(ioread8);
+EXPORT_SYMBOL(ioread16);
+EXPORT_SYMBOL(ioread16be);
+EXPORT_SYMBOL(ioread32);
+EXPORT_SYMBOL(ioread32be);
+
+void iowrite8(u8 val, void __iomem *addr)
+{
+ writeb(val, addr);
+}
+void iowrite16(u16 val, void __iomem *addr)
+{
+ writew(val, addr);
+}
+void iowrite16be(u16 val, void __iomem *addr)
+{
+ out_be16(addr, val);
+}
+void iowrite32(u32 val, void __iomem *addr)
+{
+ writel(val, addr);
+}
+void iowrite32be(u32 val, void __iomem *addr)
+{
+ out_be32(addr, val);
+}
+EXPORT_SYMBOL(iowrite8);
+EXPORT_SYMBOL(iowrite16);
+EXPORT_SYMBOL(iowrite16be);
+EXPORT_SYMBOL(iowrite32);
+EXPORT_SYMBOL(iowrite32be);
+
+/*
+ * These are the "repeat read/write" functions. Note the
+ * non-CPU byte order. We do things in "IO byteorder"
+ * here.
+ *
+ * FIXME! We could make these do EEH handling if we really
+ * wanted. Not clear if we do.
+ */
+void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
+{
+ _insb((u8 __iomem *) addr, dst, count);
+}
+void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
+{
+ _insw_ns((u16 __iomem *) addr, dst, count);
+}
+void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
+{
+ _insl_ns((u32 __iomem *) addr, dst, count);
+}
+EXPORT_SYMBOL(ioread8_rep);
+EXPORT_SYMBOL(ioread16_rep);
+EXPORT_SYMBOL(ioread32_rep);
+
+void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+ _outsb((u8 __iomem *) addr, src, count);
+}
+void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+ _outsw_ns((u16 __iomem *) addr, src, count);
+}
+void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+ _outsl_ns((u32 __iomem *) addr, src, count);
+}
+EXPORT_SYMBOL(iowrite8_rep);
+EXPORT_SYMBOL(iowrite16_rep);
+EXPORT_SYMBOL(iowrite32_rep);
+
+void __iomem *ioport_map(unsigned long port, unsigned int len)
+{
+ return (void __iomem *) (port + _IO_BASE);
+}
+
+void ioport_unmap(void __iomem *addr)
+{
+ /* Nothing to do */
+}
+EXPORT_SYMBOL(ioport_map);
+EXPORT_SYMBOL(ioport_unmap);
+
+#ifdef CONFIG_PCI
+void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
+{
+ if (isa_vaddr_is_ioport(addr))
+ return;
+ if (pcibios_vaddr_is_ioport(addr))
+ return;
+ iounmap(addr);
+}
+
+EXPORT_SYMBOL(pci_iounmap);
+#endif /* CONFIG_PCI */
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
new file mode 100644
index 00000000..359f0785
--- /dev/null
+++ b/arch/powerpc/kernel/iommu.c
@@ -0,0 +1,685 @@
+/*
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
+ *
+ * Rewrite, cleanup, new allocation schemes, virtual merging:
+ * Copyright (C) 2004 Olof Johansson, IBM Corporation
+ * and Ben. Herrenschmidt, IBM Corporation
+ *
+ * Dynamic DMA mapping support, bus-independent parts.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/dma-mapping.h>
+#include <linux/bitmap.h>
+#include <linux/iommu-helper.h>
+#include <linux/crash_dump.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/iommu.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/kdump.h>
+#include <asm/fadump.h>
+
+#define DBG(...)
+
+static int novmerge;
+
+static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);
+
+static int __init setup_iommu(char *str)
+{
+ if (!strcmp(str, "novmerge"))
+ novmerge = 1;
+ else if (!strcmp(str, "vmerge"))
+ novmerge = 0;
+ return 1;
+}
+
+__setup("iommu=", setup_iommu);
+
+static unsigned long iommu_range_alloc(struct device *dev,
+ struct iommu_table *tbl,
+ unsigned long npages,
+ unsigned long *handle,
+ unsigned long mask,
+ unsigned int align_order)
+{
+ unsigned long n, end, start;
+ unsigned long limit;
+ int largealloc = npages > 15;
+ int pass = 0;
+ unsigned long align_mask;
+ unsigned long boundary_size;
+
+ align_mask = 0xffffffffffffffffl >> (64 - align_order);
+
+ /* This allocator was derived from x86_64's bit string search */
+
+ /* Sanity check */
+ if (unlikely(npages == 0)) {
+ if (printk_ratelimit())
+ WARN_ON(1);
+ return DMA_ERROR_CODE;
+ }
+
+ if (handle && *handle)
+ start = *handle;
+ else
+ start = largealloc ? tbl->it_largehint : tbl->it_hint;
+
+ /* Use only half of the table for small allocs (15 pages or less) */
+ limit = largealloc ? tbl->it_size : tbl->it_halfpoint;
+
+ if (largealloc && start < tbl->it_halfpoint)
+ start = tbl->it_halfpoint;
+
+ /* The case below can happen if we have a small segment appended
+ * to a large, or when the previous alloc was at the very end of
+ * the available space. If so, go back to the initial start.
+ */
+ if (start >= limit)
+ start = largealloc ? tbl->it_largehint : tbl->it_hint;
+
+ again:
+
+ if (limit + tbl->it_offset > mask) {
+ limit = mask - tbl->it_offset + 1;
+ /* If we're constrained on address range, first try
+ * at the masked hint to avoid O(n) search complexity,
+ * but on second pass, start at 0.
+ */
+ if ((start & mask) >= limit || pass > 0)
+ start = 0;
+ else
+ start &= mask;
+ }
+
+ if (dev)
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+ 1 << IOMMU_PAGE_SHIFT);
+ else
+ boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT);
+ /* 4GB boundary for iseries_hv_alloc and iseries_hv_map */
+
+ n = iommu_area_alloc(tbl->it_map, limit, start, npages,
+ tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT,
+ align_mask);
+ if (n == -1) {
+ if (likely(pass < 2)) {
+ /* First failure, just rescan the half of the table.
+ * Second failure, rescan the other half of the table.
+ */
+ start = (largealloc ^ pass) ? tbl->it_halfpoint : 0;
+ limit = pass ? tbl->it_size : limit;
+ pass++;
+ goto again;
+ } else {
+ /* Third failure, give up */
+ return DMA_ERROR_CODE;
+ }
+ }
+
+ end = n + npages;
+
+ /* Bump the hint to a new block for small allocs. */
+ if (largealloc) {
+ /* Don't bump to new block to avoid fragmentation */
+ tbl->it_largehint = end;
+ } else {
+ /* Overflow will be taken care of at the next allocation */
+ tbl->it_hint = (end + tbl->it_blocksize - 1) &
+ ~(tbl->it_blocksize - 1);
+ }
+
+ /* Update handle for SG allocations */
+ if (handle)
+ *handle = end;
+
+ return n;
+}
+
+static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
+ void *page, unsigned int npages,
+ enum dma_data_direction direction,
+ unsigned long mask, unsigned int align_order,
+ struct dma_attrs *attrs)
+{
+ unsigned long entry, flags;
+ dma_addr_t ret = DMA_ERROR_CODE;
+ int build_fail;
+
+ spin_lock_irqsave(&(tbl->it_lock), flags);
+
+ entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);
+
+ if (unlikely(entry == DMA_ERROR_CODE)) {
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+ return DMA_ERROR_CODE;
+ }
+
+ entry += tbl->it_offset; /* Offset into real TCE table */
+ ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */
+
+ /* Put the TCEs in the HW table */
+ build_fail = ppc_md.tce_build(tbl, entry, npages,
+ (unsigned long)page & IOMMU_PAGE_MASK,
+ direction, attrs);
+
+ /* ppc_md.tce_build() only returns non-zero for transient errors.
+ * Clean up the table bitmap in this case and return
+ * DMA_ERROR_CODE. For all other errors the functionality is
+ * not altered.
+ */
+ if (unlikely(build_fail)) {
+ __iommu_free(tbl, ret, npages);
+
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+ return DMA_ERROR_CODE;
+ }
+
+ /* Flush/invalidate TLB caches if necessary */
+ if (ppc_md.tce_flush)
+ ppc_md.tce_flush(tbl);
+
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+
+ /* Make sure updates are seen by hardware */
+ mb();
+
+ return ret;
+}
+
+static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
+{
+ unsigned long entry, free_entry;
+
+ entry = dma_addr >> IOMMU_PAGE_SHIFT;
+ free_entry = entry - tbl->it_offset;
+
+ if (((free_entry + npages) > tbl->it_size) ||
+ (entry < tbl->it_offset)) {
+ if (printk_ratelimit()) {
+ printk(KERN_INFO "iommu_free: invalid entry\n");
+ printk(KERN_INFO "\tentry = 0x%lx\n", entry);
+ printk(KERN_INFO "\tdma_addr = 0x%llx\n", (u64)dma_addr);
+ printk(KERN_INFO "\tTable = 0x%llx\n", (u64)tbl);
+ printk(KERN_INFO "\tbus# = 0x%llx\n", (u64)tbl->it_busno);
+ printk(KERN_INFO "\tsize = 0x%llx\n", (u64)tbl->it_size);
+ printk(KERN_INFO "\tstartOff = 0x%llx\n", (u64)tbl->it_offset);
+ printk(KERN_INFO "\tindex = 0x%llx\n", (u64)tbl->it_index);
+ WARN_ON(1);
+ }
+ return;
+ }
+
+ ppc_md.tce_free(tbl, entry, npages);
+ bitmap_clear(tbl->it_map, free_entry, npages);
+}
+
+static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&(tbl->it_lock), flags);
+
+ __iommu_free(tbl, dma_addr, npages);
+
+ /* Make sure TLB cache is flushed if the HW needs it. We do
+ * not do an mb() here on purpose, it is not needed on any of
+ * the current platforms.
+ */
+ if (ppc_md.tce_flush)
+ ppc_md.tce_flush(tbl);
+
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+}
+
+int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
+ struct scatterlist *sglist, int nelems,
+ unsigned long mask, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ dma_addr_t dma_next = 0, dma_addr;
+ unsigned long flags;
+ struct scatterlist *s, *outs, *segstart;
+ int outcount, incount, i, build_fail = 0;
+ unsigned int align;
+ unsigned long handle;
+ unsigned int max_seg_size;
+
+ BUG_ON(direction == DMA_NONE);
+
+ if ((nelems == 0) || !tbl)
+ return 0;
+
+ outs = s = segstart = &sglist[0];
+ outcount = 1;
+ incount = nelems;
+ handle = 0;
+
+ /* Init first segment length for backout at failure */
+ outs->dma_length = 0;
+
+ DBG("sg mapping %d elements:\n", nelems);
+
+ spin_lock_irqsave(&(tbl->it_lock), flags);
+
+ max_seg_size = dma_get_max_seg_size(dev);
+ for_each_sg(sglist, s, nelems, i) {
+ unsigned long vaddr, npages, entry, slen;
+
+ slen = s->length;
+ /* Sanity check */
+ if (slen == 0) {
+ dma_next = 0;
+ continue;
+ }
+ /* Allocate iommu entries for that segment */
+ vaddr = (unsigned long) sg_virt(s);
+ npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE);
+ align = 0;
+ if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && slen >= PAGE_SIZE &&
+ (vaddr & ~PAGE_MASK) == 0)
+ align = PAGE_SHIFT - IOMMU_PAGE_SHIFT;
+ entry = iommu_range_alloc(dev, tbl, npages, &handle,
+ mask >> IOMMU_PAGE_SHIFT, align);
+
+ DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);
+
+ /* Handle failure */
+ if (unlikely(entry == DMA_ERROR_CODE)) {
+ if (printk_ratelimit())
+ dev_info(dev, "iommu_alloc failed, tbl %p "
+ "vaddr %lx npages %lu\n", tbl, vaddr,
+ npages);
+ goto failure;
+ }
+
+ /* Convert entry to a dma_addr_t */
+ entry += tbl->it_offset;
+ dma_addr = entry << IOMMU_PAGE_SHIFT;
+ dma_addr |= (s->offset & ~IOMMU_PAGE_MASK);
+
+ DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n",
+ npages, entry, dma_addr);
+
+ /* Insert into HW table */
+ build_fail = ppc_md.tce_build(tbl, entry, npages,
+ vaddr & IOMMU_PAGE_MASK,
+ direction, attrs);
+ if(unlikely(build_fail))
+ goto failure;
+
+ /* If we are in an open segment, try merging */
+ if (segstart != s) {
+ DBG(" - trying merge...\n");
+ /* We cannot merge if:
+ * - allocated dma_addr isn't contiguous to previous allocation
+ */
+ if (novmerge || (dma_addr != dma_next) ||
+ (outs->dma_length + s->length > max_seg_size)) {
+ /* Can't merge: create a new segment */
+ segstart = s;
+ outcount++;
+ outs = sg_next(outs);
+ DBG(" can't merge, new segment.\n");
+ } else {
+ outs->dma_length += s->length;
+ DBG(" merged, new len: %ux\n", outs->dma_length);
+ }
+ }
+
+ if (segstart == s) {
+ /* This is a new segment, fill entries */
+ DBG(" - filling new segment.\n");
+ outs->dma_address = dma_addr;
+ outs->dma_length = slen;
+ }
+
+ /* Calculate next page pointer for contiguous check */
+ dma_next = dma_addr + slen;
+
+ DBG(" - dma next is: %lx\n", dma_next);
+ }
+
+ /* Flush/invalidate TLB caches if necessary */
+ if (ppc_md.tce_flush)
+ ppc_md.tce_flush(tbl);
+
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+
+ DBG("mapped %d elements:\n", outcount);
+
+ /* For the sake of iommu_unmap_sg, we clear out the length in the
+ * next entry of the sglist if we didn't fill the list completely
+ */
+ if (outcount < incount) {
+ outs = sg_next(outs);
+ outs->dma_address = DMA_ERROR_CODE;
+ outs->dma_length = 0;
+ }
+
+ /* Make sure updates are seen by hardware */
+ mb();
+
+ return outcount;
+
+ failure:
+ for_each_sg(sglist, s, nelems, i) {
+ if (s->dma_length != 0) {
+ unsigned long vaddr, npages;
+
+ vaddr = s->dma_address & IOMMU_PAGE_MASK;
+ npages = iommu_num_pages(s->dma_address, s->dma_length,
+ IOMMU_PAGE_SIZE);
+ __iommu_free(tbl, vaddr, npages);
+ s->dma_address = DMA_ERROR_CODE;
+ s->dma_length = 0;
+ }
+ if (s == outs)
+ break;
+ }
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+ return 0;
+}
+
+
+void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ struct scatterlist *sg;
+ unsigned long flags;
+
+ BUG_ON(direction == DMA_NONE);
+
+ if (!tbl)
+ return;
+
+ spin_lock_irqsave(&(tbl->it_lock), flags);
+
+ sg = sglist;
+ while (nelems--) {
+ unsigned int npages;
+ dma_addr_t dma_handle = sg->dma_address;
+
+ if (sg->dma_length == 0)
+ break;
+ npages = iommu_num_pages(dma_handle, sg->dma_length,
+ IOMMU_PAGE_SIZE);
+ __iommu_free(tbl, dma_handle, npages);
+ sg = sg_next(sg);
+ }
+
+ /* Flush/invalidate TLBs if necessary. As for iommu_free(), we
+ * do not do an mb() here, the affected platforms do not need it
+ * when freeing.
+ */
+ if (ppc_md.tce_flush)
+ ppc_md.tce_flush(tbl);
+
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+}
+
+static void iommu_table_clear(struct iommu_table *tbl)
+{
+ /*
+ * In case of firmware assisted dump system goes through clean
+ * reboot process at the time of system crash. Hence it's safe to
+ * clear the TCE entries if firmware assisted dump is active.
+ */
+ if (!is_kdump_kernel() || is_fadump_active()) {
+ /* Clear the table in case firmware left allocations in it */
+ ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
+ return;
+ }
+
+#ifdef CONFIG_CRASH_DUMP
+ if (ppc_md.tce_get) {
+ unsigned long index, tceval, tcecount = 0;
+
+ /* Reserve the existing mappings left by the first kernel. */
+ for (index = 0; index < tbl->it_size; index++) {
+ tceval = ppc_md.tce_get(tbl, index + tbl->it_offset);
+ /*
+ * Freed TCE entry contains 0x7fffffffffffffff on JS20
+ */
+ if (tceval && (tceval != 0x7fffffffffffffffUL)) {
+ __set_bit(index, tbl->it_map);
+ tcecount++;
+ }
+ }
+
+ if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) {
+ printk(KERN_WARNING "TCE table is full; freeing ");
+ printk(KERN_WARNING "%d entries for the kdump boot\n",
+ KDUMP_MIN_TCE_ENTRIES);
+ for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES;
+ index < tbl->it_size; index++)
+ __clear_bit(index, tbl->it_map);
+ }
+ }
+#endif
+}
+
+/*
+ * Build a iommu_table structure. This contains a bit map which
+ * is used to manage allocation of the tce space.
+ */
+struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
+{
+ unsigned long sz;
+ static int welcomed = 0;
+ struct page *page;
+
+ /* Set aside 1/4 of the table for large allocations. */
+ tbl->it_halfpoint = tbl->it_size * 3 / 4;
+
+ /* number of bytes needed for the bitmap */
+ sz = (tbl->it_size + 7) >> 3;
+
+ page = alloc_pages_node(nid, GFP_ATOMIC, get_order(sz));
+ if (!page)
+ panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
+ tbl->it_map = page_address(page);
+ memset(tbl->it_map, 0, sz);
+
+ /*
+ * Reserve page 0 so it will not be used for any mappings.
+ * This avoids buggy drivers that consider page 0 to be invalid
+ * to crash the machine or even lose data.
+ */
+ if (tbl->it_offset == 0)
+ set_bit(0, tbl->it_map);
+
+ tbl->it_hint = 0;
+ tbl->it_largehint = tbl->it_halfpoint;
+ spin_lock_init(&tbl->it_lock);
+
+ iommu_table_clear(tbl);
+
+ if (!welcomed) {
+ printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
+ novmerge ? "disabled" : "enabled");
+ welcomed = 1;
+ }
+
+ return tbl;
+}
+
+void iommu_free_table(struct iommu_table *tbl, const char *node_name)
+{
+ unsigned long bitmap_sz, i;
+ unsigned int order;
+
+ if (!tbl || !tbl->it_map) {
+ printk(KERN_ERR "%s: expected TCE map for %s\n", __func__,
+ node_name);
+ return;
+ }
+
+ /* verify that table contains no entries */
+ /* it_size is in entries, and we're examining 64 at a time */
+ for (i = 0; i < (tbl->it_size/64); i++) {
+ if (tbl->it_map[i] != 0) {
+ printk(KERN_WARNING "%s: Unexpected TCEs for %s\n",
+ __func__, node_name);
+ break;
+ }
+ }
+
+ /* calculate bitmap size in bytes */
+ bitmap_sz = (tbl->it_size + 7) / 8;
+
+ /* free bitmap */
+ order = get_order(bitmap_sz);
+ free_pages((unsigned long) tbl->it_map, order);
+
+ /* free table */
+ kfree(tbl);
+}
+
+/* Creates TCEs for a user provided buffer. The user buffer must be
+ * contiguous real kernel storage (not vmalloc). The address passed here
+ * comprises a page address and offset into that page. The dma_addr_t
+ * returned will point to the same byte within the page as was passed in.
+ */
+dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
+ struct page *page, unsigned long offset, size_t size,
+ unsigned long mask, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ dma_addr_t dma_handle = DMA_ERROR_CODE;
+ void *vaddr;
+ unsigned long uaddr;
+ unsigned int npages, align;
+
+ BUG_ON(direction == DMA_NONE);
+
+ vaddr = page_address(page) + offset;
+ uaddr = (unsigned long)vaddr;
+ npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE);
+
+ if (tbl) {
+ align = 0;
+ if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && size >= PAGE_SIZE &&
+ ((unsigned long)vaddr & ~PAGE_MASK) == 0)
+ align = PAGE_SHIFT - IOMMU_PAGE_SHIFT;
+
+ dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
+ mask >> IOMMU_PAGE_SHIFT, align,
+ attrs);
+ if (dma_handle == DMA_ERROR_CODE) {
+ if (printk_ratelimit()) {
+ dev_info(dev, "iommu_alloc failed, tbl %p "
+ "vaddr %p npages %d\n", tbl, vaddr,
+ npages);
+ }
+ } else
+ dma_handle |= (uaddr & ~IOMMU_PAGE_MASK);
+ }
+
+ return dma_handle;
+}
+
+void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ unsigned int npages;
+
+ BUG_ON(direction == DMA_NONE);
+
+ if (tbl) {
+ npages = iommu_num_pages(dma_handle, size, IOMMU_PAGE_SIZE);
+ iommu_free(tbl, dma_handle, npages);
+ }
+}
+
+/* Allocates a contiguous real buffer and creates mappings over it.
+ * Returns the virtual address of the buffer and sets dma_handle
+ * to the dma address (mapping) of the first page.
+ */
+void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
+ size_t size, dma_addr_t *dma_handle,
+ unsigned long mask, gfp_t flag, int node)
+{
+ void *ret = NULL;
+ dma_addr_t mapping;
+ unsigned int order;
+ unsigned int nio_pages, io_order;
+ struct page *page;
+
+ size = PAGE_ALIGN(size);
+ order = get_order(size);
+
+ /*
+ * Client asked for way too much space. This is checked later
+ * anyway. It is easier to debug here for the drivers than in
+ * the tce tables.
+ */
+ if (order >= IOMAP_MAX_ORDER) {
+ dev_info(dev, "iommu_alloc_consistent size too large: 0x%lx\n",
+ size);
+ return NULL;
+ }
+
+ if (!tbl)
+ return NULL;
+
+ /* Alloc enough pages (and possibly more) */
+ page = alloc_pages_node(node, flag, order);
+ if (!page)
+ return NULL;
+ ret = page_address(page);
+ memset(ret, 0, size);
+
+ /* Set up tces to cover the allocated range */
+ nio_pages = size >> IOMMU_PAGE_SHIFT;
+ io_order = get_iommu_order(size);
+ mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
+ mask >> IOMMU_PAGE_SHIFT, io_order, NULL);
+ if (mapping == DMA_ERROR_CODE) {
+ free_pages((unsigned long)ret, order);
+ return NULL;
+ }
+ *dma_handle = mapping;
+ return ret;
+}
+
+void iommu_free_coherent(struct iommu_table *tbl, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ if (tbl) {
+ unsigned int nio_pages;
+
+ size = PAGE_ALIGN(size);
+ nio_pages = size >> IOMMU_PAGE_SHIFT;
+ iommu_free(tbl, dma_handle, nio_pages);
+ size = PAGE_ALIGN(size);
+ free_pages((unsigned long)vaddr, get_order(size));
+ }
+}
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
new file mode 100644
index 00000000..d7ebc58c
--- /dev/null
+++ b/arch/powerpc/kernel/irq.c
@@ -0,0 +1,680 @@
+/*
+ * Derived from arch/i386/kernel/irq.c
+ * Copyright (C) 1992 Linus Torvalds
+ * Adapted from arch/i386 by Gary Thomas
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ * Updated and modified by Cort Dougan <cort@fsmlabs.com>
+ * Copyright (C) 1996-2001 Cort Dougan
+ * Adapted for Power Macintosh by Paul Mackerras
+ * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ *
+ * The MPC8xx has an interrupt mask in the SIU. If a bit is set, the
+ * interrupt is _enabled_. As expected, IRQ0 is bit 0 in the 32-bit
+ * mask register (of which only 16 are defined), hence the weird shifting
+ * and complement of the cached_irq_mask. I want to be able to stuff
+ * this right into the SIU SMASK register.
+ * Many of the prep/chrp functions are conditional compiled on CONFIG_8xx
+ * to reduce code space and undefined function references.
+ */
+
+#undef DEBUG
+
+#include <linux/export.h>
+#include <linux/threads.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <linux/profile.h>
+#include <linux/bitops.h>
+#include <linux/list.h>
+#include <linux/radix-tree.h>
+#include <linux/mutex.h>
+#include <linux/bootmem.h>
+#include <linux/pci.h>
+#include <linux/debugfs.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/cache.h>
+#include <asm/prom.h>
+#include <asm/ptrace.h>
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/smp.h>
+#include <asm/debug.h>
+
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+#include <asm/firmware.h>
+#include <asm/lv1call.h>
+#endif
+#define CREATE_TRACE_POINTS
+#include <asm/trace.h>
+
+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+EXPORT_PER_CPU_SYMBOL(irq_stat);
+
+int __irq_offset_value;
+
+#ifdef CONFIG_PPC32
+EXPORT_SYMBOL(__irq_offset_value);
+atomic_t ppc_n_lost_interrupts;
+
+#ifdef CONFIG_TAU_INT
+extern int tau_initialized;
+extern int tau_interrupts(int);
+#endif
+#endif /* CONFIG_PPC32 */
+
+#ifdef CONFIG_PPC64
+
+int distribute_irqs = 1;
+
+static inline notrace unsigned long get_irq_happened(void)
+{
+ unsigned long happened;
+
+ __asm__ __volatile__("lbz %0,%1(13)"
+ : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened)));
+
+ return happened;
+}
+
+static inline notrace void set_soft_enabled(unsigned long enable)
+{
+ __asm__ __volatile__("stb %0,%1(13)"
+ : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
+}
+
+static inline notrace int decrementer_check_overflow(void)
+{
+ u64 now = get_tb_or_rtc();
+ u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+
+ if (now >= *next_tb)
+ set_dec(1);
+ return now >= *next_tb;
+}
+
+/* This is called whenever we are re-enabling interrupts
+ * and returns either 0 (nothing to do) or 500/900 if there's
+ * either an EE or a DEC to generate.
+ *
+ * This is called in two contexts: From arch_local_irq_restore()
+ * before soft-enabling interrupts, and from the exception exit
+ * path when returning from an interrupt from a soft-disabled to
+ * a soft enabled context. In both case we have interrupts hard
+ * disabled.
+ *
+ * We take care of only clearing the bits we handled in the
+ * PACA irq_happened field since we can only re-emit one at a
+ * time and we don't want to "lose" one.
+ */
+notrace unsigned int __check_irq_replay(void)
+{
+ /*
+ * We use local_paca rather than get_paca() to avoid all
+ * the debug_smp_processor_id() business in this low level
+ * function
+ */
+ unsigned char happened = local_paca->irq_happened;
+
+ /* Clear bit 0 which we wouldn't clear otherwise */
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
+ /*
+ * Force the delivery of pending soft-disabled interrupts on PS3.
+ * Any HV call will have this side effect.
+ */
+ if (firmware_has_feature(FW_FEATURE_PS3_LV1)) {
+ u64 tmp, tmp2;
+ lv1_get_version_info(&tmp, &tmp2);
+ }
+
+ /*
+ * We may have missed a decrementer interrupt. We check the
+ * decrementer itself rather than the paca irq_happened field
+ * in case we also had a rollover while hard disabled
+ */
+ local_paca->irq_happened &= ~PACA_IRQ_DEC;
+ if (decrementer_check_overflow())
+ return 0x900;
+
+ /* Finally check if an external interrupt happened */
+ local_paca->irq_happened &= ~PACA_IRQ_EE;
+ if (happened & PACA_IRQ_EE)
+ return 0x500;
+
+#ifdef CONFIG_PPC_BOOK3E
+ /* Finally check if an EPR external interrupt happened
+ * this bit is typically set if we need to handle another
+ * "edge" interrupt from within the MPIC "EPR" handler
+ */
+ local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
+ if (happened & PACA_IRQ_EE_EDGE)
+ return 0x500;
+
+ local_paca->irq_happened &= ~PACA_IRQ_DBELL;
+ if (happened & PACA_IRQ_DBELL)
+ return 0x280;
+#endif /* CONFIG_PPC_BOOK3E */
+
+ /* There should be nothing left ! */
+ BUG_ON(local_paca->irq_happened != 0);
+
+ return 0;
+}
+
+notrace void arch_local_irq_restore(unsigned long en)
+{
+ unsigned char irq_happened;
+ unsigned int replay;
+
+ /* Write the new soft-enabled value */
+ set_soft_enabled(en);
+ if (!en)
+ return;
+ /*
+ * From this point onward, we can take interrupts, preempt,
+ * etc... unless we got hard-disabled. We check if an event
+ * happened. If none happened, we know we can just return.
+ *
+ * We may have preempted before the check below, in which case
+ * we are checking the "new" CPU instead of the old one. This
+ * is only a problem if an event happened on the "old" CPU.
+ *
+ * External interrupt events will have caused interrupts to
+ * be hard-disabled, so there is no problem, we
+ * cannot have preempted.
+ */
+ irq_happened = get_irq_happened();
+ if (!irq_happened)
+ return;
+
+ /*
+ * We need to hard disable to get a trusted value from
+ * __check_irq_replay(). We also need to soft-disable
+ * again to avoid warnings in there due to the use of
+ * per-cpu variables.
+ *
+ * We know that if the value in irq_happened is exactly 0x01
+ * then we are already hard disabled (there are other less
+ * common cases that we'll ignore for now), so we skip the
+ * (expensive) mtmsrd.
+ */
+ if (unlikely(irq_happened != PACA_IRQ_HARD_DIS))
+ __hard_irq_disable();
+#ifdef CONFIG_TRACE_IRQFLAGS
+ else {
+ /*
+ * We should already be hard disabled here. We had bugs
+ * where that wasn't the case so let's dbl check it and
+ * warn if we are wrong. Only do that when IRQ tracing
+ * is enabled as mfmsr() can be costly.
+ */
+ if (WARN_ON(mfmsr() & MSR_EE))
+ __hard_irq_disable();
+ }
+#endif /* CONFIG_TRACE_IRQFLAG */
+
+ set_soft_enabled(0);
+
+ /*
+ * Check if anything needs to be re-emitted. We haven't
+ * soft-enabled yet to avoid warnings in decrementer_check_overflow
+ * accessing per-cpu variables
+ */
+ replay = __check_irq_replay();
+
+ /* We can soft-enable now */
+ set_soft_enabled(1);
+
+ /*
+ * And replay if we have to. This will return with interrupts
+ * hard-enabled.
+ */
+ if (replay) {
+ __replay_interrupt(replay);
+ return;
+ }
+
+ /* Finally, let's ensure we are hard enabled */
+ __hard_irq_enable();
+}
+EXPORT_SYMBOL(arch_local_irq_restore);
+
+/*
+ * This is specifically called by assembly code to re-enable interrupts
+ * if they are currently disabled. This is typically called before
+ * schedule() or do_signal() when returning to userspace. We do it
+ * in C to avoid the burden of dealing with lockdep etc...
+ *
+ * NOTE: This is called with interrupts hard disabled but not marked
+ * as such in paca->irq_happened, so we need to resync this.
+ */
+void notrace restore_interrupts(void)
+{
+ if (irqs_disabled()) {
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ local_irq_enable();
+ } else
+ __hard_irq_enable();
+}
+
+/*
+ * This is a helper to use when about to go into idle low-power
+ * when the latter has the side effect of re-enabling interrupts
+ * (such as calling H_CEDE under pHyp).
+ *
+ * You call this function with interrupts soft-disabled (this is
+ * already the case when ppc_md.power_save is called). The function
+ * will return whether to enter power save or just return.
+ *
+ * In the former case, it will have notified lockdep of interrupts
+ * being re-enabled and generally sanitized the lazy irq state,
+ * and in the latter case it will leave with interrupts hard
+ * disabled and marked as such, so the local_irq_enable() call
+ * in cpu_idle() will properly re-enable everything.
+ */
+bool prep_irq_for_idle(void)
+{
+ /*
+ * First we need to hard disable to ensure no interrupt
+ * occurs before we effectively enter the low power state
+ */
+ hard_irq_disable();
+
+ /*
+ * If anything happened while we were soft-disabled,
+ * we return now and do not enter the low power state.
+ */
+ if (lazy_irq_pending())
+ return false;
+
+ /* Tell lockdep we are about to re-enable */
+ trace_hardirqs_on();
+
+ /*
+ * Mark interrupts as soft-enabled and clear the
+ * PACA_IRQ_HARD_DIS from the pending mask since we
+ * are about to hard enable as well as a side effect
+ * of entering the low power state.
+ */
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+ local_paca->soft_enabled = 1;
+
+ /* Tell the caller to enter the low power state */
+ return true;
+}
+
+#endif /* CONFIG_PPC64 */
+
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+ int j;
+
+#if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT)
+ if (tau_initialized) {
+ seq_printf(p, "%*s: ", prec, "TAU");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", tau_interrupts(j));
+ seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n");
+ }
+#endif /* CONFIG_PPC32 && CONFIG_TAU_INT */
+
+ seq_printf(p, "%*s: ", prec, "LOC");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs);
+ seq_printf(p, " Local timer interrupts\n");
+
+ seq_printf(p, "%*s: ", prec, "SPU");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs);
+ seq_printf(p, " Spurious interrupts\n");
+
+ seq_printf(p, "%*s: ", prec, "CNT");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs);
+ seq_printf(p, " Performance monitoring interrupts\n");
+
+ seq_printf(p, "%*s: ", prec, "MCE");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions);
+ seq_printf(p, " Machine check exceptions\n");
+
+ return 0;
+}
+
+/*
+ * /proc/stat helpers
+ */
+u64 arch_irq_stat_cpu(unsigned int cpu)
+{
+ u64 sum = per_cpu(irq_stat, cpu).timer_irqs;
+
+ sum += per_cpu(irq_stat, cpu).pmu_irqs;
+ sum += per_cpu(irq_stat, cpu).mce_exceptions;
+ sum += per_cpu(irq_stat, cpu).spurious_irqs;
+
+ return sum;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void migrate_irqs(void)
+{
+ struct irq_desc *desc;
+ unsigned int irq;
+ static int warned;
+ cpumask_var_t mask;
+ const struct cpumask *map = cpu_online_mask;
+
+ alloc_cpumask_var(&mask, GFP_KERNEL);
+
+ for_each_irq_desc(irq, desc) {
+ struct irq_data *data;
+ struct irq_chip *chip;
+
+ data = irq_desc_get_irq_data(desc);
+ if (irqd_is_per_cpu(data))
+ continue;
+
+ chip = irq_data_get_irq_chip(data);
+
+ cpumask_and(mask, data->affinity, map);
+ if (cpumask_any(mask) >= nr_cpu_ids) {
+ printk("Breaking affinity for irq %i\n", irq);
+ cpumask_copy(mask, map);
+ }
+ if (chip->irq_set_affinity)
+ chip->irq_set_affinity(data, mask, true);
+ else if (desc->action && !(warned++))
+ printk("Cannot set affinity for irq %i\n", irq);
+ }
+
+ free_cpumask_var(mask);
+
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+}
+#endif
+
+static inline void handle_one_irq(unsigned int irq)
+{
+ struct thread_info *curtp, *irqtp;
+ unsigned long saved_sp_limit;
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+ if (!desc)
+ return;
+
+ /* Switch to the irq stack to handle this */
+ curtp = current_thread_info();
+ irqtp = hardirq_ctx[smp_processor_id()];
+
+ if (curtp == irqtp) {
+ /* We're already on the irq stack, just handle it */
+ desc->handle_irq(irq, desc);
+ return;
+ }
+
+ saved_sp_limit = current->thread.ksp_limit;
+
+ irqtp->task = curtp->task;
+ irqtp->flags = 0;
+
+ /* Copy the softirq bits in preempt_count so that the
+ * softirq checks work in the hardirq context. */
+ irqtp->preempt_count = (irqtp->preempt_count & ~SOFTIRQ_MASK) |
+ (curtp->preempt_count & SOFTIRQ_MASK);
+
+ current->thread.ksp_limit = (unsigned long)irqtp +
+ _ALIGN_UP(sizeof(struct thread_info), 16);
+
+ call_handle_irq(irq, desc, irqtp, desc->handle_irq);
+ current->thread.ksp_limit = saved_sp_limit;
+ irqtp->task = NULL;
+
+ /* Set any flag that may have been set on the
+ * alternate stack
+ */
+ if (irqtp->flags)
+ set_bits(irqtp->flags, &curtp->flags);
+}
+
+static inline void check_stack_overflow(void)
+{
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+ long sp;
+
+ sp = __get_SP() & (THREAD_SIZE-1);
+
+ /* check for stack overflow: is there less than 2KB free? */
+ if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
+ printk("do_IRQ: stack overflow: %ld\n",
+ sp - sizeof(struct thread_info));
+ dump_stack();
+ }
+#endif
+}
+
+void do_IRQ(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ unsigned int irq;
+
+ trace_irq_entry(regs);
+
+ irq_enter();
+
+ check_stack_overflow();
+
+ /*
+ * Query the platform PIC for the interrupt & ack it.
+ *
+ * This will typically lower the interrupt line to the CPU
+ */
+ irq = ppc_md.get_irq();
+
+ /* We can hard enable interrupts now */
+ may_hard_irq_enable();
+
+ /* And finally process it */
+ if (irq != NO_IRQ)
+ handle_one_irq(irq);
+ else
+ __get_cpu_var(irq_stat).spurious_irqs++;
+
+ irq_exit();
+ set_irq_regs(old_regs);
+
+ trace_irq_exit(regs);
+}
+
+void __init init_IRQ(void)
+{
+ if (ppc_md.init_IRQ)
+ ppc_md.init_IRQ();
+
+ exc_lvl_ctx_init();
+
+ irq_ctx_init();
+}
+
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+struct thread_info *critirq_ctx[NR_CPUS] __read_mostly;
+struct thread_info *dbgirq_ctx[NR_CPUS] __read_mostly;
+struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly;
+
+void exc_lvl_ctx_init(void)
+{
+ struct thread_info *tp;
+ int i, cpu_nr;
+
+ for_each_possible_cpu(i) {
+#ifdef CONFIG_PPC64
+ cpu_nr = i;
+#else
+ cpu_nr = get_hard_smp_processor_id(i);
+#endif
+ memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE);
+ tp = critirq_ctx[cpu_nr];
+ tp->cpu = cpu_nr;
+ tp->preempt_count = 0;
+
+#ifdef CONFIG_BOOKE
+ memset((void *)dbgirq_ctx[cpu_nr], 0, THREAD_SIZE);
+ tp = dbgirq_ctx[cpu_nr];
+ tp->cpu = cpu_nr;
+ tp->preempt_count = 0;
+
+ memset((void *)mcheckirq_ctx[cpu_nr], 0, THREAD_SIZE);
+ tp = mcheckirq_ctx[cpu_nr];
+ tp->cpu = cpu_nr;
+ tp->preempt_count = HARDIRQ_OFFSET;
+#endif
+ }
+}
+#endif
+
+struct thread_info *softirq_ctx[NR_CPUS] __read_mostly;
+struct thread_info *hardirq_ctx[NR_CPUS] __read_mostly;
+
+void irq_ctx_init(void)
+{
+ struct thread_info *tp;
+ int i;
+
+ for_each_possible_cpu(i) {
+ memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
+ tp = softirq_ctx[i];
+ tp->cpu = i;
+ tp->preempt_count = 0;
+
+ memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
+ tp = hardirq_ctx[i];
+ tp->cpu = i;
+ tp->preempt_count = HARDIRQ_OFFSET;
+ }
+}
+
+static inline void do_softirq_onstack(void)
+{
+ struct thread_info *curtp, *irqtp;
+ unsigned long saved_sp_limit = current->thread.ksp_limit;
+
+ curtp = current_thread_info();
+ irqtp = softirq_ctx[smp_processor_id()];
+ irqtp->task = curtp->task;
+ irqtp->flags = 0;
+ current->thread.ksp_limit = (unsigned long)irqtp +
+ _ALIGN_UP(sizeof(struct thread_info), 16);
+ call_do_softirq(irqtp);
+ current->thread.ksp_limit = saved_sp_limit;
+ irqtp->task = NULL;
+
+ /* Set any flag that may have been set on the
+ * alternate stack
+ */
+ if (irqtp->flags)
+ set_bits(irqtp->flags, &curtp->flags);
+}
+
+void do_softirq(void)
+{
+ unsigned long flags;
+
+ if (in_interrupt())
+ return;
+
+ local_irq_save(flags);
+
+ if (local_softirq_pending())
+ do_softirq_onstack();
+
+ local_irq_restore(flags);
+}
+
+irq_hw_number_t virq_to_hw(unsigned int virq)
+{
+ struct irq_data *irq_data = irq_get_irq_data(virq);
+ return WARN_ON(!irq_data) ? 0 : irq_data->hwirq;
+}
+EXPORT_SYMBOL_GPL(virq_to_hw);
+
+#ifdef CONFIG_SMP
+int irq_choose_cpu(const struct cpumask *mask)
+{
+ int cpuid;
+
+ if (cpumask_equal(mask, cpu_all_mask)) {
+ static int irq_rover;
+ static DEFINE_RAW_SPINLOCK(irq_rover_lock);
+ unsigned long flags;
+
+ /* Round-robin distribution... */
+do_round_robin:
+ raw_spin_lock_irqsave(&irq_rover_lock, flags);
+
+ irq_rover = cpumask_next(irq_rover, cpu_online_mask);
+ if (irq_rover >= nr_cpu_ids)
+ irq_rover = cpumask_first(cpu_online_mask);
+
+ cpuid = irq_rover;
+
+ raw_spin_unlock_irqrestore(&irq_rover_lock, flags);
+ } else {
+ cpuid = cpumask_first_and(mask, cpu_online_mask);
+ if (cpuid >= nr_cpu_ids)
+ goto do_round_robin;
+ }
+
+ return get_hard_smp_processor_id(cpuid);
+}
+#else
+int irq_choose_cpu(const struct cpumask *mask)
+{
+ return hard_smp_processor_id();
+}
+#endif
+
+int arch_early_irq_init(void)
+{
+ return 0;
+}
+
+#ifdef CONFIG_PPC64
+static int __init setup_noirqdistrib(char *str)
+{
+ distribute_irqs = 0;
+ return 1;
+}
+
+__setup("noirqdistrib", setup_noirqdistrib);
+#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
new file mode 100644
index 00000000..d45ec587
--- /dev/null
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -0,0 +1,266 @@
+/*
+ * Routines for tracking a legacy ISA bridge
+ *
+ * Copyrigh 2007 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
+ *
+ * Some bits and pieces moved over from pci_64.c
+ *
+ * Copyrigh 2003 Anton Blanchard <anton@au.ibm.com>, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+
+unsigned long isa_io_base; /* NULL if no ISA bus */
+EXPORT_SYMBOL(isa_io_base);
+
+/* Cached ISA bridge dev. */
+static struct device_node *isa_bridge_devnode;
+struct pci_dev *isa_bridge_pcidev;
+EXPORT_SYMBOL_GPL(isa_bridge_pcidev);
+
+#define ISA_SPACE_MASK 0x1
+#define ISA_SPACE_IO 0x1
+
+static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node,
+ unsigned long phb_io_base_phys)
+{
+ /* We should get some saner parsing here and remove these structs */
+ struct pci_address {
+ u32 a_hi;
+ u32 a_mid;
+ u32 a_lo;
+ };
+
+ struct isa_address {
+ u32 a_hi;
+ u32 a_lo;
+ };
+
+ struct isa_range {
+ struct isa_address isa_addr;
+ struct pci_address pci_addr;
+ unsigned int size;
+ };
+
+ const struct isa_range *range;
+ unsigned long pci_addr;
+ unsigned int isa_addr;
+ unsigned int size;
+ int rlen = 0;
+
+ range = of_get_property(isa_node, "ranges", &rlen);
+ if (range == NULL || (rlen < sizeof(struct isa_range)))
+ goto inval_range;
+
+ /* From "ISA Binding to 1275"
+ * The ranges property is laid out as an array of elements,
+ * each of which comprises:
+ * cells 0 - 1: an ISA address
+ * cells 2 - 4: a PCI address
+ * (size depending on dev->n_addr_cells)
+ * cell 5: the size of the range
+ */
+ if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) {
+ range++;
+ rlen -= sizeof(struct isa_range);
+ if (rlen < sizeof(struct isa_range))
+ goto inval_range;
+ }
+ if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO)
+ goto inval_range;
+
+ isa_addr = range->isa_addr.a_lo;
+ pci_addr = (unsigned long) range->pci_addr.a_mid << 32 |
+ range->pci_addr.a_lo;
+
+ /* Assume these are both zero. Note: We could fix that and
+ * do a proper parsing instead ... oh well, that will do for
+ * now as nobody uses fancy mappings for ISA bridges
+ */
+ if ((pci_addr != 0) || (isa_addr != 0)) {
+ printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
+ __func__);
+ return;
+ }
+
+ /* Align size and make sure it's cropped to 64K */
+ size = PAGE_ALIGN(range->size);
+ if (size > 0x10000)
+ size = 0x10000;
+
+ __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
+ size, _PAGE_NO_CACHE|_PAGE_GUARDED);
+ return;
+
+inval_range:
+ printk(KERN_ERR "no ISA IO ranges or unexpected isa range, "
+ "mapping 64k\n");
+ __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
+ 0x10000, _PAGE_NO_CACHE|_PAGE_GUARDED);
+}
+
+
+/**
+ * isa_bridge_find_early - Find and map the ISA IO space early before
+ * main PCI discovery. This is optionally called by
+ * the arch code when adding PCI PHBs to get early
+ * access to ISA IO ports
+ */
+void __init isa_bridge_find_early(struct pci_controller *hose)
+{
+ struct device_node *np, *parent = NULL, *tmp;
+
+ /* If we already have an ISA bridge, bail off */
+ if (isa_bridge_devnode != NULL)
+ return;
+
+ /* For each "isa" node in the system. Note : we do a search by
+ * type and not by name. It might be better to do by name but that's
+ * what the code used to do and I don't want to break too much at
+ * once. We can look into changing that separately
+ */
+ for_each_node_by_type(np, "isa") {
+ /* Look for our hose being a parent */
+ for (parent = of_get_parent(np); parent;) {
+ if (parent == hose->dn) {
+ of_node_put(parent);
+ break;
+ }
+ tmp = parent;
+ parent = of_get_parent(parent);
+ of_node_put(tmp);
+ }
+ if (parent != NULL)
+ break;
+ }
+ if (np == NULL)
+ return;
+ isa_bridge_devnode = np;
+
+ /* Now parse the "ranges" property and setup the ISA mapping */
+ pci_process_ISA_OF_ranges(np, hose->io_base_phys);
+
+ /* Set the global ISA io base to indicate we have an ISA bridge */
+ isa_io_base = ISA_IO_BASE;
+
+ pr_debug("ISA bridge (early) is %s\n", np->full_name);
+}
+
+/**
+ * isa_bridge_find_late - Find and map the ISA IO space upon discovery of
+ * a new ISA bridge
+ */
+static void __devinit isa_bridge_find_late(struct pci_dev *pdev,
+ struct device_node *devnode)
+{
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+
+ /* Store ISA device node and PCI device */
+ isa_bridge_devnode = of_node_get(devnode);
+ isa_bridge_pcidev = pdev;
+
+ /* Now parse the "ranges" property and setup the ISA mapping */
+ pci_process_ISA_OF_ranges(devnode, hose->io_base_phys);
+
+ /* Set the global ISA io base to indicate we have an ISA bridge */
+ isa_io_base = ISA_IO_BASE;
+
+ pr_debug("ISA bridge (late) is %s on %s\n",
+ devnode->full_name, pci_name(pdev));
+}
+
+/**
+ * isa_bridge_remove - Remove/unmap an ISA bridge
+ */
+static void isa_bridge_remove(void)
+{
+ pr_debug("ISA bridge removed !\n");
+
+ /* Clear the global ISA io base to indicate that we have no more
+ * ISA bridge. Note that drivers don't quite handle that, though
+ * we should probably do something about it. But do we ever really
+ * have ISA bridges being removed on machines using legacy devices ?
+ */
+ isa_io_base = ISA_IO_BASE;
+
+ /* Clear references to the bridge */
+ of_node_put(isa_bridge_devnode);
+ isa_bridge_devnode = NULL;
+ isa_bridge_pcidev = NULL;
+
+ /* Unmap the ISA area */
+ __iounmap_at((void *)ISA_IO_BASE, 0x10000);
+}
+
+/**
+ * isa_bridge_notify - Get notified of PCI devices addition/removal
+ */
+static int __devinit isa_bridge_notify(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct device_node *devnode = pci_device_to_OF_node(pdev);
+
+ switch(action) {
+ case BUS_NOTIFY_ADD_DEVICE:
+ /* Check if we have an early ISA device, without PCI dev */
+ if (isa_bridge_devnode && isa_bridge_devnode == devnode &&
+ !isa_bridge_pcidev) {
+ pr_debug("ISA bridge PCI attached: %s\n",
+ pci_name(pdev));
+ isa_bridge_pcidev = pdev;
+ }
+
+ /* Check if we have no ISA device, and this happens to be one,
+ * register it as such if it has an OF device
+ */
+ if (!isa_bridge_devnode && devnode && devnode->type &&
+ !strcmp(devnode->type, "isa"))
+ isa_bridge_find_late(pdev, devnode);
+
+ return 0;
+ case BUS_NOTIFY_DEL_DEVICE:
+ /* Check if this our existing ISA device */
+ if (pdev == isa_bridge_pcidev ||
+ (devnode && devnode == isa_bridge_devnode))
+ isa_bridge_remove();
+ return 0;
+ }
+ return 0;
+}
+
+static struct notifier_block isa_bridge_notifier = {
+ .notifier_call = isa_bridge_notify
+};
+
+/**
+ * isa_bridge_init - register to be notified of ISA bridge addition/removal
+ *
+ */
+static int __init isa_bridge_init(void)
+{
+ bus_register_notifier(&pci_bus_type, &isa_bridge_notifier);
+ return 0;
+}
+arch_initcall(isa_bridge_init);
diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c
new file mode 100644
index 00000000..a1ed8a8c
--- /dev/null
+++ b/arch/powerpc/kernel/jump_label.c
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2010 Michael Ellerman, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/jump_label.h>
+#include <asm/code-patching.h>
+
+#ifdef HAVE_JUMP_LABEL
+void arch_jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ u32 *addr = (u32 *)(unsigned long)entry->code;
+
+ if (type == JUMP_LABEL_ENABLE)
+ patch_branch(addr, entry->target, 0);
+ else
+ patch_instruction(addr, PPC_INST_NOP);
+}
+#endif
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
new file mode 100644
index 00000000..782bd0a3
--- /dev/null
+++ b/arch/powerpc/kernel/kgdb.c
@@ -0,0 +1,473 @@
+/*
+ * PowerPC backend to the KGDB stub.
+ *
+ * 1998 (c) Michael AK Tesch (tesch@cs.wisc.edu)
+ * Copyright (C) 2003 Timesys Corporation.
+ * Copyright (C) 2004-2006 MontaVista Software, Inc.
+ * PPC64 Mods (C) 2005 Frank Rowand (frowand@mvista.com)
+ * PPC32 support restored by Vitaly Wool <vwool@ru.mvista.com> and
+ * Sergei Shtylyov <sshtylyov@ru.mvista.com>
+ * Copyright (C) 2007-2008 Wind River Systems, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program as licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/kgdb.h>
+#include <linux/smp.h>
+#include <linux/signal.h>
+#include <linux/ptrace.h>
+#include <linux/kdebug.h>
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/machdep.h>
+#include <asm/debug.h>
+
+/*
+ * This table contains the mapping between PowerPC hardware trap types, and
+ * signals, which are primarily what GDB understands. GDB and the kernel
+ * don't always agree on values, so we use constants taken from gdb-6.2.
+ */
+static struct hard_trap_info
+{
+ unsigned int tt; /* Trap type code for powerpc */
+ unsigned char signo; /* Signal that we map this trap into */
+} hard_trap_info[] = {
+ { 0x0100, 0x02 /* SIGINT */ }, /* system reset */
+ { 0x0200, 0x0b /* SIGSEGV */ }, /* machine check */
+ { 0x0300, 0x0b /* SIGSEGV */ }, /* data access */
+ { 0x0400, 0x0b /* SIGSEGV */ }, /* instruction access */
+ { 0x0500, 0x02 /* SIGINT */ }, /* external interrupt */
+ { 0x0600, 0x0a /* SIGBUS */ }, /* alignment */
+ { 0x0700, 0x05 /* SIGTRAP */ }, /* program check */
+ { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */
+ { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */
+ { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */
+#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+ { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */
+#if defined(CONFIG_FSL_BOOKE)
+ { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */
+ { 0x2020, 0x08 /* SIGFPE */ }, /* spe unavailable */
+ { 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data */
+ { 0x2040, 0x08 /* SIGFPE */ }, /* spe fp data */
+ { 0x2050, 0x08 /* SIGFPE */ }, /* spe fp round */
+ { 0x2060, 0x0e /* SIGILL */ }, /* performance monitor */
+ { 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */
+ { 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */
+ { 0x3200, 0x02 /* SIGINT */ }, /* watchdog */
+#else /* ! CONFIG_FSL_BOOKE */
+ { 0x1000, 0x0e /* SIGALRM */ }, /* prog interval timer */
+ { 0x1010, 0x0e /* SIGALRM */ }, /* fixed interval timer */
+ { 0x1020, 0x02 /* SIGINT */ }, /* watchdog */
+ { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */
+ { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */
+#endif
+#else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */
+ { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */
+#if defined(CONFIG_8xx)
+ { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */
+#else /* ! CONFIG_8xx */
+ { 0x0f00, 0x04 /* SIGILL */ }, /* performance monitor */
+ { 0x0f20, 0x08 /* SIGFPE */ }, /* altivec unavailable */
+ { 0x1300, 0x05 /* SIGTRAP */ }, /* instruction address break */
+#if defined(CONFIG_PPC64)
+ { 0x1200, 0x05 /* SIGILL */ }, /* system error */
+ { 0x1500, 0x04 /* SIGILL */ }, /* soft patch */
+ { 0x1600, 0x04 /* SIGILL */ }, /* maintenance */
+ { 0x1700, 0x08 /* SIGFPE */ }, /* altivec assist */
+ { 0x1800, 0x04 /* SIGILL */ }, /* thermal */
+#else /* ! CONFIG_PPC64 */
+ { 0x1400, 0x02 /* SIGINT */ }, /* SMI */
+ { 0x1600, 0x08 /* SIGFPE */ }, /* altivec assist */
+ { 0x1700, 0x04 /* SIGILL */ }, /* TAU */
+ { 0x2000, 0x05 /* SIGTRAP */ }, /* run mode */
+#endif
+#endif
+#endif
+ { 0x0000, 0x00 } /* Must be last */
+};
+
+static int computeSignal(unsigned int tt)
+{
+ struct hard_trap_info *ht;
+
+ for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
+ if (ht->tt == tt)
+ return ht->signo;
+
+ return SIGHUP; /* default for things we don't know about */
+}
+
+static int kgdb_call_nmi_hook(struct pt_regs *regs)
+{
+ kgdb_nmicallback(raw_smp_processor_id(), regs);
+ return 0;
+}
+
+#ifdef CONFIG_SMP
+void kgdb_roundup_cpus(unsigned long flags)
+{
+ smp_send_debugger_break();
+}
+#endif
+
+/* KGDB functions to use existing PowerPC64 hooks. */
+static int kgdb_debugger(struct pt_regs *regs)
+{
+ return !kgdb_handle_exception(1, computeSignal(TRAP(regs)),
+ DIE_OOPS, regs);
+}
+
+static int kgdb_handle_breakpoint(struct pt_regs *regs)
+{
+ if (user_mode(regs))
+ return 0;
+
+ if (kgdb_handle_exception(1, SIGTRAP, 0, regs) != 0)
+ return 0;
+
+ if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr))
+ regs->nip += BREAK_INSTR_SIZE;
+
+ return 1;
+}
+
+static int kgdb_singlestep(struct pt_regs *regs)
+{
+ struct thread_info *thread_info, *exception_thread_info;
+
+ if (user_mode(regs))
+ return 0;
+
+ /*
+ * On Book E and perhaps other processors, singlestep is handled on
+ * the critical exception stack. This causes current_thread_info()
+ * to fail, since it it locates the thread_info by masking off
+ * the low bits of the current stack pointer. We work around
+ * this issue by copying the thread_info from the kernel stack
+ * before calling kgdb_handle_exception, and copying it back
+ * afterwards. On most processors the copy is avoided since
+ * exception_thread_info == thread_info.
+ */
+ thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1));
+ exception_thread_info = current_thread_info();
+
+ if (thread_info != exception_thread_info)
+ memcpy(exception_thread_info, thread_info, sizeof *thread_info);
+
+ kgdb_handle_exception(0, SIGTRAP, 0, regs);
+
+ if (thread_info != exception_thread_info)
+ memcpy(thread_info, exception_thread_info, sizeof *thread_info);
+
+ return 1;
+}
+
+static int kgdb_iabr_match(struct pt_regs *regs)
+{
+ if (user_mode(regs))
+ return 0;
+
+ if (kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs) != 0)
+ return 0;
+ return 1;
+}
+
+static int kgdb_dabr_match(struct pt_regs *regs)
+{
+ if (user_mode(regs))
+ return 0;
+
+ if (kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs) != 0)
+ return 0;
+ return 1;
+}
+
+#define PACK64(ptr, src) do { *(ptr++) = (src); } while (0)
+
+#define PACK32(ptr, src) do { \
+ u32 *ptr32; \
+ ptr32 = (u32 *)ptr; \
+ *(ptr32++) = (src); \
+ ptr = (unsigned long *)ptr32; \
+ } while (0)
+
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+ struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp +
+ STACK_FRAME_OVERHEAD);
+ unsigned long *ptr = gdb_regs;
+ int reg;
+
+ memset(gdb_regs, 0, NUMREGBYTES);
+
+ /* Regs GPR0-2 */
+ for (reg = 0; reg < 3; reg++)
+ PACK64(ptr, regs->gpr[reg]);
+
+ /* Regs GPR3-13 are caller saved, not in regs->gpr[] */
+ ptr += 11;
+
+ /* Regs GPR14-31 */
+ for (reg = 14; reg < 32; reg++)
+ PACK64(ptr, regs->gpr[reg]);
+
+#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_SPE
+ for (reg = 0; reg < 32; reg++)
+ PACK64(ptr, p->thread.evr[reg]);
+#else
+ ptr += 32;
+#endif
+#else
+ /* fp registers not used by kernel, leave zero */
+ ptr += 32 * 8 / sizeof(long);
+#endif
+
+ PACK64(ptr, regs->nip);
+ PACK64(ptr, regs->msr);
+ PACK32(ptr, regs->ccr);
+ PACK64(ptr, regs->link);
+ PACK64(ptr, regs->ctr);
+ PACK32(ptr, regs->xer);
+
+ BUG_ON((unsigned long)ptr >
+ (unsigned long)(((void *)gdb_regs) + NUMREGBYTES));
+}
+
+#define GDB_SIZEOF_REG sizeof(unsigned long)
+#define GDB_SIZEOF_REG_U32 sizeof(u32)
+
+#ifdef CONFIG_FSL_BOOKE
+#define GDB_SIZEOF_FLOAT_REG sizeof(unsigned long)
+#else
+#define GDB_SIZEOF_FLOAT_REG sizeof(u64)
+#endif
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
+{
+ { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[0]) },
+ { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[1]) },
+ { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[2]) },
+ { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[3]) },
+ { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[4]) },
+ { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[5]) },
+ { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[6]) },
+ { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[7]) },
+ { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[8]) },
+ { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[9]) },
+ { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[10]) },
+ { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[11]) },
+ { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[12]) },
+ { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[13]) },
+ { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[14]) },
+ { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[15]) },
+ { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[16]) },
+ { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[17]) },
+ { "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[18]) },
+ { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[19]) },
+ { "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[20]) },
+ { "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[21]) },
+ { "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[22]) },
+ { "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[23]) },
+ { "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[24]) },
+ { "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[25]) },
+ { "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[26]) },
+ { "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[27]) },
+ { "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[28]) },
+ { "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[29]) },
+ { "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[30]) },
+ { "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[31]) },
+
+ { "f0", GDB_SIZEOF_FLOAT_REG, 0 },
+ { "f1", GDB_SIZEOF_FLOAT_REG, 1 },
+ { "f2", GDB_SIZEOF_FLOAT_REG, 2 },
+ { "f3", GDB_SIZEOF_FLOAT_REG, 3 },
+ { "f4", GDB_SIZEOF_FLOAT_REG, 4 },
+ { "f5", GDB_SIZEOF_FLOAT_REG, 5 },
+ { "f6", GDB_SIZEOF_FLOAT_REG, 6 },
+ { "f7", GDB_SIZEOF_FLOAT_REG, 7 },
+ { "f8", GDB_SIZEOF_FLOAT_REG, 8 },
+ { "f9", GDB_SIZEOF_FLOAT_REG, 9 },
+ { "f10", GDB_SIZEOF_FLOAT_REG, 10 },
+ { "f11", GDB_SIZEOF_FLOAT_REG, 11 },
+ { "f12", GDB_SIZEOF_FLOAT_REG, 12 },
+ { "f13", GDB_SIZEOF_FLOAT_REG, 13 },
+ { "f14", GDB_SIZEOF_FLOAT_REG, 14 },
+ { "f15", GDB_SIZEOF_FLOAT_REG, 15 },
+ { "f16", GDB_SIZEOF_FLOAT_REG, 16 },
+ { "f17", GDB_SIZEOF_FLOAT_REG, 17 },
+ { "f18", GDB_SIZEOF_FLOAT_REG, 18 },
+ { "f19", GDB_SIZEOF_FLOAT_REG, 19 },
+ { "f20", GDB_SIZEOF_FLOAT_REG, 20 },
+ { "f21", GDB_SIZEOF_FLOAT_REG, 21 },
+ { "f22", GDB_SIZEOF_FLOAT_REG, 22 },
+ { "f23", GDB_SIZEOF_FLOAT_REG, 23 },
+ { "f24", GDB_SIZEOF_FLOAT_REG, 24 },
+ { "f25", GDB_SIZEOF_FLOAT_REG, 25 },
+ { "f26", GDB_SIZEOF_FLOAT_REG, 26 },
+ { "f27", GDB_SIZEOF_FLOAT_REG, 27 },
+ { "f28", GDB_SIZEOF_FLOAT_REG, 28 },
+ { "f29", GDB_SIZEOF_FLOAT_REG, 29 },
+ { "f30", GDB_SIZEOF_FLOAT_REG, 30 },
+ { "f31", GDB_SIZEOF_FLOAT_REG, 31 },
+
+ { "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, nip) },
+ { "msr", GDB_SIZEOF_REG, offsetof(struct pt_regs, msr) },
+ { "cr", GDB_SIZEOF_REG_U32, offsetof(struct pt_regs, ccr) },
+ { "lr", GDB_SIZEOF_REG, offsetof(struct pt_regs, link) },
+ { "ctr", GDB_SIZEOF_REG_U32, offsetof(struct pt_regs, ctr) },
+ { "xer", GDB_SIZEOF_REG, offsetof(struct pt_regs, xer) },
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+ if (regno >= DBG_MAX_REG_NUM || regno < 0)
+ return NULL;
+
+ if (regno < 32 || regno >= 64)
+ /* First 0 -> 31 gpr registers*/
+ /* pc, msr, ls... registers 64 -> 69 */
+ memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
+ dbg_reg_def[regno].size);
+
+ if (regno >= 32 && regno < 64) {
+ /* FP registers 32 -> 63 */
+#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE)
+ if (current)
+ memcpy(mem, &current->thread.evr[regno-32],
+ dbg_reg_def[regno].size);
+#else
+ /* fp registers not used by kernel, leave zero */
+ memset(mem, 0, dbg_reg_def[regno].size);
+#endif
+ }
+
+ return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+ if (regno >= DBG_MAX_REG_NUM || regno < 0)
+ return -EINVAL;
+
+ if (regno < 32 || regno >= 64)
+ /* First 0 -> 31 gpr registers*/
+ /* pc, msr, ls... registers 64 -> 69 */
+ memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
+ dbg_reg_def[regno].size);
+
+ if (regno >= 32 && regno < 64) {
+ /* FP registers 32 -> 63 */
+#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE)
+ memcpy(&current->thread.evr[regno-32], mem,
+ dbg_reg_def[regno].size);
+#else
+ /* fp registers not used by kernel, leave zero */
+ return 0;
+#endif
+ }
+
+ return 0;
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+ regs->nip = pc;
+}
+
+/*
+ * This function does PowerPC specific procesing for interfacing to gdb.
+ */
+int kgdb_arch_handle_exception(int vector, int signo, int err_code,
+ char *remcom_in_buffer, char *remcom_out_buffer,
+ struct pt_regs *linux_regs)
+{
+ char *ptr = &remcom_in_buffer[1];
+ unsigned long addr;
+
+ switch (remcom_in_buffer[0]) {
+ /*
+ * sAA..AA Step one instruction from AA..AA
+ * This will return an error to gdb ..
+ */
+ case 's':
+ case 'c':
+ /* handle the optional parameter */
+ if (kgdb_hex2long(&ptr, &addr))
+ linux_regs->nip = addr;
+
+ atomic_set(&kgdb_cpu_doing_single_step, -1);
+ /* set the trace bit if we're stepping */
+ if (remcom_in_buffer[0] == 's') {
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ mtspr(SPRN_DBCR0,
+ mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
+ linux_regs->msr |= MSR_DE;
+#else
+ linux_regs->msr |= MSR_SE;
+#endif
+ kgdb_single_step = 1;
+ atomic_set(&kgdb_cpu_doing_single_step,
+ raw_smp_processor_id());
+ }
+ return 0;
+ }
+
+ return -1;
+}
+
+/*
+ * Global data
+ */
+struct kgdb_arch arch_kgdb_ops = {
+ .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08},
+};
+
+static int kgdb_not_implemented(struct pt_regs *regs)
+{
+ return 0;
+}
+
+static void *old__debugger_ipi;
+static void *old__debugger;
+static void *old__debugger_bpt;
+static void *old__debugger_sstep;
+static void *old__debugger_iabr_match;
+static void *old__debugger_dabr_match;
+static void *old__debugger_fault_handler;
+
+int kgdb_arch_init(void)
+{
+ old__debugger_ipi = __debugger_ipi;
+ old__debugger = __debugger;
+ old__debugger_bpt = __debugger_bpt;
+ old__debugger_sstep = __debugger_sstep;
+ old__debugger_iabr_match = __debugger_iabr_match;
+ old__debugger_dabr_match = __debugger_dabr_match;
+ old__debugger_fault_handler = __debugger_fault_handler;
+
+ __debugger_ipi = kgdb_call_nmi_hook;
+ __debugger = kgdb_debugger;
+ __debugger_bpt = kgdb_handle_breakpoint;
+ __debugger_sstep = kgdb_singlestep;
+ __debugger_iabr_match = kgdb_iabr_match;
+ __debugger_dabr_match = kgdb_dabr_match;
+ __debugger_fault_handler = kgdb_not_implemented;
+
+ return 0;
+}
+
+void kgdb_arch_exit(void)
+{
+ __debugger_ipi = old__debugger_ipi;
+ __debugger = old__debugger;
+ __debugger_bpt = old__debugger_bpt;
+ __debugger_sstep = old__debugger_sstep;
+ __debugger_iabr_match = old__debugger_iabr_match;
+ __debugger_dabr_match = old__debugger_dabr_match;
+ __debugger_fault_handler = old__debugger_fault_handler;
+}
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
new file mode 100644
index 00000000..e88c6433
--- /dev/null
+++ b/arch/powerpc/kernel/kprobes.c
@@ -0,0 +1,574 @@
+/*
+ * Kernel Probes (KProbes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ * Probes initial implementation ( includes contributions from
+ * Rusty Russell).
+ * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
+ * interface to access function arguments.
+ * 2004-Nov Ananth N Mavinakayanahalli <ananth@in.ibm.com> kprobes port
+ * for PPC64
+ */
+
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/slab.h>
+#include <asm/cacheflush.h>
+#include <asm/sstep.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+#define MSR_SINGLESTEP (MSR_DE)
+#else
+#define MSR_SINGLESTEP (MSR_SE)
+#endif
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+ int ret = 0;
+ kprobe_opcode_t insn = *p->addr;
+
+ if ((unsigned long)p->addr & 0x03) {
+ printk("Attempt to register kprobe at an unaligned address\n");
+ ret = -EINVAL;
+ } else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) {
+ printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n");
+ ret = -EINVAL;
+ }
+
+ /* insn must be on a special executable page on ppc64. This is
+ * not explicitly required on ppc32 (right now), but it doesn't hurt */
+ if (!ret) {
+ p->ainsn.insn = get_insn_slot();
+ if (!p->ainsn.insn)
+ ret = -ENOMEM;
+ }
+
+ if (!ret) {
+ memcpy(p->ainsn.insn, p->addr,
+ MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+ p->opcode = *p->addr;
+ flush_icache_range((unsigned long)p->ainsn.insn,
+ (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
+ }
+
+ p->ainsn.boostable = 0;
+ return ret;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+ *p->addr = BREAKPOINT_INSTRUCTION;
+ flush_icache_range((unsigned long) p->addr,
+ (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+ *p->addr = p->opcode;
+ flush_icache_range((unsigned long) p->addr,
+ (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+ if (p->ainsn.insn) {
+ free_insn_slot(p->ainsn.insn, 0);
+ p->ainsn.insn = NULL;
+ }
+}
+
+static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+ /* We turn off async exceptions to ensure that the single step will
+ * be for the instruction we have the kprobe on, if we dont its
+ * possible we'd get the single step reported for an exception handler
+ * like Decrementer or External Interrupt */
+ regs->msr &= ~MSR_EE;
+ regs->msr |= MSR_SINGLESTEP;
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ regs->msr &= ~MSR_CE;
+ mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
+#ifdef CONFIG_PPC_47x
+ isync();
+#endif
+#endif
+
+ /*
+ * On powerpc we should single step on the original
+ * instruction even if the probed insn is a trap
+ * variant as values in regs could play a part in
+ * if the trap is taken or not
+ */
+ regs->nip = (unsigned long)p->ainsn.insn;
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ kcb->prev_kprobe.kp = kprobe_running();
+ kcb->prev_kprobe.status = kcb->kprobe_status;
+ kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr;
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+ kcb->kprobe_status = kcb->prev_kprobe.status;
+ kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr;
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ __get_cpu_var(current_kprobe) = p;
+ kcb->kprobe_saved_msr = regs->msr;
+}
+
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+ struct pt_regs *regs)
+{
+ ri->ret_addr = (kprobe_opcode_t *)regs->link;
+
+ /* Replace the return addr with trampoline addr */
+ regs->link = (unsigned long)kretprobe_trampoline;
+}
+
+static int __kprobes kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe *p;
+ int ret = 0;
+ unsigned int *addr = (unsigned int *)regs->nip;
+ struct kprobe_ctlblk *kcb;
+
+ /*
+ * We don't want to be preempted for the entire
+ * duration of kprobe processing
+ */
+ preempt_disable();
+ kcb = get_kprobe_ctlblk();
+
+ /* Check we're not actually recursing */
+ if (kprobe_running()) {
+ p = get_kprobe(addr);
+ if (p) {
+ kprobe_opcode_t insn = *p->ainsn.insn;
+ if (kcb->kprobe_status == KPROBE_HIT_SS &&
+ is_trap(insn)) {
+ /* Turn off 'trace' bits */
+ regs->msr &= ~MSR_SINGLESTEP;
+ regs->msr |= kcb->kprobe_saved_msr;
+ goto no_kprobe;
+ }
+ /* We have reentered the kprobe_handler(), since
+ * another probe was hit while within the handler.
+ * We here save the original kprobes variables and
+ * just single step on the instruction of the new probe
+ * without calling any user handlers.
+ */
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p, regs, kcb);
+ kcb->kprobe_saved_msr = regs->msr;
+ kprobes_inc_nmissed_count(p);
+ prepare_singlestep(p, regs);
+ kcb->kprobe_status = KPROBE_REENTER;
+ return 1;
+ } else {
+ if (*addr != BREAKPOINT_INSTRUCTION) {
+ /* If trap variant, then it belongs not to us */
+ kprobe_opcode_t cur_insn = *addr;
+ if (is_trap(cur_insn))
+ goto no_kprobe;
+ /* The breakpoint instruction was removed by
+ * another cpu right after we hit, no further
+ * handling of this interrupt is appropriate
+ */
+ ret = 1;
+ goto no_kprobe;
+ }
+ p = __get_cpu_var(current_kprobe);
+ if (p->break_handler && p->break_handler(p, regs)) {
+ goto ss_probe;
+ }
+ }
+ goto no_kprobe;
+ }
+
+ p = get_kprobe(addr);
+ if (!p) {
+ if (*addr != BREAKPOINT_INSTRUCTION) {
+ /*
+ * PowerPC has multiple variants of the "trap"
+ * instruction. If the current instruction is a
+ * trap variant, it could belong to someone else
+ */
+ kprobe_opcode_t cur_insn = *addr;
+ if (is_trap(cur_insn))
+ goto no_kprobe;
+ /*
+ * The breakpoint instruction was removed right
+ * after we hit it. Another cpu has removed
+ * either a probepoint or a debugger breakpoint
+ * at this address. In either case, no further
+ * handling of this interrupt is appropriate.
+ */
+ ret = 1;
+ }
+ /* Not one of ours: let kernel handle it */
+ goto no_kprobe;
+ }
+
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ set_current_kprobe(p, regs, kcb);
+ if (p->pre_handler && p->pre_handler(p, regs))
+ /* handler has already set things up, so skip ss setup */
+ return 1;
+
+ss_probe:
+ if (p->ainsn.boostable >= 0) {
+ unsigned int insn = *p->ainsn.insn;
+
+ /* regs->nip is also adjusted if emulate_step returns 1 */
+ ret = emulate_step(regs, insn);
+ if (ret > 0) {
+ /*
+ * Once this instruction has been boosted
+ * successfully, set the boostable flag
+ */
+ if (unlikely(p->ainsn.boostable == 0))
+ p->ainsn.boostable = 1;
+
+ if (p->post_handler)
+ p->post_handler(p, regs, 0);
+
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ return 1;
+ } else if (ret < 0) {
+ /*
+ * We don't allow kprobes on mtmsr(d)/rfi(d), etc.
+ * So, we should never get here... but, its still
+ * good to catch them, just in case...
+ */
+ printk("Can't step on instruction %x\n", insn);
+ BUG();
+ } else if (ret == 0)
+ /* This instruction can't be boosted */
+ p->ainsn.boostable = -1;
+ }
+ prepare_singlestep(p, regs);
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ return 1;
+
+no_kprobe:
+ preempt_enable_no_resched();
+ return ret;
+}
+
+/*
+ * Function return probe trampoline:
+ * - init_kprobes() establishes a probepoint here
+ * - When the probed function returns, this probe
+ * causes the handlers to fire
+ */
+static void __used kretprobe_trampoline_holder(void)
+{
+ asm volatile(".global kretprobe_trampoline\n"
+ "kretprobe_trampoline:\n"
+ "nop\n");
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+static int __kprobes trampoline_probe_handler(struct kprobe *p,
+ struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *node, *tmp;
+ unsigned long flags, orig_ret_address = 0;
+ unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
+
+ INIT_HLIST_HEAD(&empty_rp);
+ kretprobe_hash_lock(current, &head, &flags);
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because an multiple functions in the call path
+ * have a return probe installed on them, and/or more than one return
+ * return probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always inserted at the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the first instance's ret_addr will point to the
+ * real return address, and all the rest will point to
+ * kretprobe_trampoline
+ */
+ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ if (ri->rp && ri->rp->handler)
+ ri->rp->handler(ri, regs);
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ recycle_rp_inst(ri, &empty_rp);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
+ regs->nip = orig_ret_address;
+
+ reset_current_kprobe();
+ kretprobe_hash_unlock(current, &flags);
+ preempt_enable_no_resched();
+
+ hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we don't want the post_handler
+ * to run (and have re-enabled preemption)
+ */
+ return 1;
+}
+
+/*
+ * Called after single-stepping. p->addr is the address of the
+ * instruction whose first byte has been replaced by the "breakpoint"
+ * instruction. To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction. The address of this
+ * copy is p->ainsn.insn.
+ */
+static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (!cur)
+ return 0;
+
+ /* make sure we got here for instruction we have a kprobe on */
+ if (((unsigned long)cur->ainsn.insn + 4) != regs->nip)
+ return 0;
+
+ if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ cur->post_handler(cur, regs, 0);
+ }
+
+ /* Adjust nip to after the single-stepped instruction */
+ regs->nip = (unsigned long)cur->addr + 4;
+ regs->msr |= kcb->kprobe_saved_msr;
+
+ /*Restore back the original saved kprobes variables and continue. */
+ if (kcb->kprobe_status == KPROBE_REENTER) {
+ restore_previous_kprobe(kcb);
+ goto out;
+ }
+ reset_current_kprobe();
+out:
+ preempt_enable_no_resched();
+
+ /*
+ * if somebody else is singlestepping across a probe point, msr
+ * will have DE/SE set, in which case, continue the remaining processing
+ * of do_debug, as if this is not a probe hit.
+ */
+ if (regs->msr & MSR_SINGLESTEP)
+ return 0;
+
+ return 1;
+}
+
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ const struct exception_table_entry *entry;
+
+ switch(kcb->kprobe_status) {
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ /*
+ * We are here because the instruction being single
+ * stepped caused a page fault. We reset the current
+ * kprobe and the nip points back to the probe address
+ * and allow the page fault handler to continue as a
+ * normal page fault.
+ */
+ regs->nip = (unsigned long)cur->addr;
+ regs->msr &= ~MSR_SINGLESTEP; /* Turn off 'trace' bits */
+ regs->msr |= kcb->kprobe_saved_msr;
+ if (kcb->kprobe_status == KPROBE_REENTER)
+ restore_previous_kprobe(kcb);
+ else
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ break;
+ case KPROBE_HIT_ACTIVE:
+ case KPROBE_HIT_SSDONE:
+ /*
+ * We increment the nmissed count for accounting,
+ * we can also use npre/npostfault count for accouting
+ * these specific fault cases.
+ */
+ kprobes_inc_nmissed_count(cur);
+
+ /*
+ * We come here because instructions in the pre/post
+ * handler caused the page_fault, this could happen
+ * if handler tries to access user space by
+ * copy_from_user(), get_user() etc. Let the
+ * user-specified handler try to fix it first.
+ */
+ if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+ return 1;
+
+ /*
+ * In case the user-specified fault handler returned
+ * zero, try to fix up.
+ */
+ if ((entry = search_exception_tables(regs->nip)) != NULL) {
+ regs->nip = entry->fixup;
+ return 1;
+ }
+
+ /*
+ * fixup_exception() could not handle it,
+ * Let do_page_fault() fix it.
+ */
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+/*
+ * Wrapper routine to for handling exceptions.
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct die_args *args = (struct die_args *)data;
+ int ret = NOTIFY_DONE;
+
+ if (args->regs && user_mode(args->regs))
+ return ret;
+
+ switch (val) {
+ case DIE_BPT:
+ if (kprobe_handler(args->regs))
+ ret = NOTIFY_STOP;
+ break;
+ case DIE_SSTEP:
+ if (post_kprobe_handler(args->regs))
+ ret = NOTIFY_STOP;
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+#ifdef CONFIG_PPC64
+unsigned long arch_deref_entry_point(void *entry)
+{
+ return ((func_descr_t *)entry)->entry;
+}
+#endif
+
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct jprobe *jp = container_of(p, struct jprobe, kp);
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
+
+ /* setup return addr to the jprobe handler routine */
+ regs->nip = arch_deref_entry_point(jp->entry);
+#ifdef CONFIG_PPC64
+ regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
+#endif
+
+ return 1;
+}
+
+void __used __kprobes jprobe_return(void)
+{
+ asm volatile("trap" ::: "memory");
+}
+
+static void __used __kprobes jprobe_return_end(void)
+{
+};
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ /*
+ * FIXME - we should ideally be validating that we got here 'cos
+ * of the "trap" in jprobe_return() above, before restoring the
+ * saved regs...
+ */
+ memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+ preempt_enable_no_resched();
+ return 1;
+}
+
+static struct kprobe trampoline_p = {
+ .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+ .pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+ return register_kprobe(&trampoline_p);
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+ if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
+ return 1;
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
new file mode 100644
index 00000000..62bdf238
--- /dev/null
+++ b/arch/powerpc/kernel/kvm.c
@@ -0,0 +1,811 @@
+/*
+ * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
+ * Copyright 2010-2011 Freescale Semiconductor, Inc.
+ *
+ * Authors:
+ * Alexander Graf <agraf@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/kvm_para.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+
+#include <asm/reg.h>
+#include <asm/sections.h>
+#include <asm/cacheflush.h>
+#include <asm/disassemble.h>
+#include <asm/ppc-opcode.h>
+
+#define KVM_MAGIC_PAGE (-4096L)
+#define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x)
+
+#define KVM_INST_LWZ 0x80000000
+#define KVM_INST_STW 0x90000000
+#define KVM_INST_LD 0xe8000000
+#define KVM_INST_STD 0xf8000000
+#define KVM_INST_NOP 0x60000000
+#define KVM_INST_B 0x48000000
+#define KVM_INST_B_MASK 0x03ffffff
+#define KVM_INST_B_MAX 0x01ffffff
+#define KVM_INST_LI 0x38000000
+
+#define KVM_MASK_RT 0x03e00000
+#define KVM_RT_30 0x03c00000
+#define KVM_MASK_RB 0x0000f800
+#define KVM_INST_MFMSR 0x7c0000a6
+
+#define SPR_FROM 0
+#define SPR_TO 0x100
+
+#define KVM_INST_SPR(sprn, moveto) (0x7c0002a6 | \
+ (((sprn) & 0x1f) << 16) | \
+ (((sprn) & 0x3e0) << 6) | \
+ (moveto))
+
+#define KVM_INST_MFSPR(sprn) KVM_INST_SPR(sprn, SPR_FROM)
+#define KVM_INST_MTSPR(sprn) KVM_INST_SPR(sprn, SPR_TO)
+
+#define KVM_INST_TLBSYNC 0x7c00046c
+#define KVM_INST_MTMSRD_L0 0x7c000164
+#define KVM_INST_MTMSRD_L1 0x7c010164
+#define KVM_INST_MTMSR 0x7c000124
+
+#define KVM_INST_WRTEE 0x7c000106
+#define KVM_INST_WRTEEI_0 0x7c000146
+#define KVM_INST_WRTEEI_1 0x7c008146
+
+#define KVM_INST_MTSRIN 0x7c0001e4
+
+static bool kvm_patching_worked = true;
+static char kvm_tmp[1024 * 1024];
+static int kvm_tmp_index;
+
+static inline void kvm_patch_ins(u32 *inst, u32 new_inst)
+{
+ *inst = new_inst;
+ flush_icache_range((ulong)inst, (ulong)inst + 4);
+}
+
+static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
+{
+#ifdef CONFIG_64BIT
+ kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
+#else
+ kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000fffc));
+#endif
+}
+
+static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
+{
+#ifdef CONFIG_64BIT
+ kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
+#else
+ kvm_patch_ins(inst, KVM_INST_LWZ | rt | ((addr + 4) & 0x0000fffc));
+#endif
+}
+
+static void kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
+{
+ kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000ffff));
+}
+
+static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
+{
+#ifdef CONFIG_64BIT
+ kvm_patch_ins(inst, KVM_INST_STD | rt | (addr & 0x0000fffc));
+#else
+ kvm_patch_ins(inst, KVM_INST_STW | rt | ((addr + 4) & 0x0000fffc));
+#endif
+}
+
+static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
+{
+ kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc));
+}
+
+static void kvm_patch_ins_nop(u32 *inst)
+{
+ kvm_patch_ins(inst, KVM_INST_NOP);
+}
+
+static void kvm_patch_ins_b(u32 *inst, int addr)
+{
+#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC_BOOK3S)
+ /* On relocatable kernels interrupts handlers and our code
+ can be in different regions, so we don't patch them */
+
+ if ((ulong)inst < (ulong)&__end_interrupts)
+ return;
+#endif
+
+ kvm_patch_ins(inst, KVM_INST_B | (addr & KVM_INST_B_MASK));
+}
+
+static u32 *kvm_alloc(int len)
+{
+ u32 *p;
+
+ if ((kvm_tmp_index + len) > ARRAY_SIZE(kvm_tmp)) {
+ printk(KERN_ERR "KVM: No more space (%d + %d)\n",
+ kvm_tmp_index, len);
+ kvm_patching_worked = false;
+ return NULL;
+ }
+
+ p = (void*)&kvm_tmp[kvm_tmp_index];
+ kvm_tmp_index += len;
+
+ return p;
+}
+
+extern u32 kvm_emulate_mtmsrd_branch_offs;
+extern u32 kvm_emulate_mtmsrd_reg_offs;
+extern u32 kvm_emulate_mtmsrd_orig_ins_offs;
+extern u32 kvm_emulate_mtmsrd_len;
+extern u32 kvm_emulate_mtmsrd[];
+
+static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
+{
+ u32 *p;
+ int distance_start;
+ int distance_end;
+ ulong next_inst;
+
+ p = kvm_alloc(kvm_emulate_mtmsrd_len * 4);
+ if (!p)
+ return;
+
+ /* Find out where we are and put everything there */
+ distance_start = (ulong)p - (ulong)inst;
+ next_inst = ((ulong)inst + 4);
+ distance_end = next_inst - (ulong)&p[kvm_emulate_mtmsrd_branch_offs];
+
+ /* Make sure we only write valid b instructions */
+ if (distance_start > KVM_INST_B_MAX) {
+ kvm_patching_worked = false;
+ return;
+ }
+
+ /* Modify the chunk to fit the invocation */
+ memcpy(p, kvm_emulate_mtmsrd, kvm_emulate_mtmsrd_len * 4);
+ p[kvm_emulate_mtmsrd_branch_offs] |= distance_end & KVM_INST_B_MASK;
+ switch (get_rt(rt)) {
+ case 30:
+ kvm_patch_ins_ll(&p[kvm_emulate_mtmsrd_reg_offs],
+ magic_var(scratch2), KVM_RT_30);
+ break;
+ case 31:
+ kvm_patch_ins_ll(&p[kvm_emulate_mtmsrd_reg_offs],
+ magic_var(scratch1), KVM_RT_30);
+ break;
+ default:
+ p[kvm_emulate_mtmsrd_reg_offs] |= rt;
+ break;
+ }
+
+ p[kvm_emulate_mtmsrd_orig_ins_offs] = *inst;
+ flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsrd_len * 4);
+
+ /* Patch the invocation */
+ kvm_patch_ins_b(inst, distance_start);
+}
+
+extern u32 kvm_emulate_mtmsr_branch_offs;
+extern u32 kvm_emulate_mtmsr_reg1_offs;
+extern u32 kvm_emulate_mtmsr_reg2_offs;
+extern u32 kvm_emulate_mtmsr_orig_ins_offs;
+extern u32 kvm_emulate_mtmsr_len;
+extern u32 kvm_emulate_mtmsr[];
+
+static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
+{
+ u32 *p;
+ int distance_start;
+ int distance_end;
+ ulong next_inst;
+
+ p = kvm_alloc(kvm_emulate_mtmsr_len * 4);
+ if (!p)
+ return;
+
+ /* Find out where we are and put everything there */
+ distance_start = (ulong)p - (ulong)inst;
+ next_inst = ((ulong)inst + 4);
+ distance_end = next_inst - (ulong)&p[kvm_emulate_mtmsr_branch_offs];
+
+ /* Make sure we only write valid b instructions */
+ if (distance_start > KVM_INST_B_MAX) {
+ kvm_patching_worked = false;
+ return;
+ }
+
+ /* Modify the chunk to fit the invocation */
+ memcpy(p, kvm_emulate_mtmsr, kvm_emulate_mtmsr_len * 4);
+ p[kvm_emulate_mtmsr_branch_offs] |= distance_end & KVM_INST_B_MASK;
+
+ /* Make clobbered registers work too */
+ switch (get_rt(rt)) {
+ case 30:
+ kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg1_offs],
+ magic_var(scratch2), KVM_RT_30);
+ kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg2_offs],
+ magic_var(scratch2), KVM_RT_30);
+ break;
+ case 31:
+ kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg1_offs],
+ magic_var(scratch1), KVM_RT_30);
+ kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg2_offs],
+ magic_var(scratch1), KVM_RT_30);
+ break;
+ default:
+ p[kvm_emulate_mtmsr_reg1_offs] |= rt;
+ p[kvm_emulate_mtmsr_reg2_offs] |= rt;
+ break;
+ }
+
+ p[kvm_emulate_mtmsr_orig_ins_offs] = *inst;
+ flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsr_len * 4);
+
+ /* Patch the invocation */
+ kvm_patch_ins_b(inst, distance_start);
+}
+
+#ifdef CONFIG_BOOKE
+
+extern u32 kvm_emulate_wrtee_branch_offs;
+extern u32 kvm_emulate_wrtee_reg_offs;
+extern u32 kvm_emulate_wrtee_orig_ins_offs;
+extern u32 kvm_emulate_wrtee_len;
+extern u32 kvm_emulate_wrtee[];
+
+static void kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
+{
+ u32 *p;
+ int distance_start;
+ int distance_end;
+ ulong next_inst;
+
+ p = kvm_alloc(kvm_emulate_wrtee_len * 4);
+ if (!p)
+ return;
+
+ /* Find out where we are and put everything there */
+ distance_start = (ulong)p - (ulong)inst;
+ next_inst = ((ulong)inst + 4);
+ distance_end = next_inst - (ulong)&p[kvm_emulate_wrtee_branch_offs];
+
+ /* Make sure we only write valid b instructions */
+ if (distance_start > KVM_INST_B_MAX) {
+ kvm_patching_worked = false;
+ return;
+ }
+
+ /* Modify the chunk to fit the invocation */
+ memcpy(p, kvm_emulate_wrtee, kvm_emulate_wrtee_len * 4);
+ p[kvm_emulate_wrtee_branch_offs] |= distance_end & KVM_INST_B_MASK;
+
+ if (imm_one) {
+ p[kvm_emulate_wrtee_reg_offs] =
+ KVM_INST_LI | __PPC_RT(30) | MSR_EE;
+ } else {
+ /* Make clobbered registers work too */
+ switch (get_rt(rt)) {
+ case 30:
+ kvm_patch_ins_ll(&p[kvm_emulate_wrtee_reg_offs],
+ magic_var(scratch2), KVM_RT_30);
+ break;
+ case 31:
+ kvm_patch_ins_ll(&p[kvm_emulate_wrtee_reg_offs],
+ magic_var(scratch1), KVM_RT_30);
+ break;
+ default:
+ p[kvm_emulate_wrtee_reg_offs] |= rt;
+ break;
+ }
+ }
+
+ p[kvm_emulate_wrtee_orig_ins_offs] = *inst;
+ flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrtee_len * 4);
+
+ /* Patch the invocation */
+ kvm_patch_ins_b(inst, distance_start);
+}
+
+extern u32 kvm_emulate_wrteei_0_branch_offs;
+extern u32 kvm_emulate_wrteei_0_len;
+extern u32 kvm_emulate_wrteei_0[];
+
+static void kvm_patch_ins_wrteei_0(u32 *inst)
+{
+ u32 *p;
+ int distance_start;
+ int distance_end;
+ ulong next_inst;
+
+ p = kvm_alloc(kvm_emulate_wrteei_0_len * 4);
+ if (!p)
+ return;
+
+ /* Find out where we are and put everything there */
+ distance_start = (ulong)p - (ulong)inst;
+ next_inst = ((ulong)inst + 4);
+ distance_end = next_inst - (ulong)&p[kvm_emulate_wrteei_0_branch_offs];
+
+ /* Make sure we only write valid b instructions */
+ if (distance_start > KVM_INST_B_MAX) {
+ kvm_patching_worked = false;
+ return;
+ }
+
+ memcpy(p, kvm_emulate_wrteei_0, kvm_emulate_wrteei_0_len * 4);
+ p[kvm_emulate_wrteei_0_branch_offs] |= distance_end & KVM_INST_B_MASK;
+ flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrteei_0_len * 4);
+
+ /* Patch the invocation */
+ kvm_patch_ins_b(inst, distance_start);
+}
+
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+
+extern u32 kvm_emulate_mtsrin_branch_offs;
+extern u32 kvm_emulate_mtsrin_reg1_offs;
+extern u32 kvm_emulate_mtsrin_reg2_offs;
+extern u32 kvm_emulate_mtsrin_orig_ins_offs;
+extern u32 kvm_emulate_mtsrin_len;
+extern u32 kvm_emulate_mtsrin[];
+
+static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
+{
+ u32 *p;
+ int distance_start;
+ int distance_end;
+ ulong next_inst;
+
+ p = kvm_alloc(kvm_emulate_mtsrin_len * 4);
+ if (!p)
+ return;
+
+ /* Find out where we are and put everything there */
+ distance_start = (ulong)p - (ulong)inst;
+ next_inst = ((ulong)inst + 4);
+ distance_end = next_inst - (ulong)&p[kvm_emulate_mtsrin_branch_offs];
+
+ /* Make sure we only write valid b instructions */
+ if (distance_start > KVM_INST_B_MAX) {
+ kvm_patching_worked = false;
+ return;
+ }
+
+ /* Modify the chunk to fit the invocation */
+ memcpy(p, kvm_emulate_mtsrin, kvm_emulate_mtsrin_len * 4);
+ p[kvm_emulate_mtsrin_branch_offs] |= distance_end & KVM_INST_B_MASK;
+ p[kvm_emulate_mtsrin_reg1_offs] |= (rb << 10);
+ p[kvm_emulate_mtsrin_reg2_offs] |= rt;
+ p[kvm_emulate_mtsrin_orig_ins_offs] = *inst;
+ flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtsrin_len * 4);
+
+ /* Patch the invocation */
+ kvm_patch_ins_b(inst, distance_start);
+}
+
+#endif
+
+static void kvm_map_magic_page(void *data)
+{
+ u32 *features = data;
+
+ ulong in[8];
+ ulong out[8];
+
+ in[0] = KVM_MAGIC_PAGE;
+ in[1] = KVM_MAGIC_PAGE;
+
+ kvm_hypercall(in, out, HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE);
+
+ *features = out[0];
+}
+
+static void kvm_check_ins(u32 *inst, u32 features)
+{
+ u32 _inst = *inst;
+ u32 inst_no_rt = _inst & ~KVM_MASK_RT;
+ u32 inst_rt = _inst & KVM_MASK_RT;
+
+ switch (inst_no_rt) {
+ /* Loads */
+ case KVM_INST_MFMSR:
+ kvm_patch_ins_ld(inst, magic_var(msr), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SPRG0):
+ kvm_patch_ins_ld(inst, magic_var(sprg0), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SPRG1):
+ kvm_patch_ins_ld(inst, magic_var(sprg1), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SPRG2):
+ kvm_patch_ins_ld(inst, magic_var(sprg2), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SPRG3):
+ kvm_patch_ins_ld(inst, magic_var(sprg3), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SRR0):
+ kvm_patch_ins_ld(inst, magic_var(srr0), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SRR1):
+ kvm_patch_ins_ld(inst, magic_var(srr1), inst_rt);
+ break;
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MFSPR(SPRN_DEAR):
+#else
+ case KVM_INST_MFSPR(SPRN_DAR):
+#endif
+ kvm_patch_ins_ld(inst, magic_var(dar), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_DSISR):
+ kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt);
+ break;
+
+#ifdef CONFIG_PPC_BOOK3E_MMU
+ case KVM_INST_MFSPR(SPRN_MAS0):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(mas0), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_MAS1):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(mas1), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_MAS2):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_ld(inst, magic_var(mas2), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_MAS3):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(mas7_3) + 4, inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_MAS4):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(mas4), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_MAS6):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(mas6), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_MAS7):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(mas7_3), inst_rt);
+ break;
+#endif /* CONFIG_PPC_BOOK3E_MMU */
+
+ case KVM_INST_MFSPR(SPRN_SPRG4):
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MFSPR(SPRN_SPRG4R):
+#endif
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_ld(inst, magic_var(sprg4), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SPRG5):
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MFSPR(SPRN_SPRG5R):
+#endif
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_ld(inst, magic_var(sprg5), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SPRG6):
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MFSPR(SPRN_SPRG6R):
+#endif
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_ld(inst, magic_var(sprg6), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SPRG7):
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MFSPR(SPRN_SPRG7R):
+#endif
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_ld(inst, magic_var(sprg7), inst_rt);
+ break;
+
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MFSPR(SPRN_ESR):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(esr), inst_rt);
+ break;
+#endif
+
+ case KVM_INST_MFSPR(SPRN_PIR):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(pir), inst_rt);
+ break;
+
+
+ /* Stores */
+ case KVM_INST_MTSPR(SPRN_SPRG0):
+ kvm_patch_ins_std(inst, magic_var(sprg0), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SPRG1):
+ kvm_patch_ins_std(inst, magic_var(sprg1), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SPRG2):
+ kvm_patch_ins_std(inst, magic_var(sprg2), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SPRG3):
+ kvm_patch_ins_std(inst, magic_var(sprg3), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SRR0):
+ kvm_patch_ins_std(inst, magic_var(srr0), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SRR1):
+ kvm_patch_ins_std(inst, magic_var(srr1), inst_rt);
+ break;
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MTSPR(SPRN_DEAR):
+#else
+ case KVM_INST_MTSPR(SPRN_DAR):
+#endif
+ kvm_patch_ins_std(inst, magic_var(dar), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_DSISR):
+ kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt);
+ break;
+#ifdef CONFIG_PPC_BOOK3E_MMU
+ case KVM_INST_MTSPR(SPRN_MAS0):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(mas0), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_MAS1):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(mas1), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_MAS2):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_std(inst, magic_var(mas2), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_MAS3):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(mas7_3) + 4, inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_MAS4):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(mas4), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_MAS6):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(mas6), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_MAS7):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(mas7_3), inst_rt);
+ break;
+#endif /* CONFIG_PPC_BOOK3E_MMU */
+
+ case KVM_INST_MTSPR(SPRN_SPRG4):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_std(inst, magic_var(sprg4), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SPRG5):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_std(inst, magic_var(sprg5), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SPRG6):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_std(inst, magic_var(sprg6), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SPRG7):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_std(inst, magic_var(sprg7), inst_rt);
+ break;
+
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MTSPR(SPRN_ESR):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(esr), inst_rt);
+ break;
+#endif
+
+ /* Nops */
+ case KVM_INST_TLBSYNC:
+ kvm_patch_ins_nop(inst);
+ break;
+
+ /* Rewrites */
+ case KVM_INST_MTMSRD_L1:
+ kvm_patch_ins_mtmsrd(inst, inst_rt);
+ break;
+ case KVM_INST_MTMSR:
+ case KVM_INST_MTMSRD_L0:
+ kvm_patch_ins_mtmsr(inst, inst_rt);
+ break;
+#ifdef CONFIG_BOOKE
+ case KVM_INST_WRTEE:
+ kvm_patch_ins_wrtee(inst, inst_rt, 0);
+ break;
+#endif
+ }
+
+ switch (inst_no_rt & ~KVM_MASK_RB) {
+#ifdef CONFIG_PPC_BOOK3S_32
+ case KVM_INST_MTSRIN:
+ if (features & KVM_MAGIC_FEAT_SR) {
+ u32 inst_rb = _inst & KVM_MASK_RB;
+ kvm_patch_ins_mtsrin(inst, inst_rt, inst_rb);
+ }
+ break;
+ break;
+#endif
+ }
+
+ switch (_inst) {
+#ifdef CONFIG_BOOKE
+ case KVM_INST_WRTEEI_0:
+ kvm_patch_ins_wrteei_0(inst);
+ break;
+
+ case KVM_INST_WRTEEI_1:
+ kvm_patch_ins_wrtee(inst, 0, 1);
+ break;
+#endif
+ }
+}
+
+extern u32 kvm_template_start[];
+extern u32 kvm_template_end[];
+
+static void kvm_use_magic_page(void)
+{
+ u32 *p;
+ u32 *start, *end;
+ u32 tmp;
+ u32 features;
+
+ /* Tell the host to map the magic page to -4096 on all CPUs */
+ on_each_cpu(kvm_map_magic_page, &features, 1);
+
+ /* Quick self-test to see if the mapping works */
+ if (__get_user(tmp, (u32*)KVM_MAGIC_PAGE)) {
+ kvm_patching_worked = false;
+ return;
+ }
+
+ /* Now loop through all code and find instructions */
+ start = (void*)_stext;
+ end = (void*)_etext;
+
+ /*
+ * Being interrupted in the middle of patching would
+ * be bad for SPRG4-7, which KVM can't keep in sync
+ * with emulated accesses because reads don't trap.
+ */
+ local_irq_disable();
+
+ for (p = start; p < end; p++) {
+ /* Avoid patching the template code */
+ if (p >= kvm_template_start && p < kvm_template_end) {
+ p = kvm_template_end - 1;
+ continue;
+ }
+ kvm_check_ins(p, features);
+ }
+
+ local_irq_enable();
+
+ printk(KERN_INFO "KVM: Live patching for a fast VM %s\n",
+ kvm_patching_worked ? "worked" : "failed");
+}
+
+unsigned long kvm_hypercall(unsigned long *in,
+ unsigned long *out,
+ unsigned long nr)
+{
+ unsigned long register r0 asm("r0");
+ unsigned long register r3 asm("r3") = in[0];
+ unsigned long register r4 asm("r4") = in[1];
+ unsigned long register r5 asm("r5") = in[2];
+ unsigned long register r6 asm("r6") = in[3];
+ unsigned long register r7 asm("r7") = in[4];
+ unsigned long register r8 asm("r8") = in[5];
+ unsigned long register r9 asm("r9") = in[6];
+ unsigned long register r10 asm("r10") = in[7];
+ unsigned long register r11 asm("r11") = nr;
+ unsigned long register r12 asm("r12");
+
+ asm volatile("bl kvm_hypercall_start"
+ : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
+ "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11),
+ "=r"(r12)
+ : "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8),
+ "r"(r9), "r"(r10), "r"(r11)
+ : "memory", "cc", "xer", "ctr", "lr");
+
+ out[0] = r4;
+ out[1] = r5;
+ out[2] = r6;
+ out[3] = r7;
+ out[4] = r8;
+ out[5] = r9;
+ out[6] = r10;
+ out[7] = r11;
+
+ return r3;
+}
+EXPORT_SYMBOL_GPL(kvm_hypercall);
+
+static int kvm_para_setup(void)
+{
+ extern u32 kvm_hypercall_start;
+ struct device_node *hyper_node;
+ u32 *insts;
+ int len, i;
+
+ hyper_node = of_find_node_by_path("/hypervisor");
+ if (!hyper_node)
+ return -1;
+
+ insts = (u32*)of_get_property(hyper_node, "hcall-instructions", &len);
+ if (len % 4)
+ return -1;
+ if (len > (4 * 4))
+ return -1;
+
+ for (i = 0; i < (len / 4); i++)
+ kvm_patch_ins(&(&kvm_hypercall_start)[i], insts[i]);
+
+ return 0;
+}
+
+static __init void kvm_free_tmp(void)
+{
+ unsigned long start, end;
+
+ start = (ulong)&kvm_tmp[kvm_tmp_index + (PAGE_SIZE - 1)] & PAGE_MASK;
+ end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK;
+
+ /* Free the tmp space we don't need */
+ for (; start < end; start += PAGE_SIZE) {
+ ClearPageReserved(virt_to_page(start));
+ init_page_count(virt_to_page(start));
+ free_page(start);
+ totalram_pages++;
+ }
+}
+
+static int __init kvm_guest_init(void)
+{
+ if (!kvm_para_available())
+ goto free_tmp;
+
+ if (kvm_para_setup())
+ goto free_tmp;
+
+ if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE))
+ kvm_use_magic_page();
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Enable napping */
+ powersave_nap = 1;
+#endif
+
+free_tmp:
+ kvm_free_tmp();
+
+ return 0;
+}
+
+postcore_initcall(kvm_guest_init);
diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S
new file mode 100644
index 00000000..e291cf3c
--- /dev/null
+++ b/arch/powerpc/kernel/kvm_emul.S
@@ -0,0 +1,358 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2010
+ * Copyright 2010-2011 Freescale Semiconductor, Inc.
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+
+/* Hypercall entry point. Will be patched with device tree instructions. */
+
+.global kvm_hypercall_start
+kvm_hypercall_start:
+ li r3, -1
+ nop
+ nop
+ nop
+ blr
+
+#define KVM_MAGIC_PAGE (-4096)
+
+#ifdef CONFIG_64BIT
+#define LL64(reg, offs, reg2) ld reg, (offs)(reg2)
+#define STL64(reg, offs, reg2) std reg, (offs)(reg2)
+#else
+#define LL64(reg, offs, reg2) lwz reg, (offs + 4)(reg2)
+#define STL64(reg, offs, reg2) stw reg, (offs + 4)(reg2)
+#endif
+
+#define SCRATCH_SAVE \
+ /* Enable critical section. We are critical if \
+ shared->critical == r1 */ \
+ STL64(r1, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0); \
+ \
+ /* Save state */ \
+ PPC_STL r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0); \
+ PPC_STL r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0); \
+ mfcr r31; \
+ stw r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0);
+
+#define SCRATCH_RESTORE \
+ /* Restore state */ \
+ PPC_LL r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0); \
+ lwz r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0); \
+ mtcr r30; \
+ PPC_LL r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0); \
+ \
+ /* Disable critical section. We are critical if \
+ shared->critical == r1 and r2 is always != r1 */ \
+ STL64(r2, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0);
+
+.global kvm_template_start
+kvm_template_start:
+
+.global kvm_emulate_mtmsrd
+kvm_emulate_mtmsrd:
+
+ SCRATCH_SAVE
+
+ /* Put MSR & ~(MSR_EE|MSR_RI) in r31 */
+ LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+ lis r30, (~(MSR_EE | MSR_RI))@h
+ ori r30, r30, (~(MSR_EE | MSR_RI))@l
+ and r31, r31, r30
+
+ /* OR the register's (MSR_EE|MSR_RI) on MSR */
+kvm_emulate_mtmsrd_reg:
+ ori r30, r0, 0
+ andi. r30, r30, (MSR_EE|MSR_RI)
+ or r31, r31, r30
+
+ /* Put MSR back into magic page */
+ STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ /* Check if we have to fetch an interrupt */
+ lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0)
+ cmpwi r31, 0
+ beq+ no_check
+
+ /* Check if we may trigger an interrupt */
+ andi. r30, r30, MSR_EE
+ beq no_check
+
+ SCRATCH_RESTORE
+
+ /* Nag hypervisor */
+kvm_emulate_mtmsrd_orig_ins:
+ tlbsync
+
+ b kvm_emulate_mtmsrd_branch
+
+no_check:
+
+ SCRATCH_RESTORE
+
+ /* Go back to caller */
+kvm_emulate_mtmsrd_branch:
+ b .
+kvm_emulate_mtmsrd_end:
+
+.global kvm_emulate_mtmsrd_branch_offs
+kvm_emulate_mtmsrd_branch_offs:
+ .long (kvm_emulate_mtmsrd_branch - kvm_emulate_mtmsrd) / 4
+
+.global kvm_emulate_mtmsrd_reg_offs
+kvm_emulate_mtmsrd_reg_offs:
+ .long (kvm_emulate_mtmsrd_reg - kvm_emulate_mtmsrd) / 4
+
+.global kvm_emulate_mtmsrd_orig_ins_offs
+kvm_emulate_mtmsrd_orig_ins_offs:
+ .long (kvm_emulate_mtmsrd_orig_ins - kvm_emulate_mtmsrd) / 4
+
+.global kvm_emulate_mtmsrd_len
+kvm_emulate_mtmsrd_len:
+ .long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4
+
+
+#define MSR_SAFE_BITS (MSR_EE | MSR_CE | MSR_ME | MSR_RI)
+#define MSR_CRITICAL_BITS ~MSR_SAFE_BITS
+
+.global kvm_emulate_mtmsr
+kvm_emulate_mtmsr:
+
+ SCRATCH_SAVE
+
+ /* Fetch old MSR in r31 */
+ LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ /* Find the changed bits between old and new MSR */
+kvm_emulate_mtmsr_reg1:
+ ori r30, r0, 0
+ xor r31, r30, r31
+
+ /* Check if we need to really do mtmsr */
+ LOAD_REG_IMMEDIATE(r30, MSR_CRITICAL_BITS)
+ and. r31, r31, r30
+
+ /* No critical bits changed? Maybe we can stay in the guest. */
+ beq maybe_stay_in_guest
+
+do_mtmsr:
+
+ SCRATCH_RESTORE
+
+ /* Just fire off the mtmsr if it's critical */
+kvm_emulate_mtmsr_orig_ins:
+ mtmsr r0
+
+ b kvm_emulate_mtmsr_branch
+
+maybe_stay_in_guest:
+
+ /* Get the target register in r30 */
+kvm_emulate_mtmsr_reg2:
+ ori r30, r0, 0
+
+ /* Put MSR into magic page because we don't call mtmsr */
+ STL64(r30, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ /* Check if we have to fetch an interrupt */
+ lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0)
+ cmpwi r31, 0
+ beq+ no_mtmsr
+
+ /* Check if we may trigger an interrupt */
+ andi. r31, r30, MSR_EE
+ bne do_mtmsr
+
+no_mtmsr:
+
+ SCRATCH_RESTORE
+
+ /* Go back to caller */
+kvm_emulate_mtmsr_branch:
+ b .
+kvm_emulate_mtmsr_end:
+
+.global kvm_emulate_mtmsr_branch_offs
+kvm_emulate_mtmsr_branch_offs:
+ .long (kvm_emulate_mtmsr_branch - kvm_emulate_mtmsr) / 4
+
+.global kvm_emulate_mtmsr_reg1_offs
+kvm_emulate_mtmsr_reg1_offs:
+ .long (kvm_emulate_mtmsr_reg1 - kvm_emulate_mtmsr) / 4
+
+.global kvm_emulate_mtmsr_reg2_offs
+kvm_emulate_mtmsr_reg2_offs:
+ .long (kvm_emulate_mtmsr_reg2 - kvm_emulate_mtmsr) / 4
+
+.global kvm_emulate_mtmsr_orig_ins_offs
+kvm_emulate_mtmsr_orig_ins_offs:
+ .long (kvm_emulate_mtmsr_orig_ins - kvm_emulate_mtmsr) / 4
+
+.global kvm_emulate_mtmsr_len
+kvm_emulate_mtmsr_len:
+ .long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4
+
+/* also used for wrteei 1 */
+.global kvm_emulate_wrtee
+kvm_emulate_wrtee:
+
+ SCRATCH_SAVE
+
+ /* Fetch old MSR in r31 */
+ LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ /* Insert new MSR[EE] */
+kvm_emulate_wrtee_reg:
+ ori r30, r0, 0
+ rlwimi r31, r30, 0, MSR_EE
+
+ /*
+ * If MSR[EE] is now set, check for a pending interrupt.
+ * We could skip this if MSR[EE] was already on, but that
+ * should be rare, so don't bother.
+ */
+ andi. r30, r30, MSR_EE
+
+ /* Put MSR into magic page because we don't call wrtee */
+ STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ beq no_wrtee
+
+ /* Check if we have to fetch an interrupt */
+ lwz r30, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0)
+ cmpwi r30, 0
+ bne do_wrtee
+
+no_wrtee:
+ SCRATCH_RESTORE
+
+ /* Go back to caller */
+kvm_emulate_wrtee_branch:
+ b .
+
+do_wrtee:
+ SCRATCH_RESTORE
+
+ /* Just fire off the wrtee if it's critical */
+kvm_emulate_wrtee_orig_ins:
+ wrtee r0
+
+ b kvm_emulate_wrtee_branch
+
+kvm_emulate_wrtee_end:
+
+.global kvm_emulate_wrtee_branch_offs
+kvm_emulate_wrtee_branch_offs:
+ .long (kvm_emulate_wrtee_branch - kvm_emulate_wrtee) / 4
+
+.global kvm_emulate_wrtee_reg_offs
+kvm_emulate_wrtee_reg_offs:
+ .long (kvm_emulate_wrtee_reg - kvm_emulate_wrtee) / 4
+
+.global kvm_emulate_wrtee_orig_ins_offs
+kvm_emulate_wrtee_orig_ins_offs:
+ .long (kvm_emulate_wrtee_orig_ins - kvm_emulate_wrtee) / 4
+
+.global kvm_emulate_wrtee_len
+kvm_emulate_wrtee_len:
+ .long (kvm_emulate_wrtee_end - kvm_emulate_wrtee) / 4
+
+.global kvm_emulate_wrteei_0
+kvm_emulate_wrteei_0:
+ SCRATCH_SAVE
+
+ /* Fetch old MSR in r31 */
+ LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ /* Remove MSR_EE from old MSR */
+ rlwinm r31, r31, 0, ~MSR_EE
+
+ /* Write new MSR value back */
+ STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ SCRATCH_RESTORE
+
+ /* Go back to caller */
+kvm_emulate_wrteei_0_branch:
+ b .
+kvm_emulate_wrteei_0_end:
+
+.global kvm_emulate_wrteei_0_branch_offs
+kvm_emulate_wrteei_0_branch_offs:
+ .long (kvm_emulate_wrteei_0_branch - kvm_emulate_wrteei_0) / 4
+
+.global kvm_emulate_wrteei_0_len
+kvm_emulate_wrteei_0_len:
+ .long (kvm_emulate_wrteei_0_end - kvm_emulate_wrteei_0) / 4
+
+.global kvm_emulate_mtsrin
+kvm_emulate_mtsrin:
+
+ SCRATCH_SAVE
+
+ LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+ andi. r31, r31, MSR_DR | MSR_IR
+ beq kvm_emulate_mtsrin_reg1
+
+ SCRATCH_RESTORE
+
+kvm_emulate_mtsrin_orig_ins:
+ nop
+ b kvm_emulate_mtsrin_branch
+
+kvm_emulate_mtsrin_reg1:
+ /* rX >> 26 */
+ rlwinm r30,r0,6,26,29
+
+kvm_emulate_mtsrin_reg2:
+ stw r0, (KVM_MAGIC_PAGE + KVM_MAGIC_SR)(r30)
+
+ SCRATCH_RESTORE
+
+ /* Go back to caller */
+kvm_emulate_mtsrin_branch:
+ b .
+kvm_emulate_mtsrin_end:
+
+.global kvm_emulate_mtsrin_branch_offs
+kvm_emulate_mtsrin_branch_offs:
+ .long (kvm_emulate_mtsrin_branch - kvm_emulate_mtsrin) / 4
+
+.global kvm_emulate_mtsrin_reg1_offs
+kvm_emulate_mtsrin_reg1_offs:
+ .long (kvm_emulate_mtsrin_reg1 - kvm_emulate_mtsrin) / 4
+
+.global kvm_emulate_mtsrin_reg2_offs
+kvm_emulate_mtsrin_reg2_offs:
+ .long (kvm_emulate_mtsrin_reg2 - kvm_emulate_mtsrin) / 4
+
+.global kvm_emulate_mtsrin_orig_ins_offs
+kvm_emulate_mtsrin_orig_ins_offs:
+ .long (kvm_emulate_mtsrin_orig_ins - kvm_emulate_mtsrin) / 4
+
+.global kvm_emulate_mtsrin_len
+kvm_emulate_mtsrin_len:
+ .long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4
+
+.global kvm_template_end
+kvm_template_end:
diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
new file mode 100644
index 00000000..97ec8557
--- /dev/null
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -0,0 +1,470 @@
+/*
+ L2CR functions
+ Copyright © 1997-1998 by PowerLogix R & D, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+/*
+ Thur, Dec. 12, 1998.
+ - First public release, contributed by PowerLogix.
+ ***********
+ Sat, Aug. 7, 1999.
+ - Terry: Made sure code disabled interrupts before running. (Previously
+ it was assumed interrupts were already disabled).
+ - Terry: Updated for tentative G4 support. 4MB of memory is now flushed
+ instead of 2MB. (Prob. only 3 is necessary).
+ - Terry: Updated for workaround to HID0[DPM] processor bug
+ during global invalidates.
+ ***********
+ Thu, July 13, 2000.
+ - Terry: Added isync to correct for an errata.
+
+ 22 August 2001.
+ - DanM: Finally added the 7450 patch I've had for the past
+ several months. The L2CR is similar, but I'm going
+ to assume the user of this functions knows what they
+ are doing.
+
+ Author: Terry Greeniaus (tgree@phys.ualberta.ca)
+ Please e-mail updates to this file to me, thanks!
+*/
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/cache.h>
+#include <asm/page.h>
+
+/* Usage:
+
+ When setting the L2CR register, you must do a few special
+ things. If you are enabling the cache, you must perform a
+ global invalidate. If you are disabling the cache, you must
+ flush the cache contents first. This routine takes care of
+ doing these things. When first enabling the cache, make sure
+ you pass in the L2CR you want, as well as passing in the
+ global invalidate bit set. A global invalidate will only be
+ performed if the L2I bit is set in applyThis. When enabling
+ the cache, you should also set the L2E bit in applyThis. If
+ you want to modify the L2CR contents after the cache has been
+ enabled, the recommended procedure is to first call
+ __setL2CR(0) to disable the cache and then call it again with
+ the new values for L2CR. Examples:
+
+ _setL2CR(0) - disables the cache
+ _setL2CR(0xB3A04000) - enables my G3 upgrade card:
+ - L2E set to turn on the cache
+ - L2SIZ set to 1MB
+ - L2CLK set to 1:1
+ - L2RAM set to pipelined synchronous late-write
+ - L2I set to perform a global invalidation
+ - L2OH set to 0.5 nS
+ - L2DF set because this upgrade card
+ requires it
+
+ A similar call should work for your card. You need to know
+ the correct setting for your card and then place them in the
+ fields I have outlined above. Other fields support optional
+ features, such as L2DO which caches only data, or L2TS which
+ causes cache pushes from the L1 cache to go to the L2 cache
+ instead of to main memory.
+
+IMPORTANT:
+ Starting with the 7450, the bits in this register have moved
+ or behave differently. The Enable, Parity Enable, Size,
+ and L2 Invalidate are the only bits that have not moved.
+ The size is read-only for these processors with internal L2
+ cache, and the invalidate is a control as well as status.
+ -- Dan
+
+*/
+/*
+ * Summary: this procedure ignores the L2I bit in the value passed in,
+ * flushes the cache if it was already enabled, always invalidates the
+ * cache, then enables the cache if the L2E bit is set in the value
+ * passed in.
+ * -- paulus.
+ */
+_GLOBAL(_set_L2CR)
+ /* Make sure this is a 750 or 7400 chip */
+BEGIN_FTR_SECTION
+ li r3,-1
+ blr
+END_FTR_SECTION_IFCLR(CPU_FTR_L2CR)
+
+ mflr r9
+
+ /* Stop DST streams */
+BEGIN_FTR_SECTION
+ DSSALL
+ sync
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+
+ /* Turn off interrupts and data relocation. */
+ mfmsr r7 /* Save MSR in r7 */
+ rlwinm r4,r7,0,17,15
+ rlwinm r4,r4,0,28,26 /* Turn off DR bit */
+ sync
+ mtmsr r4
+ isync
+
+ /* Before we perform the global invalidation, we must disable dynamic
+ * power management via HID0[DPM] to work around a processor bug where
+ * DPM can possibly interfere with the state machine in the processor
+ * that invalidates the L2 cache tags.
+ */
+ mfspr r8,SPRN_HID0 /* Save HID0 in r8 */
+ rlwinm r4,r8,0,12,10 /* Turn off HID0[DPM] */
+ sync
+ mtspr SPRN_HID0,r4 /* Disable DPM */
+ sync
+
+ /* Get the current enable bit of the L2CR into r4 */
+ mfspr r4,SPRN_L2CR
+
+ /* Tweak some bits */
+ rlwinm r5,r3,0,0,0 /* r5 contains the new enable bit */
+ rlwinm r3,r3,0,11,9 /* Turn off the invalidate bit */
+ rlwinm r3,r3,0,1,31 /* Turn off the enable bit */
+
+ /* Check to see if we need to flush */
+ rlwinm. r4,r4,0,0,0
+ beq 2f
+
+ /* Flush the cache. First, read the first 4MB of memory (physical) to
+ * put new data in the cache. (Actually we only need
+ * the size of the L2 cache plus the size of the L1 cache, but 4MB will
+ * cover everything just to be safe).
+ */
+
+ /**** Might be a good idea to set L2DO here - to prevent instructions
+ from getting into the cache. But since we invalidate
+ the next time we enable the cache it doesn't really matter.
+ Don't do this unless you accommodate all processor variations.
+ The bit moved on the 7450.....
+ ****/
+
+BEGIN_FTR_SECTION
+ /* Disable L2 prefetch on some 745x and try to ensure
+ * L2 prefetch engines are idle. As explained by errata
+ * text, we can't be sure they are, we just hope very hard
+ * that well be enough (sic !). At least I noticed Apple
+ * doesn't even bother doing the dcbf's here...
+ */
+ mfspr r4,SPRN_MSSCR0
+ rlwinm r4,r4,0,0,29
+ sync
+ mtspr SPRN_MSSCR0,r4
+ sync
+ isync
+ lis r4,KERNELBASE@h
+ dcbf 0,r4
+ dcbf 0,r4
+ dcbf 0,r4
+ dcbf 0,r4
+END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
+
+ /* TODO: use HW flush assist when available */
+
+ lis r4,0x0002
+ mtctr r4
+ li r4,0
+1:
+ lwzx r0,r0,r4
+ addi r4,r4,32 /* Go to start of next cache line */
+ bdnz 1b
+ isync
+
+ /* Now, flush the first 4MB of memory */
+ lis r4,0x0002
+ mtctr r4
+ li r4,0
+ sync
+1:
+ dcbf 0,r4
+ addi r4,r4,32 /* Go to start of next cache line */
+ bdnz 1b
+
+2:
+ /* Set up the L2CR configuration bits (and switch L2 off) */
+ /* CPU errata: Make sure the mtspr below is already in the
+ * L1 icache
+ */
+ b 20f
+ .balign L1_CACHE_BYTES
+22:
+ sync
+ mtspr SPRN_L2CR,r3
+ sync
+ b 23f
+20:
+ b 21f
+21: sync
+ isync
+ b 22b
+
+23:
+ /* Perform a global invalidation */
+ oris r3,r3,0x0020
+ sync
+ mtspr SPRN_L2CR,r3
+ sync
+ isync /* For errata */
+
+BEGIN_FTR_SECTION
+ /* On the 7450, we wait for the L2I bit to clear......
+ */
+10: mfspr r3,SPRN_L2CR
+ andis. r4,r3,0x0020
+ bne 10b
+ b 11f
+END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
+
+ /* Wait for the invalidation to complete */
+3: mfspr r3,SPRN_L2CR
+ rlwinm. r4,r3,0,31,31
+ bne 3b
+
+11: rlwinm r3,r3,0,11,9 /* Turn off the L2I bit */
+ sync
+ mtspr SPRN_L2CR,r3
+ sync
+
+ /* See if we need to enable the cache */
+ cmplwi r5,0
+ beq 4f
+
+ /* Enable the cache */
+ oris r3,r3,0x8000
+ mtspr SPRN_L2CR,r3
+ sync
+
+ /* Enable L2 HW prefetch on 744x/745x */
+BEGIN_FTR_SECTION
+ mfspr r3,SPRN_MSSCR0
+ ori r3,r3,3
+ sync
+ mtspr SPRN_MSSCR0,r3
+ sync
+ isync
+END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
+4:
+
+ /* Restore HID0[DPM] to whatever it was before */
+ sync
+ mtspr 1008,r8
+ sync
+
+ /* Restore MSR (restores EE and DR bits to original state) */
+ SYNC
+ mtmsr r7
+ isync
+
+ mtlr r9
+ blr
+
+_GLOBAL(_get_L2CR)
+ /* Return the L2CR contents */
+ li r3,0
+BEGIN_FTR_SECTION
+ mfspr r3,SPRN_L2CR
+END_FTR_SECTION_IFSET(CPU_FTR_L2CR)
+ blr
+
+
+/*
+ * Here is a similar routine for dealing with the L3 cache
+ * on the 745x family of chips
+ */
+
+_GLOBAL(_set_L3CR)
+ /* Make sure this is a 745x chip */
+BEGIN_FTR_SECTION
+ li r3,-1
+ blr
+END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
+
+ /* Turn off interrupts and data relocation. */
+ mfmsr r7 /* Save MSR in r7 */
+ rlwinm r4,r7,0,17,15
+ rlwinm r4,r4,0,28,26 /* Turn off DR bit */
+ sync
+ mtmsr r4
+ isync
+
+ /* Stop DST streams */
+ DSSALL
+ sync
+
+ /* Get the current enable bit of the L3CR into r4 */
+ mfspr r4,SPRN_L3CR
+
+ /* Tweak some bits */
+ rlwinm r5,r3,0,0,0 /* r5 contains the new enable bit */
+ rlwinm r3,r3,0,22,20 /* Turn off the invalidate bit */
+ rlwinm r3,r3,0,2,31 /* Turn off the enable & PE bits */
+ rlwinm r3,r3,0,5,3 /* Turn off the clken bit */
+ /* Check to see if we need to flush */
+ rlwinm. r4,r4,0,0,0
+ beq 2f
+
+ /* Flush the cache.
+ */
+
+ /* TODO: use HW flush assist */
+
+ lis r4,0x0008
+ mtctr r4
+ li r4,0
+1:
+ lwzx r0,r0,r4
+ dcbf 0,r4
+ addi r4,r4,32 /* Go to start of next cache line */
+ bdnz 1b
+
+2:
+ /* Set up the L3CR configuration bits (and switch L3 off) */
+ sync
+ mtspr SPRN_L3CR,r3
+ sync
+
+ oris r3,r3,L3CR_L3RES@h /* Set reserved bit 5 */
+ mtspr SPRN_L3CR,r3
+ sync
+ oris r3,r3,L3CR_L3CLKEN@h /* Set clken */
+ mtspr SPRN_L3CR,r3
+ sync
+
+ /* Wait for stabilize */
+ li r0,256
+ mtctr r0
+1: bdnz 1b
+
+ /* Perform a global invalidation */
+ ori r3,r3,0x0400
+ sync
+ mtspr SPRN_L3CR,r3
+ sync
+ isync
+
+ /* We wait for the L3I bit to clear...... */
+10: mfspr r3,SPRN_L3CR
+ andi. r4,r3,0x0400
+ bne 10b
+
+ /* Clear CLKEN */
+ rlwinm r3,r3,0,5,3 /* Turn off the clken bit */
+ mtspr SPRN_L3CR,r3
+ sync
+
+ /* Wait for stabilize */
+ li r0,256
+ mtctr r0
+1: bdnz 1b
+
+ /* See if we need to enable the cache */
+ cmplwi r5,0
+ beq 4f
+
+ /* Enable the cache */
+ oris r3,r3,(L3CR_L3E | L3CR_L3CLKEN)@h
+ mtspr SPRN_L3CR,r3
+ sync
+
+ /* Wait for stabilize */
+ li r0,256
+ mtctr r0
+1: bdnz 1b
+
+ /* Restore MSR (restores EE and DR bits to original state) */
+4: SYNC
+ mtmsr r7
+ isync
+ blr
+
+_GLOBAL(_get_L3CR)
+ /* Return the L3CR contents */
+ li r3,0
+BEGIN_FTR_SECTION
+ mfspr r3,SPRN_L3CR
+END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
+ blr
+
+/* --- End of PowerLogix code ---
+ */
+
+
+/* flush_disable_L1() - Flush and disable L1 cache
+ *
+ * clobbers r0, r3, ctr, cr0
+ * Must be called with interrupts disabled and MMU enabled.
+ */
+_GLOBAL(__flush_disable_L1)
+ /* Stop pending alitvec streams and memory accesses */
+BEGIN_FTR_SECTION
+ DSSALL
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ sync
+
+ /* Load counter to 0x4000 cache lines (512k) and
+ * load cache with datas
+ */
+ li r3,0x4000 /* 512kB / 32B */
+ mtctr r3
+ lis r3,KERNELBASE@h
+1:
+ lwz r0,0(r3)
+ addi r3,r3,0x0020 /* Go to start of next cache line */
+ bdnz 1b
+ isync
+ sync
+
+ /* Now flush those cache lines */
+ li r3,0x4000 /* 512kB / 32B */
+ mtctr r3
+ lis r3,KERNELBASE@h
+1:
+ dcbf 0,r3
+ addi r3,r3,0x0020 /* Go to start of next cache line */
+ bdnz 1b
+ sync
+
+ /* We can now disable the L1 cache (HID0:DCE, HID0:ICE) */
+ mfspr r3,SPRN_HID0
+ rlwinm r3,r3,0,18,15
+ mtspr SPRN_HID0,r3
+ sync
+ isync
+ blr
+
+/* inval_enable_L1 - Invalidate and enable L1 cache
+ *
+ * Assumes L1 is already disabled and MSR:EE is off
+ *
+ * clobbers r3
+ */
+_GLOBAL(__inval_enable_L1)
+ /* Enable and then Flash inval the instruction & data cache */
+ mfspr r3,SPRN_HID0
+ ori r3,r3, HID0_ICE|HID0_ICFI|HID0_DCE|HID0_DCI
+ sync
+ isync
+ mtspr SPRN_HID0,r3
+ xori r3,r3, HID0_ICFI|HID0_DCI
+ mtspr SPRN_HID0,r3
+ sync
+
+ blr
+
+
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
new file mode 100644
index 00000000..bedd12e1
--- /dev/null
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -0,0 +1,616 @@
+#include <linux/kernel.h>
+#include <linux/serial.h>
+#include <linux/serial_8250.h>
+#include <linux/serial_core.h>
+#include <linux/console.h>
+#include <linux/pci.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/serial_reg.h>
+#include <asm/io.h>
+#include <asm/mmu.h>
+#include <asm/prom.h>
+#include <asm/serial.h>
+#include <asm/udbg.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) do { printk(fmt); } while(0)
+#else
+#define DBG(fmt...) do { } while(0)
+#endif
+
+#define MAX_LEGACY_SERIAL_PORTS 8
+
+static struct plat_serial8250_port
+legacy_serial_ports[MAX_LEGACY_SERIAL_PORTS+1];
+static struct legacy_serial_info {
+ struct device_node *np;
+ unsigned int speed;
+ unsigned int clock;
+ int irq_check_parent;
+ phys_addr_t taddr;
+} legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS];
+
+static struct __initdata of_device_id legacy_serial_parents[] = {
+ {.type = "soc",},
+ {.type = "tsi-bridge",},
+ {.type = "opb", },
+ {.compatible = "ibm,opb",},
+ {.compatible = "simple-bus",},
+ {.compatible = "wrs,epld-localbus",},
+ {},
+};
+
+static unsigned int legacy_serial_count;
+static int legacy_serial_console = -1;
+
+static unsigned int tsi_serial_in(struct uart_port *p, int offset)
+{
+ unsigned int tmp;
+ offset = offset << p->regshift;
+ if (offset == UART_IIR) {
+ tmp = readl(p->membase + (UART_IIR & ~3));
+ return (tmp >> 16) & 0xff; /* UART_IIR % 4 == 2 */
+ } else
+ return readb(p->membase + offset);
+}
+
+static void tsi_serial_out(struct uart_port *p, int offset, int value)
+{
+ offset = offset << p->regshift;
+ if (!((offset == UART_IER) && (value & UART_IER_UUE)))
+ writeb(value, p->membase + offset);
+}
+
+static int __init add_legacy_port(struct device_node *np, int want_index,
+ int iotype, phys_addr_t base,
+ phys_addr_t taddr, unsigned long irq,
+ upf_t flags, int irq_check_parent)
+{
+ const __be32 *clk, *spd;
+ u32 clock = BASE_BAUD * 16;
+ int index;
+
+ /* get clock freq. if present */
+ clk = of_get_property(np, "clock-frequency", NULL);
+ if (clk && *clk)
+ clock = be32_to_cpup(clk);
+
+ /* get default speed if present */
+ spd = of_get_property(np, "current-speed", NULL);
+
+ /* If we have a location index, then try to use it */
+ if (want_index >= 0 && want_index < MAX_LEGACY_SERIAL_PORTS)
+ index = want_index;
+ else
+ index = legacy_serial_count;
+
+ /* if our index is still out of range, that mean that
+ * array is full, we could scan for a free slot but that
+ * make little sense to bother, just skip the port
+ */
+ if (index >= MAX_LEGACY_SERIAL_PORTS)
+ return -1;
+ if (index >= legacy_serial_count)
+ legacy_serial_count = index + 1;
+
+ /* Check if there is a port who already claimed our slot */
+ if (legacy_serial_infos[index].np != 0) {
+ /* if we still have some room, move it, else override */
+ if (legacy_serial_count < MAX_LEGACY_SERIAL_PORTS) {
+ printk(KERN_DEBUG "Moved legacy port %d -> %d\n",
+ index, legacy_serial_count);
+ legacy_serial_ports[legacy_serial_count] =
+ legacy_serial_ports[index];
+ legacy_serial_infos[legacy_serial_count] =
+ legacy_serial_infos[index];
+ legacy_serial_count++;
+ } else {
+ printk(KERN_DEBUG "Replacing legacy port %d\n", index);
+ }
+ }
+
+ /* Now fill the entry */
+ memset(&legacy_serial_ports[index], 0,
+ sizeof(struct plat_serial8250_port));
+ if (iotype == UPIO_PORT)
+ legacy_serial_ports[index].iobase = base;
+ else
+ legacy_serial_ports[index].mapbase = base;
+
+ legacy_serial_ports[index].iotype = iotype;
+ legacy_serial_ports[index].uartclk = clock;
+ legacy_serial_ports[index].irq = irq;
+ legacy_serial_ports[index].flags = flags;
+ legacy_serial_infos[index].taddr = taddr;
+ legacy_serial_infos[index].np = of_node_get(np);
+ legacy_serial_infos[index].clock = clock;
+ legacy_serial_infos[index].speed = spd ? be32_to_cpup(spd) : 0;
+ legacy_serial_infos[index].irq_check_parent = irq_check_parent;
+
+ if (iotype == UPIO_TSI) {
+ legacy_serial_ports[index].serial_in = tsi_serial_in;
+ legacy_serial_ports[index].serial_out = tsi_serial_out;
+ }
+
+ printk(KERN_DEBUG "Found legacy serial port %d for %s\n",
+ index, np->full_name);
+ printk(KERN_DEBUG " %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n",
+ (iotype == UPIO_PORT) ? "port" : "mem",
+ (unsigned long long)base, (unsigned long long)taddr, irq,
+ legacy_serial_ports[index].uartclk,
+ legacy_serial_infos[index].speed);
+
+ return index;
+}
+
+static int __init add_legacy_soc_port(struct device_node *np,
+ struct device_node *soc_dev)
+{
+ u64 addr;
+ const u32 *addrp;
+ upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ
+ | UPF_FIXED_PORT;
+ struct device_node *tsi = of_get_parent(np);
+
+ /* We only support ports that have a clock frequency properly
+ * encoded in the device-tree.
+ */
+ if (of_get_property(np, "clock-frequency", NULL) == NULL)
+ return -1;
+
+ /* if reg-shift or offset, don't try to use it */
+ if ((of_get_property(np, "reg-shift", NULL) != NULL) ||
+ (of_get_property(np, "reg-offset", NULL) != NULL))
+ return -1;
+
+ /* if rtas uses this device, don't try to use it as well */
+ if (of_get_property(np, "used-by-rtas", NULL) != NULL)
+ return -1;
+
+ /* Get the address */
+ addrp = of_get_address(soc_dev, 0, NULL, NULL);
+ if (addrp == NULL)
+ return -1;
+
+ addr = of_translate_address(soc_dev, addrp);
+ if (addr == OF_BAD_ADDR)
+ return -1;
+
+ /* Add port, irq will be dealt with later. We passed a translated
+ * IO port value. It will be fixed up later along with the irq
+ */
+ if (tsi && !strcmp(tsi->type, "tsi-bridge"))
+ return add_legacy_port(np, -1, UPIO_TSI, addr, addr, NO_IRQ, flags, 0);
+ else
+ return add_legacy_port(np, -1, UPIO_MEM, addr, addr, NO_IRQ, flags, 0);
+}
+
+static int __init add_legacy_isa_port(struct device_node *np,
+ struct device_node *isa_brg)
+{
+ const __be32 *reg;
+ const char *typep;
+ int index = -1;
+ u64 taddr;
+
+ DBG(" -> add_legacy_isa_port(%s)\n", np->full_name);
+
+ /* Get the ISA port number */
+ reg = of_get_property(np, "reg", NULL);
+ if (reg == NULL)
+ return -1;
+
+ /* Verify it's an IO port, we don't support anything else */
+ if (!(be32_to_cpu(reg[0]) & 0x00000001))
+ return -1;
+
+ /* Now look for an "ibm,aix-loc" property that gives us ordering
+ * if any...
+ */
+ typep = of_get_property(np, "ibm,aix-loc", NULL);
+
+ /* If we have a location index, then use it */
+ if (typep && *typep == 'S')
+ index = simple_strtol(typep+1, NULL, 0) - 1;
+
+ /* Translate ISA address. If it fails, we still register the port
+ * with no translated address so that it can be picked up as an IO
+ * port later by the serial driver
+ */
+ taddr = of_translate_address(np, reg);
+ if (taddr == OF_BAD_ADDR)
+ taddr = 0;
+
+ /* Add port, irq will be dealt with later */
+ return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]), taddr,
+ NO_IRQ, UPF_BOOT_AUTOCONF, 0);
+
+}
+
+#ifdef CONFIG_PCI
+static int __init add_legacy_pci_port(struct device_node *np,
+ struct device_node *pci_dev)
+{
+ u64 addr, base;
+ const u32 *addrp;
+ unsigned int flags;
+ int iotype, index = -1, lindex = 0;
+
+ DBG(" -> add_legacy_pci_port(%s)\n", np->full_name);
+
+ /* We only support ports that have a clock frequency properly
+ * encoded in the device-tree (that is have an fcode). Anything
+ * else can't be used that early and will be normally probed by
+ * the generic 8250_pci driver later on. The reason is that 8250
+ * compatible UARTs on PCI need all sort of quirks (port offsets
+ * etc...) that this code doesn't know about
+ */
+ if (of_get_property(np, "clock-frequency", NULL) == NULL)
+ return -1;
+
+ /* Get the PCI address. Assume BAR 0 */
+ addrp = of_get_pci_address(pci_dev, 0, NULL, &flags);
+ if (addrp == NULL)
+ return -1;
+
+ /* We only support BAR 0 for now */
+ iotype = (flags & IORESOURCE_MEM) ? UPIO_MEM : UPIO_PORT;
+ addr = of_translate_address(pci_dev, addrp);
+ if (addr == OF_BAD_ADDR)
+ return -1;
+
+ /* Set the IO base to the same as the translated address for MMIO,
+ * or to the domain local IO base for PIO (it will be fixed up later)
+ */
+ if (iotype == UPIO_MEM)
+ base = addr;
+ else
+ base = addrp[2];
+
+ /* Try to guess an index... If we have subdevices of the pci dev,
+ * we get to their "reg" property
+ */
+ if (np != pci_dev) {
+ const __be32 *reg = of_get_property(np, "reg", NULL);
+ if (reg && (be32_to_cpup(reg) < 4))
+ index = lindex = be32_to_cpup(reg);
+ }
+
+ /* Local index means it's the Nth port in the PCI chip. Unfortunately
+ * the offset to add here is device specific. We know about those
+ * EXAR ports and we default to the most common case. If your UART
+ * doesn't work for these settings, you'll have to add your own special
+ * cases here
+ */
+ if (of_device_is_compatible(pci_dev, "pci13a8,152") ||
+ of_device_is_compatible(pci_dev, "pci13a8,154") ||
+ of_device_is_compatible(pci_dev, "pci13a8,158")) {
+ addr += 0x200 * lindex;
+ base += 0x200 * lindex;
+ } else {
+ addr += 8 * lindex;
+ base += 8 * lindex;
+ }
+
+ /* Add port, irq will be dealt with later. We passed a translated
+ * IO port value. It will be fixed up later along with the irq
+ */
+ return add_legacy_port(np, index, iotype, base, addr, NO_IRQ,
+ UPF_BOOT_AUTOCONF, np != pci_dev);
+}
+#endif
+
+static void __init setup_legacy_serial_console(int console)
+{
+ struct legacy_serial_info *info =
+ &legacy_serial_infos[console];
+ void __iomem *addr;
+
+ if (info->taddr == 0)
+ return;
+ addr = ioremap(info->taddr, 0x1000);
+ if (addr == NULL)
+ return;
+ if (info->speed == 0)
+ info->speed = udbg_probe_uart_speed(addr, info->clock);
+ DBG("default console speed = %d\n", info->speed);
+ udbg_init_uart(addr, info->speed, info->clock);
+}
+
+/*
+ * This is called very early, as part of setup_system() or eventually
+ * setup_arch(), basically before anything else in this file. This function
+ * will try to build a list of all the available 8250-compatible serial ports
+ * in the machine using the Open Firmware device-tree. It currently only deals
+ * with ISA and PCI busses but could be extended. It allows a very early boot
+ * console to be initialized, that list is also used later to provide 8250 with
+ * the machine non-PCI ports and to properly pick the default console port
+ */
+void __init find_legacy_serial_ports(void)
+{
+ struct device_node *np, *stdout = NULL;
+ const char *path;
+ int index;
+
+ DBG(" -> find_legacy_serial_port()\n");
+
+ /* Now find out if one of these is out firmware console */
+ path = of_get_property(of_chosen, "linux,stdout-path", NULL);
+ if (path != NULL) {
+ stdout = of_find_node_by_path(path);
+ if (stdout)
+ DBG("stdout is %s\n", stdout->full_name);
+ } else {
+ DBG(" no linux,stdout-path !\n");
+ }
+
+ /* Iterate over all the 16550 ports, looking for known parents */
+ for_each_compatible_node(np, "serial", "ns16550") {
+ struct device_node *parent = of_get_parent(np);
+ if (!parent)
+ continue;
+ if (of_match_node(legacy_serial_parents, parent) != NULL) {
+ if (of_device_is_available(np)) {
+ index = add_legacy_soc_port(np, np);
+ if (index >= 0 && np == stdout)
+ legacy_serial_console = index;
+ }
+ }
+ of_node_put(parent);
+ }
+
+ /* Next, fill our array with ISA ports */
+ for_each_node_by_type(np, "serial") {
+ struct device_node *isa = of_get_parent(np);
+ if (isa && !strcmp(isa->name, "isa")) {
+ index = add_legacy_isa_port(np, isa);
+ if (index >= 0 && np == stdout)
+ legacy_serial_console = index;
+ }
+ of_node_put(isa);
+ }
+
+#ifdef CONFIG_PCI
+ /* Next, try to locate PCI ports */
+ for (np = NULL; (np = of_find_all_nodes(np));) {
+ struct device_node *pci, *parent = of_get_parent(np);
+ if (parent && !strcmp(parent->name, "isa")) {
+ of_node_put(parent);
+ continue;
+ }
+ if (strcmp(np->name, "serial") && strcmp(np->type, "serial")) {
+ of_node_put(parent);
+ continue;
+ }
+ /* Check for known pciclass, and also check wether we have
+ * a device with child nodes for ports or not
+ */
+ if (of_device_is_compatible(np, "pciclass,0700") ||
+ of_device_is_compatible(np, "pciclass,070002"))
+ pci = np;
+ else if (of_device_is_compatible(parent, "pciclass,0700") ||
+ of_device_is_compatible(parent, "pciclass,070002"))
+ pci = parent;
+ else {
+ of_node_put(parent);
+ continue;
+ }
+ index = add_legacy_pci_port(np, pci);
+ if (index >= 0 && np == stdout)
+ legacy_serial_console = index;
+ of_node_put(parent);
+ }
+#endif
+
+ DBG("legacy_serial_console = %d\n", legacy_serial_console);
+ if (legacy_serial_console >= 0)
+ setup_legacy_serial_console(legacy_serial_console);
+ DBG(" <- find_legacy_serial_port()\n");
+}
+
+static struct platform_device serial_device = {
+ .name = "serial8250",
+ .id = PLAT8250_DEV_PLATFORM,
+ .dev = {
+ .platform_data = legacy_serial_ports,
+ },
+};
+
+static void __init fixup_port_irq(int index,
+ struct device_node *np,
+ struct plat_serial8250_port *port)
+{
+ unsigned int virq;
+
+ DBG("fixup_port_irq(%d)\n", index);
+
+ virq = irq_of_parse_and_map(np, 0);
+ if (virq == NO_IRQ && legacy_serial_infos[index].irq_check_parent) {
+ np = of_get_parent(np);
+ if (np == NULL)
+ return;
+ virq = irq_of_parse_and_map(np, 0);
+ of_node_put(np);
+ }
+ if (virq == NO_IRQ)
+ return;
+
+ port->irq = virq;
+
+#ifdef CONFIG_SERIAL_8250_FSL
+ if (of_device_is_compatible(np, "fsl,ns16550"))
+ port->handle_irq = fsl8250_handle_irq;
+#endif
+}
+
+static void __init fixup_port_pio(int index,
+ struct device_node *np,
+ struct plat_serial8250_port *port)
+{
+#ifdef CONFIG_PCI
+ struct pci_controller *hose;
+
+ DBG("fixup_port_pio(%d)\n", index);
+
+ hose = pci_find_hose_for_OF_device(np);
+ if (hose) {
+ unsigned long offset = (unsigned long)hose->io_base_virt -
+#ifdef CONFIG_PPC64
+ pci_io_base;
+#else
+ isa_io_base;
+#endif
+ DBG("port %d, IO %lx -> %lx\n",
+ index, port->iobase, port->iobase + offset);
+ port->iobase += offset;
+ }
+#endif
+}
+
+static void __init fixup_port_mmio(int index,
+ struct device_node *np,
+ struct plat_serial8250_port *port)
+{
+ DBG("fixup_port_mmio(%d)\n", index);
+
+ port->membase = ioremap(port->mapbase, 0x100);
+}
+
+/*
+ * This is called as an arch initcall, hopefully before the PCI bus is
+ * probed and/or the 8250 driver loaded since we need to register our
+ * platform devices before 8250 PCI ones are detected as some of them
+ * must properly "override" the platform ones.
+ *
+ * This function fixes up the interrupt value for platform ports as it
+ * couldn't be done earlier before interrupt maps have been parsed. It
+ * also "corrects" the IO address for PIO ports for the same reason,
+ * since earlier, the PHBs virtual IO space wasn't assigned yet. It then
+ * registers all those platform ports for use by the 8250 driver when it
+ * finally loads.
+ */
+static int __init serial_dev_init(void)
+{
+ int i;
+
+ if (legacy_serial_count == 0)
+ return -ENODEV;
+
+ /*
+ * Before we register the platform serial devices, we need
+ * to fixup their interrupts and their IO ports.
+ */
+ DBG("Fixing serial ports interrupts and IO ports ...\n");
+
+ for (i = 0; i < legacy_serial_count; i++) {
+ struct plat_serial8250_port *port = &legacy_serial_ports[i];
+ struct device_node *np = legacy_serial_infos[i].np;
+
+ if (port->irq == NO_IRQ)
+ fixup_port_irq(i, np, port);
+ if (port->iotype == UPIO_PORT)
+ fixup_port_pio(i, np, port);
+ if ((port->iotype == UPIO_MEM) || (port->iotype == UPIO_TSI))
+ fixup_port_mmio(i, np, port);
+ }
+
+ DBG("Registering platform serial ports\n");
+
+ return platform_device_register(&serial_device);
+}
+device_initcall(serial_dev_init);
+
+
+#ifdef CONFIG_SERIAL_8250_CONSOLE
+/*
+ * This is called very early, as part of console_init() (typically just after
+ * time_init()). This function is respondible for trying to find a good
+ * default console on serial ports. It tries to match the open firmware
+ * default output with one of the available serial console drivers that have
+ * been probed earlier by find_legacy_serial_ports()
+ */
+static int __init check_legacy_serial_console(void)
+{
+ struct device_node *prom_stdout = NULL;
+ int i, speed = 0, offset = 0;
+ const char *name;
+ const __be32 *spd;
+
+ DBG(" -> check_legacy_serial_console()\n");
+
+ /* The user has requested a console so this is already set up. */
+ if (strstr(boot_command_line, "console=")) {
+ DBG(" console was specified !\n");
+ return -EBUSY;
+ }
+
+ if (!of_chosen) {
+ DBG(" of_chosen is NULL !\n");
+ return -ENODEV;
+ }
+
+ if (legacy_serial_console < 0) {
+ DBG(" legacy_serial_console not found !\n");
+ return -ENODEV;
+ }
+ /* We are getting a weird phandle from OF ... */
+ /* ... So use the full path instead */
+ name = of_get_property(of_chosen, "linux,stdout-path", NULL);
+ if (name == NULL) {
+ DBG(" no linux,stdout-path !\n");
+ return -ENODEV;
+ }
+ prom_stdout = of_find_node_by_path(name);
+ if (!prom_stdout) {
+ DBG(" can't find stdout package %s !\n", name);
+ return -ENODEV;
+ }
+ DBG("stdout is %s\n", prom_stdout->full_name);
+
+ name = of_get_property(prom_stdout, "name", NULL);
+ if (!name) {
+ DBG(" stdout package has no name !\n");
+ goto not_found;
+ }
+ spd = of_get_property(prom_stdout, "current-speed", NULL);
+ if (spd)
+ speed = be32_to_cpup(spd);
+
+ if (strcmp(name, "serial") != 0)
+ goto not_found;
+
+ /* Look for it in probed array */
+ for (i = 0; i < legacy_serial_count; i++) {
+ if (prom_stdout != legacy_serial_infos[i].np)
+ continue;
+ offset = i;
+ speed = legacy_serial_infos[i].speed;
+ break;
+ }
+ if (i >= legacy_serial_count)
+ goto not_found;
+
+ of_node_put(prom_stdout);
+
+ DBG("Found serial console at ttyS%d\n", offset);
+
+ if (speed) {
+ static char __initdata opt[16];
+ sprintf(opt, "%d", speed);
+ return add_preferred_console("ttyS", offset, opt);
+ } else
+ return add_preferred_console("ttyS", offset, NULL);
+
+ not_found:
+ DBG("No preferred console found !\n");
+ of_node_put(prom_stdout);
+ return -ENODEV;
+}
+console_initcall(check_legacy_serial_console);
+
+#endif /* CONFIG_SERIAL_8250_CONSOLE */
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
new file mode 100644
index 00000000..f5725bce
--- /dev/null
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -0,0 +1,718 @@
+/*
+ * PowerPC64 LPAR Configuration Information Driver
+ *
+ * Dave Engebretsen engebret@us.ibm.com
+ * Copyright (c) 2003 Dave Engebretsen
+ * Will Schmidt willschm@us.ibm.com
+ * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation.
+ * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation.
+ * Nathan Lynch nathanl@austin.ibm.com
+ * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This driver creates a proc file at /proc/ppc64/lparcfg which contains
+ * keyword - value pairs that specify the configuration of the partition.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+#include <asm/lppaca.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/rtas.h>
+#include <asm/time.h>
+#include <asm/prom.h>
+#include <asm/vdso_datapage.h>
+#include <asm/vio.h>
+#include <asm/mmu.h>
+
+#define MODULE_VERS "1.9"
+#define MODULE_NAME "lparcfg"
+
+/* #define LPARCFG_DEBUG */
+
+static struct proc_dir_entry *proc_ppc64_lparcfg;
+
+/*
+ * Track sum of all purrs across all processors. This is used to further
+ * calculate usage values by different applications
+ */
+static unsigned long get_purr(void)
+{
+ unsigned long sum_purr = 0;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct cpu_usage *cu;
+
+ cu = &per_cpu(cpu_usage_array, cpu);
+ sum_purr += cu->current_tb;
+ }
+ return sum_purr;
+}
+
+/*
+ * Methods used to fetch LPAR data when running on a pSeries platform.
+ */
+
+struct hvcall_ppp_data {
+ u64 entitlement;
+ u64 unallocated_entitlement;
+ u16 group_num;
+ u16 pool_num;
+ u8 capped;
+ u8 weight;
+ u8 unallocated_weight;
+ u16 active_procs_in_pool;
+ u16 active_system_procs;
+ u16 phys_platform_procs;
+ u32 max_proc_cap_avail;
+ u32 entitled_proc_cap_avail;
+};
+
+/*
+ * H_GET_PPP hcall returns info in 4 parms.
+ * entitled_capacity,unallocated_capacity,
+ * aggregation, resource_capability).
+ *
+ * R4 = Entitled Processor Capacity Percentage.
+ * R5 = Unallocated Processor Capacity Percentage.
+ * R6 (AABBCCDDEEFFGGHH).
+ * XXXX - reserved (0)
+ * XXXX - reserved (0)
+ * XXXX - Group Number
+ * XXXX - Pool Number.
+ * R7 (IIJJKKLLMMNNOOPP).
+ * XX - reserved. (0)
+ * XX - bit 0-6 reserved (0). bit 7 is Capped indicator.
+ * XX - variable processor Capacity Weight
+ * XX - Unallocated Variable Processor Capacity Weight.
+ * XXXX - Active processors in Physical Processor Pool.
+ * XXXX - Processors active on platform.
+ * R8 (QQQQRRRRRRSSSSSS). if ibm,partition-performance-parameters-level >= 1
+ * XXXX - Physical platform procs allocated to virtualization.
+ * XXXXXX - Max procs capacity % available to the partitions pool.
+ * XXXXXX - Entitled procs capacity % available to the
+ * partitions pool.
+ */
+static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
+{
+ unsigned long rc;
+ unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+ rc = plpar_hcall9(H_GET_PPP, retbuf);
+
+ ppp_data->entitlement = retbuf[0];
+ ppp_data->unallocated_entitlement = retbuf[1];
+
+ ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+ ppp_data->pool_num = retbuf[2] & 0xffff;
+
+ ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01;
+ ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff;
+ ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff;
+ ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff;
+ ppp_data->active_system_procs = retbuf[3] & 0xffff;
+
+ ppp_data->phys_platform_procs = retbuf[4] >> 6 * 8;
+ ppp_data->max_proc_cap_avail = (retbuf[4] >> 3 * 8) & 0xffffff;
+ ppp_data->entitled_proc_cap_avail = retbuf[4] & 0xffffff;
+
+ return rc;
+}
+
+static unsigned h_pic(unsigned long *pool_idle_time,
+ unsigned long *num_procs)
+{
+ unsigned long rc;
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+ rc = plpar_hcall(H_PIC, retbuf);
+
+ *pool_idle_time = retbuf[0];
+ *num_procs = retbuf[1];
+
+ return rc;
+}
+
+/*
+ * parse_ppp_data
+ * Parse out the data returned from h_get_ppp and h_pic
+ */
+static void parse_ppp_data(struct seq_file *m)
+{
+ struct hvcall_ppp_data ppp_data;
+ struct device_node *root;
+ const int *perf_level;
+ int rc;
+
+ rc = h_get_ppp(&ppp_data);
+ if (rc)
+ return;
+
+ seq_printf(m, "partition_entitled_capacity=%lld\n",
+ ppp_data.entitlement);
+ seq_printf(m, "group=%d\n", ppp_data.group_num);
+ seq_printf(m, "system_active_processors=%d\n",
+ ppp_data.active_system_procs);
+
+ /* pool related entries are appropriate for shared configs */
+ if (lppaca_of(0).shared_proc) {
+ unsigned long pool_idle_time, pool_procs;
+
+ seq_printf(m, "pool=%d\n", ppp_data.pool_num);
+
+ /* report pool_capacity in percentage */
+ seq_printf(m, "pool_capacity=%d\n",
+ ppp_data.active_procs_in_pool * 100);
+
+ h_pic(&pool_idle_time, &pool_procs);
+ seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
+ seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
+ }
+
+ seq_printf(m, "unallocated_capacity_weight=%d\n",
+ ppp_data.unallocated_weight);
+ seq_printf(m, "capacity_weight=%d\n", ppp_data.weight);
+ seq_printf(m, "capped=%d\n", ppp_data.capped);
+ seq_printf(m, "unallocated_capacity=%lld\n",
+ ppp_data.unallocated_entitlement);
+
+ /* The last bits of information returned from h_get_ppp are only
+ * valid if the ibm,partition-performance-parameters-level
+ * property is >= 1.
+ */
+ root = of_find_node_by_path("/");
+ if (root) {
+ perf_level = of_get_property(root,
+ "ibm,partition-performance-parameters-level",
+ NULL);
+ if (perf_level && (*perf_level >= 1)) {
+ seq_printf(m,
+ "physical_procs_allocated_to_virtualization=%d\n",
+ ppp_data.phys_platform_procs);
+ seq_printf(m, "max_proc_capacity_available=%d\n",
+ ppp_data.max_proc_cap_avail);
+ seq_printf(m, "entitled_proc_capacity_available=%d\n",
+ ppp_data.entitled_proc_cap_avail);
+ }
+
+ of_node_put(root);
+ }
+}
+
+/**
+ * parse_mpp_data
+ * Parse out data returned from h_get_mpp
+ */
+static void parse_mpp_data(struct seq_file *m)
+{
+ struct hvcall_mpp_data mpp_data;
+ int rc;
+
+ rc = h_get_mpp(&mpp_data);
+ if (rc)
+ return;
+
+ seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem);
+
+ if (mpp_data.mapped_mem != -1)
+ seq_printf(m, "mapped_entitled_memory=%ld\n",
+ mpp_data.mapped_mem);
+
+ seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num);
+ seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num);
+
+ seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight);
+ seq_printf(m, "unallocated_entitled_memory_weight=%d\n",
+ mpp_data.unallocated_mem_weight);
+ seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n",
+ mpp_data.unallocated_entitlement);
+
+ if (mpp_data.pool_size != -1)
+ seq_printf(m, "entitled_memory_pool_size=%ld bytes\n",
+ mpp_data.pool_size);
+
+ seq_printf(m, "entitled_memory_loan_request=%ld\n",
+ mpp_data.loan_request);
+
+ seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem);
+}
+
+/**
+ * parse_mpp_x_data
+ * Parse out data returned from h_get_mpp_x
+ */
+static void parse_mpp_x_data(struct seq_file *m)
+{
+ struct hvcall_mpp_x_data mpp_x_data;
+
+ if (!firmware_has_feature(FW_FEATURE_XCMO))
+ return;
+ if (h_get_mpp_x(&mpp_x_data))
+ return;
+
+ seq_printf(m, "coalesced_bytes=%ld\n", mpp_x_data.coalesced_bytes);
+
+ if (mpp_x_data.pool_coalesced_bytes)
+ seq_printf(m, "pool_coalesced_bytes=%ld\n",
+ mpp_x_data.pool_coalesced_bytes);
+ if (mpp_x_data.pool_purr_cycles)
+ seq_printf(m, "coalesce_pool_purr=%ld\n", mpp_x_data.pool_purr_cycles);
+ if (mpp_x_data.pool_spurr_cycles)
+ seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles);
+}
+
+#define SPLPAR_CHARACTERISTICS_TOKEN 20
+#define SPLPAR_MAXLENGTH 1026*(sizeof(char))
+
+/*
+ * parse_system_parameter_string()
+ * Retrieve the potential_processors, max_entitled_capacity and friends
+ * through the get-system-parameter rtas call. Replace keyword strings as
+ * necessary.
+ */
+static void parse_system_parameter_string(struct seq_file *m)
+{
+ int call_status;
+
+ unsigned char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
+ if (!local_buffer) {
+ printk(KERN_ERR "%s %s kmalloc failure at line %d\n",
+ __FILE__, __func__, __LINE__);
+ return;
+ }
+
+ spin_lock(&rtas_data_buf_lock);
+ memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH);
+ call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+ NULL,
+ SPLPAR_CHARACTERISTICS_TOKEN,
+ __pa(rtas_data_buf),
+ RTAS_DATA_BUF_SIZE);
+ memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH);
+ spin_unlock(&rtas_data_buf_lock);
+
+ if (call_status != 0) {
+ printk(KERN_INFO
+ "%s %s Error calling get-system-parameter (0x%x)\n",
+ __FILE__, __func__, call_status);
+ } else {
+ int splpar_strlen;
+ int idx, w_idx;
+ char *workbuffer = kzalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
+ if (!workbuffer) {
+ printk(KERN_ERR "%s %s kmalloc failure at line %d\n",
+ __FILE__, __func__, __LINE__);
+ kfree(local_buffer);
+ return;
+ }
+#ifdef LPARCFG_DEBUG
+ printk(KERN_INFO "success calling get-system-parameter\n");
+#endif
+ splpar_strlen = local_buffer[0] * 256 + local_buffer[1];
+ local_buffer += 2; /* step over strlen value */
+
+ w_idx = 0;
+ idx = 0;
+ while ((*local_buffer) && (idx < splpar_strlen)) {
+ workbuffer[w_idx++] = local_buffer[idx++];
+ if ((local_buffer[idx] == ',')
+ || (local_buffer[idx] == '\0')) {
+ workbuffer[w_idx] = '\0';
+ if (w_idx) {
+ /* avoid the empty string */
+ seq_printf(m, "%s\n", workbuffer);
+ }
+ memset(workbuffer, 0, SPLPAR_MAXLENGTH);
+ idx++; /* skip the comma */
+ w_idx = 0;
+ } else if (local_buffer[idx] == '=') {
+ /* code here to replace workbuffer contents
+ with different keyword strings */
+ if (0 == strcmp(workbuffer, "MaxEntCap")) {
+ strcpy(workbuffer,
+ "partition_max_entitled_capacity");
+ w_idx = strlen(workbuffer);
+ }
+ if (0 == strcmp(workbuffer, "MaxPlatProcs")) {
+ strcpy(workbuffer,
+ "system_potential_processors");
+ w_idx = strlen(workbuffer);
+ }
+ }
+ }
+ kfree(workbuffer);
+ local_buffer -= 2; /* back up over strlen value */
+ }
+ kfree(local_buffer);
+}
+
+/* Return the number of processors in the system.
+ * This function reads through the device tree and counts
+ * the virtual processors, this does not include threads.
+ */
+static int lparcfg_count_active_processors(void)
+{
+ struct device_node *cpus_dn = NULL;
+ int count = 0;
+
+ while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) {
+#ifdef LPARCFG_DEBUG
+ printk(KERN_ERR "cpus_dn %p\n", cpus_dn);
+#endif
+ count++;
+ }
+ return count;
+}
+
+static void pseries_cmo_data(struct seq_file *m)
+{
+ int cpu;
+ unsigned long cmo_faults = 0;
+ unsigned long cmo_fault_time = 0;
+
+ seq_printf(m, "cmo_enabled=%d\n", firmware_has_feature(FW_FEATURE_CMO));
+
+ if (!firmware_has_feature(FW_FEATURE_CMO))
+ return;
+
+ for_each_possible_cpu(cpu) {
+ cmo_faults += lppaca_of(cpu).cmo_faults;
+ cmo_fault_time += lppaca_of(cpu).cmo_fault_time;
+ }
+
+ seq_printf(m, "cmo_faults=%lu\n", cmo_faults);
+ seq_printf(m, "cmo_fault_time_usec=%lu\n",
+ cmo_fault_time / tb_ticks_per_usec);
+ seq_printf(m, "cmo_primary_psp=%d\n", cmo_get_primary_psp());
+ seq_printf(m, "cmo_secondary_psp=%d\n", cmo_get_secondary_psp());
+ seq_printf(m, "cmo_page_size=%lu\n", cmo_get_page_size());
+}
+
+static void splpar_dispatch_data(struct seq_file *m)
+{
+ int cpu;
+ unsigned long dispatches = 0;
+ unsigned long dispatch_dispersions = 0;
+
+ for_each_possible_cpu(cpu) {
+ dispatches += lppaca_of(cpu).yield_count;
+ dispatch_dispersions += lppaca_of(cpu).dispersion_count;
+ }
+
+ seq_printf(m, "dispatches=%lu\n", dispatches);
+ seq_printf(m, "dispatch_dispersions=%lu\n", dispatch_dispersions);
+}
+
+static void parse_em_data(struct seq_file *m)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+ if (plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS)
+ seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]);
+}
+
+static int pseries_lparcfg_data(struct seq_file *m, void *v)
+{
+ int partition_potential_processors;
+ int partition_active_processors;
+ struct device_node *rtas_node;
+ const int *lrdrp = NULL;
+
+ rtas_node = of_find_node_by_path("/rtas");
+ if (rtas_node)
+ lrdrp = of_get_property(rtas_node, "ibm,lrdr-capacity", NULL);
+
+ if (lrdrp == NULL) {
+ partition_potential_processors = vdso_data->processorCount;
+ } else {
+ partition_potential_processors = *(lrdrp + 4);
+ }
+ of_node_put(rtas_node);
+
+ partition_active_processors = lparcfg_count_active_processors();
+
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ /* this call handles the ibm,get-system-parameter contents */
+ parse_system_parameter_string(m);
+ parse_ppp_data(m);
+ parse_mpp_data(m);
+ parse_mpp_x_data(m);
+ pseries_cmo_data(m);
+ splpar_dispatch_data(m);
+
+ seq_printf(m, "purr=%ld\n", get_purr());
+ } else { /* non SPLPAR case */
+
+ seq_printf(m, "system_active_processors=%d\n",
+ partition_potential_processors);
+
+ seq_printf(m, "system_potential_processors=%d\n",
+ partition_potential_processors);
+
+ seq_printf(m, "partition_max_entitled_capacity=%d\n",
+ partition_potential_processors * 100);
+
+ seq_printf(m, "partition_entitled_capacity=%d\n",
+ partition_active_processors * 100);
+ }
+
+ seq_printf(m, "partition_active_processors=%d\n",
+ partition_active_processors);
+
+ seq_printf(m, "partition_potential_processors=%d\n",
+ partition_potential_processors);
+
+ seq_printf(m, "shared_processor_mode=%d\n", lppaca_of(0).shared_proc);
+
+ seq_printf(m, "slb_size=%d\n", mmu_slb_size);
+
+ parse_em_data(m);
+
+ return 0;
+}
+
+static ssize_t update_ppp(u64 *entitlement, u8 *weight)
+{
+ struct hvcall_ppp_data ppp_data;
+ u8 new_weight;
+ u64 new_entitled;
+ ssize_t retval;
+
+ /* Get our current parameters */
+ retval = h_get_ppp(&ppp_data);
+ if (retval)
+ return retval;
+
+ if (entitlement) {
+ new_weight = ppp_data.weight;
+ new_entitled = *entitlement;
+ } else if (weight) {
+ new_weight = *weight;
+ new_entitled = ppp_data.entitlement;
+ } else
+ return -EINVAL;
+
+ pr_debug("%s: current_entitled = %llu, current_weight = %u\n",
+ __func__, ppp_data.entitlement, ppp_data.weight);
+
+ pr_debug("%s: new_entitled = %llu, new_weight = %u\n",
+ __func__, new_entitled, new_weight);
+
+ retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight);
+ return retval;
+}
+
+/**
+ * update_mpp
+ *
+ * Update the memory entitlement and weight for the partition. Caller must
+ * specify either a new entitlement or weight, not both, to be updated
+ * since the h_set_mpp call takes both entitlement and weight as parameters.
+ */
+static ssize_t update_mpp(u64 *entitlement, u8 *weight)
+{
+ struct hvcall_mpp_data mpp_data;
+ u64 new_entitled;
+ u8 new_weight;
+ ssize_t rc;
+
+ if (entitlement) {
+ /* Check with vio to ensure the new memory entitlement
+ * can be handled.
+ */
+ rc = vio_cmo_entitlement_update(*entitlement);
+ if (rc)
+ return rc;
+ }
+
+ rc = h_get_mpp(&mpp_data);
+ if (rc)
+ return rc;
+
+ if (entitlement) {
+ new_weight = mpp_data.mem_weight;
+ new_entitled = *entitlement;
+ } else if (weight) {
+ new_weight = *weight;
+ new_entitled = mpp_data.entitled_mem;
+ } else
+ return -EINVAL;
+
+ pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
+ __func__, mpp_data.entitled_mem, mpp_data.mem_weight);
+
+ pr_debug("%s: new_entitled = %llu, new_weight = %u\n",
+ __func__, new_entitled, new_weight);
+
+ rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight);
+ return rc;
+}
+
+/*
+ * Interface for changing system parameters (variable capacity weight
+ * and entitled capacity). Format of input is "param_name=value";
+ * anything after value is ignored. Valid parameters at this time are
+ * "partition_entitled_capacity" and "capacity_weight". We use
+ * H_SET_PPP to alter parameters.
+ *
+ * This function should be invoked only on systems with
+ * FW_FEATURE_SPLPAR.
+ */
+static ssize_t lparcfg_write(struct file *file, const char __user * buf,
+ size_t count, loff_t * off)
+{
+ int kbuf_sz = 64;
+ char kbuf[kbuf_sz];
+ char *tmp;
+ u64 new_entitled, *new_entitled_ptr = &new_entitled;
+ u8 new_weight, *new_weight_ptr = &new_weight;
+ ssize_t retval;
+
+ if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+ return -EINVAL;
+
+ if (count > kbuf_sz)
+ return -EINVAL;
+
+ if (copy_from_user(kbuf, buf, count))
+ return -EFAULT;
+
+ kbuf[count - 1] = '\0';
+ tmp = strchr(kbuf, '=');
+ if (!tmp)
+ return -EINVAL;
+
+ *tmp++ = '\0';
+
+ if (!strcmp(kbuf, "partition_entitled_capacity")) {
+ char *endp;
+ *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
+ if (endp == tmp)
+ return -EINVAL;
+
+ retval = update_ppp(new_entitled_ptr, NULL);
+ } else if (!strcmp(kbuf, "capacity_weight")) {
+ char *endp;
+ *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
+ if (endp == tmp)
+ return -EINVAL;
+
+ retval = update_ppp(NULL, new_weight_ptr);
+ } else if (!strcmp(kbuf, "entitled_memory")) {
+ char *endp;
+ *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
+ if (endp == tmp)
+ return -EINVAL;
+
+ retval = update_mpp(new_entitled_ptr, NULL);
+ } else if (!strcmp(kbuf, "entitled_memory_weight")) {
+ char *endp;
+ *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
+ if (endp == tmp)
+ return -EINVAL;
+
+ retval = update_mpp(NULL, new_weight_ptr);
+ } else
+ return -EINVAL;
+
+ if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
+ retval = count;
+ } else if (retval == H_BUSY) {
+ retval = -EBUSY;
+ } else if (retval == H_HARDWARE) {
+ retval = -EIO;
+ } else if (retval == H_PARAMETER) {
+ retval = -EINVAL;
+ }
+
+ return retval;
+}
+
+static int lparcfg_data(struct seq_file *m, void *v)
+{
+ struct device_node *rootdn;
+ const char *model = "";
+ const char *system_id = "";
+ const char *tmp;
+ const unsigned int *lp_index_ptr;
+ unsigned int lp_index = 0;
+
+ seq_printf(m, "%s %s\n", MODULE_NAME, MODULE_VERS);
+
+ rootdn = of_find_node_by_path("/");
+ if (rootdn) {
+ tmp = of_get_property(rootdn, "model", NULL);
+ if (tmp)
+ model = tmp;
+ tmp = of_get_property(rootdn, "system-id", NULL);
+ if (tmp)
+ system_id = tmp;
+ lp_index_ptr = of_get_property(rootdn, "ibm,partition-no",
+ NULL);
+ if (lp_index_ptr)
+ lp_index = *lp_index_ptr;
+ of_node_put(rootdn);
+ }
+ seq_printf(m, "serial_number=%s\n", system_id);
+ seq_printf(m, "system_type=%s\n", model);
+ seq_printf(m, "partition_id=%d\n", (int)lp_index);
+
+ return pseries_lparcfg_data(m, v);
+}
+
+static int lparcfg_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, lparcfg_data, NULL);
+}
+
+static const struct file_operations lparcfg_fops = {
+ .owner = THIS_MODULE,
+ .read = seq_read,
+ .write = lparcfg_write,
+ .open = lparcfg_open,
+ .release = single_release,
+ .llseek = seq_lseek,
+};
+
+static int __init lparcfg_init(void)
+{
+ struct proc_dir_entry *ent;
+ umode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
+
+ /* Allow writing if we have FW_FEATURE_SPLPAR */
+ if (firmware_has_feature(FW_FEATURE_SPLPAR))
+ mode |= S_IWUSR;
+
+ ent = proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops);
+ if (!ent) {
+ printk(KERN_ERR "Failed to create powerpc/lparcfg\n");
+ return -EIO;
+ }
+
+ proc_ppc64_lparcfg = ent;
+ return 0;
+}
+
+static void __exit lparcfg_cleanup(void)
+{
+ if (proc_ppc64_lparcfg)
+ remove_proc_entry("lparcfg", proc_ppc64_lparcfg->parent);
+}
+
+module_init(lparcfg_init);
+module_exit(lparcfg_cleanup);
+MODULE_DESCRIPTION("Interface for LPAR configuration data");
+MODULE_AUTHOR("Dave Engebretsen");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
new file mode 100644
index 00000000..5df77779
--- /dev/null
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -0,0 +1,251 @@
+/*
+ * Code to handle transition of Linux booting another kernel.
+ *
+ * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * Copyright (C) 2005 IBM Corporation.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/kexec.h>
+#include <linux/reboot.h>
+#include <linux/threads.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/irq.h>
+#include <linux/ftrace.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/sections.h>
+
+void machine_kexec_mask_interrupts(void) {
+ unsigned int i;
+ struct irq_desc *desc;
+
+ for_each_irq_desc(i, desc) {
+ struct irq_chip *chip;
+
+ chip = irq_desc_get_chip(desc);
+ if (!chip)
+ continue;
+
+ if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
+ chip->irq_eoi(&desc->irq_data);
+
+ if (chip->irq_mask)
+ chip->irq_mask(&desc->irq_data);
+
+ if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
+ chip->irq_disable(&desc->irq_data);
+ }
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ default_machine_crash_shutdown(regs);
+}
+
+/*
+ * Do what every setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+ if (ppc_md.machine_kexec_prepare)
+ return ppc_md.machine_kexec_prepare(image);
+ else
+ return default_machine_kexec_prepare(image);
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+void arch_crash_save_vmcoreinfo(void)
+{
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+ VMCOREINFO_SYMBOL(node_data);
+ VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#endif
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+ VMCOREINFO_SYMBOL(contig_page_data);
+#endif
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+void machine_kexec(struct kimage *image)
+{
+ int save_ftrace_enabled;
+
+ save_ftrace_enabled = __ftrace_enabled_save();
+
+ if (ppc_md.machine_kexec)
+ ppc_md.machine_kexec(image);
+ else
+ default_machine_kexec(image);
+
+ __ftrace_enabled_restore(save_ftrace_enabled);
+
+ /* Fall back to normal restart if we're still alive. */
+ machine_restart(NULL);
+ for(;;);
+}
+
+void __init reserve_crashkernel(void)
+{
+ unsigned long long crash_size, crash_base;
+ int ret;
+
+ /* use common parsing */
+ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+ &crash_size, &crash_base);
+ if (ret == 0 && crash_size > 0) {
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
+ }
+
+ if (crashk_res.end == crashk_res.start) {
+ crashk_res.start = crashk_res.end = 0;
+ return;
+ }
+
+ /* We might have got these values via the command line or the
+ * device tree, either way sanitise them now. */
+
+ crash_size = resource_size(&crashk_res);
+
+#ifndef CONFIG_NONSTATIC_KERNEL
+ if (crashk_res.start != KDUMP_KERNELBASE)
+ printk("Crash kernel location must be 0x%x\n",
+ KDUMP_KERNELBASE);
+
+ crashk_res.start = KDUMP_KERNELBASE;
+#else
+ if (!crashk_res.start) {
+#ifdef CONFIG_PPC64
+ /*
+ * On 64bit we split the RMO in half but cap it at half of
+ * a small SLB (128MB) since the crash kernel needs to place
+ * itself and some stacks to be in the first segment.
+ */
+ crashk_res.start = min(0x80000000ULL, (ppc64_rma_size / 2));
+#else
+ crashk_res.start = KDUMP_KERNELBASE;
+#endif
+ }
+
+ crash_base = PAGE_ALIGN(crashk_res.start);
+ if (crash_base != crashk_res.start) {
+ printk("Crash kernel base must be aligned to 0x%lx\n",
+ PAGE_SIZE);
+ crashk_res.start = crash_base;
+ }
+
+#endif
+ crash_size = PAGE_ALIGN(crash_size);
+ crashk_res.end = crashk_res.start + crash_size - 1;
+
+ /* The crash region must not overlap the current kernel */
+ if (overlaps_crashkernel(__pa(_stext), _end - _stext)) {
+ printk(KERN_WARNING
+ "Crash kernel can not overlap current kernel\n");
+ crashk_res.start = crashk_res.end = 0;
+ return;
+ }
+
+ /* Crash kernel trumps memory limit */
+ if (memory_limit && memory_limit <= crashk_res.end) {
+ memory_limit = crashk_res.end + 1;
+ printk("Adjusted memory limit for crashkernel, now 0x%llx\n",
+ (unsigned long long)memory_limit);
+ }
+
+ printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+ "for crashkernel (System RAM: %ldMB)\n",
+ (unsigned long)(crash_size >> 20),
+ (unsigned long)(crashk_res.start >> 20),
+ (unsigned long)(memblock_phys_mem_size() >> 20));
+
+ memblock_reserve(crashk_res.start, crash_size);
+}
+
+int overlaps_crashkernel(unsigned long start, unsigned long size)
+{
+ return (start + size) > crashk_res.start && start <= crashk_res.end;
+}
+
+/* Values we need to export to the second kernel via the device tree. */
+static phys_addr_t kernel_end;
+static phys_addr_t crashk_size;
+
+static struct property kernel_end_prop = {
+ .name = "linux,kernel-end",
+ .length = sizeof(phys_addr_t),
+ .value = &kernel_end,
+};
+
+static struct property crashk_base_prop = {
+ .name = "linux,crashkernel-base",
+ .length = sizeof(phys_addr_t),
+ .value = &crashk_res.start,
+};
+
+static struct property crashk_size_prop = {
+ .name = "linux,crashkernel-size",
+ .length = sizeof(phys_addr_t),
+ .value = &crashk_size,
+};
+
+static void __init export_crashk_values(struct device_node *node)
+{
+ struct property *prop;
+
+ /* There might be existing crash kernel properties, but we can't
+ * be sure what's in them, so remove them. */
+ prop = of_find_property(node, "linux,crashkernel-base", NULL);
+ if (prop)
+ prom_remove_property(node, prop);
+
+ prop = of_find_property(node, "linux,crashkernel-size", NULL);
+ if (prop)
+ prom_remove_property(node, prop);
+
+ if (crashk_res.start != 0) {
+ prom_add_property(node, &crashk_base_prop);
+ crashk_size = resource_size(&crashk_res);
+ prom_add_property(node, &crashk_size_prop);
+ }
+}
+
+static int __init kexec_setup(void)
+{
+ struct device_node *node;
+ struct property *prop;
+
+ node = of_find_node_by_path("/chosen");
+ if (!node)
+ return -ENOENT;
+
+ /* remove any stale properties so ours can be found */
+ prop = of_find_property(node, kernel_end_prop.name, NULL);
+ if (prop)
+ prom_remove_property(node, prop);
+
+ /* information needed by userspace when using default_machine_kexec */
+ kernel_end = __pa(_end);
+ prom_add_property(node, &kernel_end_prop);
+
+ export_crashk_values(node);
+
+ of_node_put(node);
+ return 0;
+}
+late_initcall(kexec_setup);
diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c
new file mode 100644
index 00000000..affe5dcc
--- /dev/null
+++ b/arch/powerpc/kernel/machine_kexec_32.c
@@ -0,0 +1,69 @@
+/*
+ * PPC32 code to handle Linux booting another kernel.
+ *
+ * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * Copyright (C) 2005 IBM Corporation.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/kexec.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <asm/cacheflush.h>
+#include <asm/hw_irq.h>
+#include <asm/io.h>
+
+typedef void (*relocate_new_kernel_t)(
+ unsigned long indirection_page,
+ unsigned long reboot_code_buffer,
+ unsigned long start_address) __noreturn;
+
+/*
+ * This is a generic machine_kexec function suitable at least for
+ * non-OpenFirmware embedded platforms.
+ * It merely copies the image relocation code to the control page and
+ * jumps to it.
+ * A platform specific function may just call this one.
+ */
+void default_machine_kexec(struct kimage *image)
+{
+ extern const unsigned char relocate_new_kernel[];
+ extern const unsigned int relocate_new_kernel_size;
+ unsigned long page_list;
+ unsigned long reboot_code_buffer, reboot_code_buffer_phys;
+ relocate_new_kernel_t rnk;
+
+ /* Interrupts aren't acceptable while we reboot */
+ local_irq_disable();
+
+ /* mask each interrupt so we are in a more sane state for the
+ * kexec kernel */
+ machine_kexec_mask_interrupts();
+
+ page_list = image->head;
+
+ /* we need both effective and real address here */
+ reboot_code_buffer =
+ (unsigned long)page_address(image->control_code_page);
+ reboot_code_buffer_phys = virt_to_phys((void *)reboot_code_buffer);
+
+ /* copy our kernel relocation code to the control code page */
+ memcpy((void *)reboot_code_buffer, relocate_new_kernel,
+ relocate_new_kernel_size);
+
+ flush_icache_range(reboot_code_buffer,
+ reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
+ printk(KERN_INFO "Bye!\n");
+
+ /* now call it */
+ rnk = (relocate_new_kernel_t) reboot_code_buffer;
+ (*rnk)(page_list, reboot_code_buffer_phys, image->start);
+}
+
+int default_machine_kexec_prepare(struct kimage *image)
+{
+ return 0;
+}
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
new file mode 100644
index 00000000..d7f60908
--- /dev/null
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -0,0 +1,404 @@
+/*
+ * PPC64 code to handle Linux booting another kernel.
+ *
+ * Copyright (C) 2004-2005, IBM Corp.
+ *
+ * Created by: Milton D Miller II
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+
+#include <linux/kexec.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+#include <linux/init_task.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/machdep.h>
+#include <asm/cacheflush.h>
+#include <asm/paca.h>
+#include <asm/mmu.h>
+#include <asm/sections.h> /* _end */
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/hw_breakpoint.h>
+
+int default_machine_kexec_prepare(struct kimage *image)
+{
+ int i;
+ unsigned long begin, end; /* limits of segment */
+ unsigned long low, high; /* limits of blocked memory range */
+ struct device_node *node;
+ const unsigned long *basep;
+ const unsigned int *sizep;
+
+ if (!ppc_md.hpte_clear_all)
+ return -ENOENT;
+
+ /*
+ * Since we use the kernel fault handlers and paging code to
+ * handle the virtual mode, we must make sure no destination
+ * overlaps kernel static data or bss.
+ */
+ for (i = 0; i < image->nr_segments; i++)
+ if (image->segment[i].mem < __pa(_end))
+ return -ETXTBSY;
+
+ /*
+ * For non-LPAR, we absolutely can not overwrite the mmu hash
+ * table, since we are still using the bolted entries in it to
+ * do the copy. Check that here.
+ *
+ * It is safe if the end is below the start of the blocked
+ * region (end <= low), or if the beginning is after the
+ * end of the blocked region (begin >= high). Use the
+ * boolean identity !(a || b) === (!a && !b).
+ */
+ if (htab_address) {
+ low = __pa(htab_address);
+ high = low + htab_size_bytes;
+
+ for (i = 0; i < image->nr_segments; i++) {
+ begin = image->segment[i].mem;
+ end = begin + image->segment[i].memsz;
+
+ if ((begin < high) && (end > low))
+ return -ETXTBSY;
+ }
+ }
+
+ /* We also should not overwrite the tce tables */
+ for_each_node_by_type(node, "pci") {
+ basep = of_get_property(node, "linux,tce-base", NULL);
+ sizep = of_get_property(node, "linux,tce-size", NULL);
+ if (basep == NULL || sizep == NULL)
+ continue;
+
+ low = *basep;
+ high = low + (*sizep);
+
+ for (i = 0; i < image->nr_segments; i++) {
+ begin = image->segment[i].mem;
+ end = begin + image->segment[i].memsz;
+
+ if ((begin < high) && (end > low))
+ return -ETXTBSY;
+ }
+ }
+
+ return 0;
+}
+
+#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)
+
+static void copy_segments(unsigned long ind)
+{
+ unsigned long entry;
+ unsigned long *ptr;
+ void *dest;
+ void *addr;
+
+ /*
+ * We rely on kexec_load to create a lists that properly
+ * initializes these pointers before they are used.
+ * We will still crash if the list is wrong, but at least
+ * the compiler will be quiet.
+ */
+ ptr = NULL;
+ dest = NULL;
+
+ for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
+ addr = __va(entry & PAGE_MASK);
+
+ switch (entry & IND_FLAGS) {
+ case IND_DESTINATION:
+ dest = addr;
+ break;
+ case IND_INDIRECTION:
+ ptr = addr;
+ break;
+ case IND_SOURCE:
+ copy_page(dest, addr);
+ dest += PAGE_SIZE;
+ }
+ }
+}
+
+void kexec_copy_flush(struct kimage *image)
+{
+ long i, nr_segments = image->nr_segments;
+ struct kexec_segment ranges[KEXEC_SEGMENT_MAX];
+
+ /* save the ranges on the stack to efficiently flush the icache */
+ memcpy(ranges, image->segment, sizeof(ranges));
+
+ /*
+ * After this call we may not use anything allocated in dynamic
+ * memory, including *image.
+ *
+ * Only globals and the stack are allowed.
+ */
+ copy_segments(image->head);
+
+ /*
+ * we need to clear the icache for all dest pages sometime,
+ * including ones that were in place on the original copy
+ */
+ for (i = 0; i < nr_segments; i++)
+ flush_icache_range((unsigned long)__va(ranges[i].mem),
+ (unsigned long)__va(ranges[i].mem + ranges[i].memsz));
+}
+
+#ifdef CONFIG_SMP
+
+static int kexec_all_irq_disabled = 0;
+
+static void kexec_smp_down(void *arg)
+{
+ local_irq_disable();
+ mb(); /* make sure our irqs are disabled before we say they are */
+ get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
+ while(kexec_all_irq_disabled == 0)
+ cpu_relax();
+ mb(); /* make sure all irqs are disabled before this */
+ hw_breakpoint_disable();
+ /*
+ * Now every CPU has IRQs off, we can clear out any pending
+ * IPIs and be sure that no more will come in after this.
+ */
+ if (ppc_md.kexec_cpu_down)
+ ppc_md.kexec_cpu_down(0, 1);
+
+ kexec_smp_wait();
+ /* NOTREACHED */
+}
+
+static void kexec_prepare_cpus_wait(int wait_state)
+{
+ int my_cpu, i, notified=-1;
+
+ hw_breakpoint_disable();
+ my_cpu = get_cpu();
+ /* Make sure each CPU has at least made it to the state we need.
+ *
+ * FIXME: There is a (slim) chance of a problem if not all of the CPUs
+ * are correctly onlined. If somehow we start a CPU on boot with RTAS
+ * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in
+ * time, the boot CPU will timeout. If it does eventually execute
+ * stuff, the secondary will start up (paca[].cpu_start was written) and
+ * get into a peculiar state. If the platform supports
+ * smp_ops->take_timebase(), the secondary CPU will probably be spinning
+ * in there. If not (i.e. pseries), the secondary will continue on and
+ * try to online itself/idle/etc. If it survives that, we need to find
+ * these possible-but-not-online-but-should-be CPUs and chaperone them
+ * into kexec_smp_wait().
+ */
+ for_each_online_cpu(i) {
+ if (i == my_cpu)
+ continue;
+
+ while (paca[i].kexec_state < wait_state) {
+ barrier();
+ if (i != notified) {
+ printk(KERN_INFO "kexec: waiting for cpu %d "
+ "(physical %d) to enter %i state\n",
+ i, paca[i].hw_cpu_id, wait_state);
+ notified = i;
+ }
+ }
+ }
+ mb();
+}
+
+/*
+ * We need to make sure each present CPU is online. The next kernel will scan
+ * the device tree and assume primary threads are online and query secondary
+ * threads via RTAS to online them if required. If we don't online primary
+ * threads, they will be stuck. However, we also online secondary threads as we
+ * may be using 'cede offline'. In this case RTAS doesn't see the secondary
+ * threads as offline -- and again, these CPUs will be stuck.
+ *
+ * So, we online all CPUs that should be running, including secondary threads.
+ */
+static void wake_offline_cpus(void)
+{
+ int cpu = 0;
+
+ for_each_present_cpu(cpu) {
+ if (!cpu_online(cpu)) {
+ printk(KERN_INFO "kexec: Waking offline cpu %d.\n",
+ cpu);
+ cpu_up(cpu);
+ }
+ }
+}
+
+static void kexec_prepare_cpus(void)
+{
+ wake_offline_cpus();
+ smp_call_function(kexec_smp_down, NULL, /* wait */0);
+ local_irq_disable();
+ mb(); /* make sure IRQs are disabled before we say they are */
+ get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
+
+ kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF);
+ /* we are sure every CPU has IRQs off at this point */
+ kexec_all_irq_disabled = 1;
+
+ /* after we tell the others to go down */
+ if (ppc_md.kexec_cpu_down)
+ ppc_md.kexec_cpu_down(0, 0);
+
+ /*
+ * Before removing MMU mappings make sure all CPUs have entered real
+ * mode:
+ */
+ kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
+
+ put_cpu();
+}
+
+#else /* ! SMP */
+
+static void kexec_prepare_cpus(void)
+{
+ /*
+ * move the secondarys to us so that we can copy
+ * the new kernel 0-0x100 safely
+ *
+ * do this if kexec in setup.c ?
+ *
+ * We need to release the cpus if we are ever going from an
+ * UP to an SMP kernel.
+ */
+ smp_release_cpus();
+ if (ppc_md.kexec_cpu_down)
+ ppc_md.kexec_cpu_down(0, 0);
+ local_irq_disable();
+}
+
+#endif /* SMP */
+
+/*
+ * kexec thread structure and stack.
+ *
+ * We need to make sure that this is 16384-byte aligned due to the
+ * way process stacks are handled. It also must be statically allocated
+ * or allocated as part of the kimage, because everything else may be
+ * overwritten when we copy the kexec image. We piggyback on the
+ * "init_task" linker section here to statically allocate a stack.
+ *
+ * We could use a smaller stack if we don't care about anything using
+ * current, but that audit has not been performed.
+ */
+static union thread_union kexec_stack __init_task_data =
+ { };
+
+/*
+ * For similar reasons to the stack above, the kexecing CPU needs to be on a
+ * static PACA; we switch to kexec_paca.
+ */
+struct paca_struct kexec_paca;
+
+/* Our assembly helper, in kexec_stub.S */
+extern void kexec_sequence(void *newstack, unsigned long start,
+ void *image, void *control,
+ void (*clear_all)(void)) __noreturn;
+
+/* too late to fail here */
+void default_machine_kexec(struct kimage *image)
+{
+ /* prepare control code if any */
+
+ /*
+ * If the kexec boot is the normal one, need to shutdown other cpus
+ * into our wait loop and quiesce interrupts.
+ * Otherwise, in the case of crashed mode (crashing_cpu >= 0),
+ * stopping other CPUs and collecting their pt_regs is done before
+ * using debugger IPI.
+ */
+
+ if (crashing_cpu == -1)
+ kexec_prepare_cpus();
+
+ pr_debug("kexec: Starting switchover sequence.\n");
+
+ /* switch to a staticly allocated stack. Based on irq stack code.
+ * XXX: the task struct will likely be invalid once we do the copy!
+ */
+ kexec_stack.thread_info.task = current_thread_info()->task;
+ kexec_stack.thread_info.flags = 0;
+
+ /* We need a static PACA, too; copy this CPU's PACA over and switch to
+ * it. Also poison per_cpu_offset to catch anyone using non-static
+ * data.
+ */
+ memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct));
+ kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL;
+ paca = (struct paca_struct *)RELOC_HIDE(&kexec_paca, 0) -
+ kexec_paca.paca_index;
+ setup_paca(&kexec_paca);
+
+ /* XXX: If anyone does 'dynamic lppacas' this will also need to be
+ * switched to a static version!
+ */
+
+ /* Some things are best done in assembly. Finding globals with
+ * a toc is easier in C, so pass in what we can.
+ */
+ kexec_sequence(&kexec_stack, image->start, image,
+ page_address(image->control_code_page),
+ ppc_md.hpte_clear_all);
+ /* NOTREACHED */
+}
+
+/* Values we need to export to the second kernel via the device tree. */
+static unsigned long htab_base;
+
+static struct property htab_base_prop = {
+ .name = "linux,htab-base",
+ .length = sizeof(unsigned long),
+ .value = &htab_base,
+};
+
+static struct property htab_size_prop = {
+ .name = "linux,htab-size",
+ .length = sizeof(unsigned long),
+ .value = &htab_size_bytes,
+};
+
+static int __init export_htab_values(void)
+{
+ struct device_node *node;
+ struct property *prop;
+
+ /* On machines with no htab htab_address is NULL */
+ if (!htab_address)
+ return -ENODEV;
+
+ node = of_find_node_by_path("/chosen");
+ if (!node)
+ return -ENODEV;
+
+ /* remove any stale propertys so ours can be found */
+ prop = of_find_property(node, htab_base_prop.name, NULL);
+ if (prop)
+ prom_remove_property(node, prop);
+ prop = of_find_property(node, htab_size_prop.name, NULL);
+ if (prop)
+ prom_remove_property(node, prop);
+
+ htab_base = __pa(htab_address);
+ prom_add_property(node, &htab_base_prop);
+ prom_add_property(node, &htab_size_prop);
+
+ of_node_put(node);
+ return 0;
+}
+late_initcall(export_htab_values);
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
new file mode 100644
index 00000000..ba16874f
--- /dev/null
+++ b/arch/powerpc/kernel/misc.S
@@ -0,0 +1,123 @@
+/*
+ * This file contains miscellaneous low-level functions.
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras.
+ *
+ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
+ *
+ * setjmp/longjmp code by Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/ppc_asm.h>
+#include <asm/unistd.h>
+#include <asm/asm-compat.h>
+#include <asm/asm-offsets.h>
+
+ .text
+
+/*
+ * Returns (address we are running at) - (address we were linked at)
+ * for use before the text and data are mapped to KERNELBASE.
+ */
+
+_GLOBAL(reloc_offset)
+ mflr r0
+ bl 1f
+1: mflr r3
+ PPC_LL r4,(2f-1b)(r3)
+ subf r3,r4,r3
+ mtlr r0
+ blr
+
+ .align 3
+2: PPC_LONG 1b
+
+/*
+ * add_reloc_offset(x) returns x + reloc_offset().
+ */
+_GLOBAL(add_reloc_offset)
+ mflr r0
+ bl 1f
+1: mflr r5
+ PPC_LL r4,(2f-1b)(r5)
+ subf r5,r4,r5
+ add r3,r3,r5
+ mtlr r0
+ blr
+
+ .align 3
+2: PPC_LONG 1b
+
+_GLOBAL(kernel_execve)
+ li r0,__NR_execve
+ sc
+ bnslr
+ neg r3,r3
+ blr
+
+_GLOBAL(setjmp)
+ mflr r0
+ PPC_STL r0,0(r3)
+ PPC_STL r1,SZL(r3)
+ PPC_STL r2,2*SZL(r3)
+ mfcr r0
+ PPC_STL r0,3*SZL(r3)
+ PPC_STL r13,4*SZL(r3)
+ PPC_STL r14,5*SZL(r3)
+ PPC_STL r15,6*SZL(r3)
+ PPC_STL r16,7*SZL(r3)
+ PPC_STL r17,8*SZL(r3)
+ PPC_STL r18,9*SZL(r3)
+ PPC_STL r19,10*SZL(r3)
+ PPC_STL r20,11*SZL(r3)
+ PPC_STL r21,12*SZL(r3)
+ PPC_STL r22,13*SZL(r3)
+ PPC_STL r23,14*SZL(r3)
+ PPC_STL r24,15*SZL(r3)
+ PPC_STL r25,16*SZL(r3)
+ PPC_STL r26,17*SZL(r3)
+ PPC_STL r27,18*SZL(r3)
+ PPC_STL r28,19*SZL(r3)
+ PPC_STL r29,20*SZL(r3)
+ PPC_STL r30,21*SZL(r3)
+ PPC_STL r31,22*SZL(r3)
+ li r3,0
+ blr
+
+_GLOBAL(longjmp)
+ PPC_LCMPI r4,0
+ bne 1f
+ li r4,1
+1: PPC_LL r13,4*SZL(r3)
+ PPC_LL r14,5*SZL(r3)
+ PPC_LL r15,6*SZL(r3)
+ PPC_LL r16,7*SZL(r3)
+ PPC_LL r17,8*SZL(r3)
+ PPC_LL r18,9*SZL(r3)
+ PPC_LL r19,10*SZL(r3)
+ PPC_LL r20,11*SZL(r3)
+ PPC_LL r21,12*SZL(r3)
+ PPC_LL r22,13*SZL(r3)
+ PPC_LL r23,14*SZL(r3)
+ PPC_LL r24,15*SZL(r3)
+ PPC_LL r25,16*SZL(r3)
+ PPC_LL r26,17*SZL(r3)
+ PPC_LL r27,18*SZL(r3)
+ PPC_LL r28,19*SZL(r3)
+ PPC_LL r29,20*SZL(r3)
+ PPC_LL r30,21*SZL(r3)
+ PPC_LL r31,22*SZL(r3)
+ PPC_LL r0,3*SZL(r3)
+ mtcrf 0x38,r0
+ PPC_LL r0,0(r3)
+ PPC_LL r1,SZL(r3)
+ PPC_LL r2,2*SZL(r3)
+ mtlr r0
+ mr r3,r4
+ blr
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
new file mode 100644
index 00000000..7cd07b42
--- /dev/null
+++ b/arch/powerpc/kernel/misc_32.S
@@ -0,0 +1,1010 @@
+/*
+ * This file contains miscellaneous low-level functions.
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras.
+ *
+ * kexec bits:
+ * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * PPC44x port. Copyright (C) 2011, IBM Corporation
+ * Author: Suzuki Poulose <suzuki@in.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/sys.h>
+#include <asm/unistd.h>
+#include <asm/errno.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/cputable.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/processor.h>
+#include <asm/kexec.h>
+#include <asm/bug.h>
+#include <asm/ptrace.h>
+
+ .text
+
+_GLOBAL(call_do_softirq)
+ mflr r0
+ stw r0,4(r1)
+ stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
+ mr r1,r3
+ bl __do_softirq
+ lwz r1,0(r1)
+ lwz r0,4(r1)
+ mtlr r0
+ blr
+
+_GLOBAL(call_handle_irq)
+ mflr r0
+ stw r0,4(r1)
+ mtctr r6
+ stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5)
+ mr r1,r5
+ bctrl
+ lwz r1,0(r1)
+ lwz r0,4(r1)
+ mtlr r0
+ blr
+
+/*
+ * This returns the high 64 bits of the product of two 64-bit numbers.
+ */
+_GLOBAL(mulhdu)
+ cmpwi r6,0
+ cmpwi cr1,r3,0
+ mr r10,r4
+ mulhwu r4,r4,r5
+ beq 1f
+ mulhwu r0,r10,r6
+ mullw r7,r10,r5
+ addc r7,r0,r7
+ addze r4,r4
+1: beqlr cr1 /* all done if high part of A is 0 */
+ mr r10,r3
+ mullw r9,r3,r5
+ mulhwu r3,r3,r5
+ beq 2f
+ mullw r0,r10,r6
+ mulhwu r8,r10,r6
+ addc r7,r0,r7
+ adde r4,r4,r8
+ addze r3,r3
+2: addc r4,r4,r9
+ addze r3,r3
+ blr
+
+/*
+ * sub_reloc_offset(x) returns x - reloc_offset().
+ */
+_GLOBAL(sub_reloc_offset)
+ mflr r0
+ bl 1f
+1: mflr r5
+ lis r4,1b@ha
+ addi r4,r4,1b@l
+ subf r5,r4,r5
+ subf r3,r5,r3
+ mtlr r0
+ blr
+
+/*
+ * reloc_got2 runs through the .got2 section adding an offset
+ * to each entry.
+ */
+_GLOBAL(reloc_got2)
+ mflr r11
+ lis r7,__got2_start@ha
+ addi r7,r7,__got2_start@l
+ lis r8,__got2_end@ha
+ addi r8,r8,__got2_end@l
+ subf r8,r7,r8
+ srwi. r8,r8,2
+ beqlr
+ mtctr r8
+ bl 1f
+1: mflr r0
+ lis r4,1b@ha
+ addi r4,r4,1b@l
+ subf r0,r4,r0
+ add r7,r0,r7
+2: lwz r0,0(r7)
+ add r0,r0,r3
+ stw r0,0(r7)
+ addi r7,r7,4
+ bdnz 2b
+ mtlr r11
+ blr
+
+/*
+ * call_setup_cpu - call the setup_cpu function for this cpu
+ * r3 = data offset, r24 = cpu number
+ *
+ * Setup function is called with:
+ * r3 = data offset
+ * r4 = ptr to CPU spec (relocated)
+ */
+_GLOBAL(call_setup_cpu)
+ addis r4,r3,cur_cpu_spec@ha
+ addi r4,r4,cur_cpu_spec@l
+ lwz r4,0(r4)
+ add r4,r4,r3
+ lwz r5,CPU_SPEC_SETUP(r4)
+ cmpwi 0,r5,0
+ add r5,r5,r3
+ beqlr
+ mtctr r5
+ bctr
+
+#if defined(CONFIG_CPU_FREQ_PMAC) && defined(CONFIG_6xx)
+
+/* This gets called by via-pmu.c to switch the PLL selection
+ * on 750fx CPU. This function should really be moved to some
+ * other place (as most of the cpufreq code in via-pmu
+ */
+_GLOBAL(low_choose_750fx_pll)
+ /* Clear MSR:EE */
+ mfmsr r7
+ rlwinm r0,r7,0,17,15
+ mtmsr r0
+
+ /* If switching to PLL1, disable HID0:BTIC */
+ cmplwi cr0,r3,0
+ beq 1f
+ mfspr r5,SPRN_HID0
+ rlwinm r5,r5,0,27,25
+ sync
+ mtspr SPRN_HID0,r5
+ isync
+ sync
+
+1:
+ /* Calc new HID1 value */
+ mfspr r4,SPRN_HID1 /* Build a HID1:PS bit from parameter */
+ rlwinm r5,r3,16,15,15 /* Clear out HID1:PS from value read */
+ rlwinm r4,r4,0,16,14 /* Could have I used rlwimi here ? */
+ or r4,r4,r5
+ mtspr SPRN_HID1,r4
+
+ /* Store new HID1 image */
+ rlwinm r6,r1,0,0,(31-THREAD_SHIFT)
+ lwz r6,TI_CPU(r6)
+ slwi r6,r6,2
+ addis r6,r6,nap_save_hid1@ha
+ stw r4,nap_save_hid1@l(r6)
+
+ /* If switching to PLL0, enable HID0:BTIC */
+ cmplwi cr0,r3,0
+ bne 1f
+ mfspr r5,SPRN_HID0
+ ori r5,r5,HID0_BTIC
+ sync
+ mtspr SPRN_HID0,r5
+ isync
+ sync
+
+1:
+ /* Return */
+ mtmsr r7
+ blr
+
+_GLOBAL(low_choose_7447a_dfs)
+ /* Clear MSR:EE */
+ mfmsr r7
+ rlwinm r0,r7,0,17,15
+ mtmsr r0
+
+ /* Calc new HID1 value */
+ mfspr r4,SPRN_HID1
+ insrwi r4,r3,1,9 /* insert parameter into bit 9 */
+ sync
+ mtspr SPRN_HID1,r4
+ sync
+ isync
+
+ /* Return */
+ mtmsr r7
+ blr
+
+#endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_6xx */
+
+/*
+ * complement mask on the msr then "or" some values on.
+ * _nmask_and_or_msr(nmask, value_to_or)
+ */
+_GLOBAL(_nmask_and_or_msr)
+ mfmsr r0 /* Get current msr */
+ andc r0,r0,r3 /* And off the bits set in r3 (first parm) */
+ or r0,r0,r4 /* Or on the bits in r4 (second parm) */
+ SYNC /* Some chip revs have problems here... */
+ mtmsr r0 /* Update machine state */
+ isync
+ blr /* Done */
+
+#ifdef CONFIG_40x
+
+/*
+ * Do an IO access in real mode
+ */
+_GLOBAL(real_readb)
+ mfmsr r7
+ ori r0,r7,MSR_DR
+ xori r0,r0,MSR_DR
+ sync
+ mtmsr r0
+ sync
+ isync
+ lbz r3,0(r3)
+ sync
+ mtmsr r7
+ sync
+ isync
+ blr
+
+ /*
+ * Do an IO access in real mode
+ */
+_GLOBAL(real_writeb)
+ mfmsr r7
+ ori r0,r7,MSR_DR
+ xori r0,r0,MSR_DR
+ sync
+ mtmsr r0
+ sync
+ isync
+ stb r3,0(r4)
+ sync
+ mtmsr r7
+ sync
+ isync
+ blr
+
+#endif /* CONFIG_40x */
+
+
+/*
+ * Flush instruction cache.
+ * This is a no-op on the 601.
+ */
+_GLOBAL(flush_instruction_cache)
+#if defined(CONFIG_8xx)
+ isync
+ lis r5, IDC_INVALL@h
+ mtspr SPRN_IC_CST, r5
+#elif defined(CONFIG_4xx)
+#ifdef CONFIG_403GCX
+ li r3, 512
+ mtctr r3
+ lis r4, KERNELBASE@h
+1: iccci 0, r4
+ addi r4, r4, 16
+ bdnz 1b
+#else
+ lis r3, KERNELBASE@h
+ iccci 0,r3
+#endif
+#elif CONFIG_FSL_BOOKE
+BEGIN_FTR_SECTION
+ mfspr r3,SPRN_L1CSR0
+ ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC
+ /* msync; isync recommended here */
+ mtspr SPRN_L1CSR0,r3
+ isync
+ blr
+END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
+ mfspr r3,SPRN_L1CSR1
+ ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
+ mtspr SPRN_L1CSR1,r3
+#else
+ mfspr r3,SPRN_PVR
+ rlwinm r3,r3,16,16,31
+ cmpwi 0,r3,1
+ beqlr /* for 601, do nothing */
+ /* 603/604 processor - use invalidate-all bit in HID0 */
+ mfspr r3,SPRN_HID0
+ ori r3,r3,HID0_ICFI
+ mtspr SPRN_HID0,r3
+#endif /* CONFIG_8xx/4xx */
+ isync
+ blr
+
+/*
+ * Write any modified data cache blocks out to memory
+ * and invalidate the corresponding instruction cache blocks.
+ * This is a no-op on the 601.
+ *
+ * flush_icache_range(unsigned long start, unsigned long stop)
+ */
+_KPROBE(__flush_icache_range)
+BEGIN_FTR_SECTION
+ blr /* for 601, do nothing */
+END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ li r5,L1_CACHE_BYTES-1
+ andc r3,r3,r5
+ subf r4,r3,r4
+ add r4,r4,r5
+ srwi. r4,r4,L1_CACHE_SHIFT
+ beqlr
+ mtctr r4
+ mr r6,r3
+1: dcbst 0,r3
+ addi r3,r3,L1_CACHE_BYTES
+ bdnz 1b
+ sync /* wait for dcbst's to get to ram */
+#ifndef CONFIG_44x
+ mtctr r4
+2: icbi 0,r6
+ addi r6,r6,L1_CACHE_BYTES
+ bdnz 2b
+#else
+ /* Flash invalidate on 44x because we are passed kmapped addresses and
+ this doesn't work for userspace pages due to the virtually tagged
+ icache. Sigh. */
+ iccci 0, r0
+#endif
+ sync /* additional sync needed on g4 */
+ isync
+ blr
+/*
+ * Write any modified data cache blocks out to memory.
+ * Does not invalidate the corresponding cache lines (especially for
+ * any corresponding instruction cache).
+ *
+ * clean_dcache_range(unsigned long start, unsigned long stop)
+ */
+_GLOBAL(clean_dcache_range)
+ li r5,L1_CACHE_BYTES-1
+ andc r3,r3,r5
+ subf r4,r3,r4
+ add r4,r4,r5
+ srwi. r4,r4,L1_CACHE_SHIFT
+ beqlr
+ mtctr r4
+
+1: dcbst 0,r3
+ addi r3,r3,L1_CACHE_BYTES
+ bdnz 1b
+ sync /* wait for dcbst's to get to ram */
+ blr
+
+/*
+ * Write any modified data cache blocks out to memory and invalidate them.
+ * Does not invalidate the corresponding instruction cache blocks.
+ *
+ * flush_dcache_range(unsigned long start, unsigned long stop)
+ */
+_GLOBAL(flush_dcache_range)
+ li r5,L1_CACHE_BYTES-1
+ andc r3,r3,r5
+ subf r4,r3,r4
+ add r4,r4,r5
+ srwi. r4,r4,L1_CACHE_SHIFT
+ beqlr
+ mtctr r4
+
+1: dcbf 0,r3
+ addi r3,r3,L1_CACHE_BYTES
+ bdnz 1b
+ sync /* wait for dcbst's to get to ram */
+ blr
+
+/*
+ * Like above, but invalidate the D-cache. This is used by the 8xx
+ * to invalidate the cache so the PPC core doesn't get stale data
+ * from the CPM (no cache snooping here :-).
+ *
+ * invalidate_dcache_range(unsigned long start, unsigned long stop)
+ */
+_GLOBAL(invalidate_dcache_range)
+ li r5,L1_CACHE_BYTES-1
+ andc r3,r3,r5
+ subf r4,r3,r4
+ add r4,r4,r5
+ srwi. r4,r4,L1_CACHE_SHIFT
+ beqlr
+ mtctr r4
+
+1: dcbi 0,r3
+ addi r3,r3,L1_CACHE_BYTES
+ bdnz 1b
+ sync /* wait for dcbi's to get to ram */
+ blr
+
+/*
+ * Flush a particular page from the data cache to RAM.
+ * Note: this is necessary because the instruction cache does *not*
+ * snoop from the data cache.
+ * This is a no-op on the 601 which has a unified cache.
+ *
+ * void __flush_dcache_icache(void *page)
+ */
+_GLOBAL(__flush_dcache_icache)
+BEGIN_FTR_SECTION
+ blr
+END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */
+ li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */
+ mtctr r4
+ mr r6,r3
+0: dcbst 0,r3 /* Write line to ram */
+ addi r3,r3,L1_CACHE_BYTES
+ bdnz 0b
+ sync
+#ifdef CONFIG_44x
+ /* We don't flush the icache on 44x. Those have a virtual icache
+ * and we don't have access to the virtual address here (it's
+ * not the page vaddr but where it's mapped in user space). The
+ * flushing of the icache on these is handled elsewhere, when
+ * a change in the address space occurs, before returning to
+ * user space
+ */
+BEGIN_MMU_FTR_SECTION
+ blr
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
+#endif /* CONFIG_44x */
+ mtctr r4
+1: icbi 0,r6
+ addi r6,r6,L1_CACHE_BYTES
+ bdnz 1b
+ sync
+ isync
+ blr
+
+#ifndef CONFIG_BOOKE
+/*
+ * Flush a particular page from the data cache to RAM, identified
+ * by its physical address. We turn off the MMU so we can just use
+ * the physical address (this may be a highmem page without a kernel
+ * mapping).
+ *
+ * void __flush_dcache_icache_phys(unsigned long physaddr)
+ */
+_GLOBAL(__flush_dcache_icache_phys)
+BEGIN_FTR_SECTION
+ blr /* for 601, do nothing */
+END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ mfmsr r10
+ rlwinm r0,r10,0,28,26 /* clear DR */
+ mtmsr r0
+ isync
+ rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */
+ li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */
+ mtctr r4
+ mr r6,r3
+0: dcbst 0,r3 /* Write line to ram */
+ addi r3,r3,L1_CACHE_BYTES
+ bdnz 0b
+ sync
+ mtctr r4
+1: icbi 0,r6
+ addi r6,r6,L1_CACHE_BYTES
+ bdnz 1b
+ sync
+ mtmsr r10 /* restore DR */
+ isync
+ blr
+#endif /* CONFIG_BOOKE */
+
+/*
+ * Clear pages using the dcbz instruction, which doesn't cause any
+ * memory traffic (except to write out any cache lines which get
+ * displaced). This only works on cacheable memory.
+ *
+ * void clear_pages(void *page, int order) ;
+ */
+_GLOBAL(clear_pages)
+ li r0,PAGE_SIZE/L1_CACHE_BYTES
+ slw r0,r0,r4
+ mtctr r0
+1: dcbz 0,r3
+ addi r3,r3,L1_CACHE_BYTES
+ bdnz 1b
+ blr
+
+/*
+ * Copy a whole page. We use the dcbz instruction on the destination
+ * to reduce memory traffic (it eliminates the unnecessary reads of
+ * the destination into cache). This requires that the destination
+ * is cacheable.
+ */
+#define COPY_16_BYTES \
+ lwz r6,4(r4); \
+ lwz r7,8(r4); \
+ lwz r8,12(r4); \
+ lwzu r9,16(r4); \
+ stw r6,4(r3); \
+ stw r7,8(r3); \
+ stw r8,12(r3); \
+ stwu r9,16(r3)
+
+_GLOBAL(copy_page)
+ addi r3,r3,-4
+ addi r4,r4,-4
+
+ li r5,4
+
+#if MAX_COPY_PREFETCH > 1
+ li r0,MAX_COPY_PREFETCH
+ li r11,4
+ mtctr r0
+11: dcbt r11,r4
+ addi r11,r11,L1_CACHE_BYTES
+ bdnz 11b
+#else /* MAX_COPY_PREFETCH == 1 */
+ dcbt r5,r4
+ li r11,L1_CACHE_BYTES+4
+#endif /* MAX_COPY_PREFETCH */
+ li r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH
+ crclr 4*cr0+eq
+2:
+ mtctr r0
+1:
+ dcbt r11,r4
+ dcbz r5,r3
+ COPY_16_BYTES
+#if L1_CACHE_BYTES >= 32
+ COPY_16_BYTES
+#if L1_CACHE_BYTES >= 64
+ COPY_16_BYTES
+ COPY_16_BYTES
+#if L1_CACHE_BYTES >= 128
+ COPY_16_BYTES
+ COPY_16_BYTES
+ COPY_16_BYTES
+ COPY_16_BYTES
+#endif
+#endif
+#endif
+ bdnz 1b
+ beqlr
+ crnot 4*cr0+eq,4*cr0+eq
+ li r0,MAX_COPY_PREFETCH
+ li r11,4
+ b 2b
+
+/*
+ * void atomic_clear_mask(atomic_t mask, atomic_t *addr)
+ * void atomic_set_mask(atomic_t mask, atomic_t *addr);
+ */
+_GLOBAL(atomic_clear_mask)
+10: lwarx r5,0,r4
+ andc r5,r5,r3
+ PPC405_ERR77(0,r4)
+ stwcx. r5,0,r4
+ bne- 10b
+ blr
+_GLOBAL(atomic_set_mask)
+10: lwarx r5,0,r4
+ or r5,r5,r3
+ PPC405_ERR77(0,r4)
+ stwcx. r5,0,r4
+ bne- 10b
+ blr
+
+/*
+ * Extended precision shifts.
+ *
+ * Updated to be valid for shift counts from 0 to 63 inclusive.
+ * -- Gabriel
+ *
+ * R3/R4 has 64 bit value
+ * R5 has shift count
+ * result in R3/R4
+ *
+ * ashrdi3: arithmetic right shift (sign propagation)
+ * lshrdi3: logical right shift
+ * ashldi3: left shift
+ */
+_GLOBAL(__ashrdi3)
+ subfic r6,r5,32
+ srw r4,r4,r5 # LSW = count > 31 ? 0 : LSW >> count
+ addi r7,r5,32 # could be xori, or addi with -32
+ slw r6,r3,r6 # t1 = count > 31 ? 0 : MSW << (32-count)
+ rlwinm r8,r7,0,32 # t3 = (count < 32) ? 32 : 0
+ sraw r7,r3,r7 # t2 = MSW >> (count-32)
+ or r4,r4,r6 # LSW |= t1
+ slw r7,r7,r8 # t2 = (count < 32) ? 0 : t2
+ sraw r3,r3,r5 # MSW = MSW >> count
+ or r4,r4,r7 # LSW |= t2
+ blr
+
+_GLOBAL(__ashldi3)
+ subfic r6,r5,32
+ slw r3,r3,r5 # MSW = count > 31 ? 0 : MSW << count
+ addi r7,r5,32 # could be xori, or addi with -32
+ srw r6,r4,r6 # t1 = count > 31 ? 0 : LSW >> (32-count)
+ slw r7,r4,r7 # t2 = count < 32 ? 0 : LSW << (count-32)
+ or r3,r3,r6 # MSW |= t1
+ slw r4,r4,r5 # LSW = LSW << count
+ or r3,r3,r7 # MSW |= t2
+ blr
+
+_GLOBAL(__lshrdi3)
+ subfic r6,r5,32
+ srw r4,r4,r5 # LSW = count > 31 ? 0 : LSW >> count
+ addi r7,r5,32 # could be xori, or addi with -32
+ slw r6,r3,r6 # t1 = count > 31 ? 0 : MSW << (32-count)
+ srw r7,r3,r7 # t2 = count < 32 ? 0 : MSW >> (count-32)
+ or r4,r4,r6 # LSW |= t1
+ srw r3,r3,r5 # MSW = MSW >> count
+ or r4,r4,r7 # LSW |= t2
+ blr
+
+/*
+ * 64-bit comparison: __ucmpdi2(u64 a, u64 b)
+ * Returns 0 if a < b, 1 if a == b, 2 if a > b.
+ */
+_GLOBAL(__ucmpdi2)
+ cmplw r3,r5
+ li r3,1
+ bne 1f
+ cmplw r4,r6
+ beqlr
+1: li r3,0
+ bltlr
+ li r3,2
+ blr
+
+_GLOBAL(abs)
+ srawi r4,r3,31
+ xor r3,r3,r4
+ sub r3,r3,r4
+ blr
+
+/*
+ * Create a kernel thread
+ * kernel_thread(fn, arg, flags)
+ */
+_GLOBAL(kernel_thread)
+ stwu r1,-16(r1)
+ stw r30,8(r1)
+ stw r31,12(r1)
+ mr r30,r3 /* function */
+ mr r31,r4 /* argument */
+ ori r3,r5,CLONE_VM /* flags */
+ oris r3,r3,CLONE_UNTRACED>>16
+ li r4,0 /* new sp (unused) */
+ li r0,__NR_clone
+ sc
+ bns+ 1f /* did system call indicate error? */
+ neg r3,r3 /* if so, make return code negative */
+1: cmpwi 0,r3,0 /* parent or child? */
+ bne 2f /* return if parent */
+ li r0,0 /* make top-level stack frame */
+ stwu r0,-16(r1)
+ mtlr r30 /* fn addr in lr */
+ mr r3,r31 /* load arg and call fn */
+ PPC440EP_ERR42
+ blrl
+ li r0,__NR_exit /* exit if function returns */
+ li r3,0
+ sc
+2: lwz r30,8(r1)
+ lwz r31,12(r1)
+ addi r1,r1,16
+ blr
+
+#ifdef CONFIG_SMP
+_GLOBAL(start_secondary_resume)
+ /* Reset stack */
+ rlwinm r1,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ li r3,0
+ stw r3,0(r1) /* Zero the stack frame pointer */
+ bl start_secondary
+ b .
+#endif /* CONFIG_SMP */
+
+/*
+ * This routine is just here to keep GCC happy - sigh...
+ */
+_GLOBAL(__main)
+ blr
+
+#ifdef CONFIG_KEXEC
+ /*
+ * Must be relocatable PIC code callable as a C function.
+ */
+ .globl relocate_new_kernel
+relocate_new_kernel:
+ /* r3 = page_list */
+ /* r4 = reboot_code_buffer */
+ /* r5 = start_address */
+
+#ifdef CONFIG_FSL_BOOKE
+
+ mr r29, r3
+ mr r30, r4
+ mr r31, r5
+
+#define ENTRY_MAPPING_KEXEC_SETUP
+#include "fsl_booke_entry_mapping.S"
+#undef ENTRY_MAPPING_KEXEC_SETUP
+
+ mr r3, r29
+ mr r4, r30
+ mr r5, r31
+
+ li r0, 0
+#elif defined(CONFIG_44x) && !defined(CONFIG_PPC_47x)
+
+/*
+ * Code for setting up 1:1 mapping for PPC440x for KEXEC
+ *
+ * We cannot switch off the MMU on PPC44x.
+ * So we:
+ * 1) Invalidate all the mappings except the one we are running from.
+ * 2) Create a tmp mapping for our code in the other address space(TS) and
+ * jump to it. Invalidate the entry we started in.
+ * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS.
+ * 4) Jump to the 1:1 mapping in original TS.
+ * 5) Invalidate the tmp mapping.
+ *
+ * - Based on the kexec support code for FSL BookE
+ * - Doesn't support 47x yet.
+ *
+ */
+ /* Save our parameters */
+ mr r29, r3
+ mr r30, r4
+ mr r31, r5
+
+ /* Load our MSR_IS and TID to MMUCR for TLB search */
+ mfspr r3,SPRN_PID
+ mfmsr r4
+ andi. r4,r4,MSR_IS@l
+ beq wmmucr
+ oris r3,r3,PPC44x_MMUCR_STS@h
+wmmucr:
+ mtspr SPRN_MMUCR,r3
+ sync
+
+ /*
+ * Invalidate all the TLB entries except the current entry
+ * where we are running from
+ */
+ bl 0f /* Find our address */
+0: mflr r5 /* Make it accessible */
+ tlbsx r23,0,r5 /* Find entry we are in */
+ li r4,0 /* Start at TLB entry 0 */
+ li r3,0 /* Set PAGEID inval value */
+1: cmpw r23,r4 /* Is this our entry? */
+ beq skip /* If so, skip the inval */
+ tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */
+skip:
+ addi r4,r4,1 /* Increment */
+ cmpwi r4,64 /* Are we done? */
+ bne 1b /* If not, repeat */
+ isync
+
+ /* Create a temp mapping and jump to it */
+ andi. r6, r23, 1 /* Find the index to use */
+ addi r24, r6, 1 /* r24 will contain 1 or 2 */
+
+ mfmsr r9 /* get the MSR */
+ rlwinm r5, r9, 27, 31, 31 /* Extract the MSR[IS] */
+ xori r7, r5, 1 /* Use the other address space */
+
+ /* Read the current mapping entries */
+ tlbre r3, r23, PPC44x_TLB_PAGEID
+ tlbre r4, r23, PPC44x_TLB_XLAT
+ tlbre r5, r23, PPC44x_TLB_ATTRIB
+
+ /* Save our current XLAT entry */
+ mr r25, r4
+
+ /* Extract the TLB PageSize */
+ li r10, 1 /* r10 will hold PageSize */
+ rlwinm r11, r3, 0, 24, 27 /* bits 24-27 */
+
+ /* XXX: As of now we use 256M, 4K pages */
+ cmpwi r11, PPC44x_TLB_256M
+ bne tlb_4k
+ rotlwi r10, r10, 28 /* r10 = 256M */
+ b write_out
+tlb_4k:
+ cmpwi r11, PPC44x_TLB_4K
+ bne default
+ rotlwi r10, r10, 12 /* r10 = 4K */
+ b write_out
+default:
+ rotlwi r10, r10, 10 /* r10 = 1K */
+
+write_out:
+ /*
+ * Write out the tmp 1:1 mapping for this code in other address space
+ * Fixup EPN = RPN , TS=other address space
+ */
+ insrwi r3, r7, 1, 23 /* Bit 23 is TS for PAGEID field */
+
+ /* Write out the tmp mapping entries */
+ tlbwe r3, r24, PPC44x_TLB_PAGEID
+ tlbwe r4, r24, PPC44x_TLB_XLAT
+ tlbwe r5, r24, PPC44x_TLB_ATTRIB
+
+ subi r11, r10, 1 /* PageOffset Mask = PageSize - 1 */
+ not r10, r11 /* Mask for PageNum */
+
+ /* Switch to other address space in MSR */
+ insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */
+
+ bl 1f
+1: mflr r8
+ addi r8, r8, (2f-1b) /* Find the target offset */
+
+ /* Jump to the tmp mapping */
+ mtspr SPRN_SRR0, r8
+ mtspr SPRN_SRR1, r9
+ rfi
+
+2:
+ /* Invalidate the entry we were executing from */
+ li r3, 0
+ tlbwe r3, r23, PPC44x_TLB_PAGEID
+
+ /* attribute fields. rwx for SUPERVISOR mode */
+ li r5, 0
+ ori r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
+
+ /* Create 1:1 mapping in 256M pages */
+ xori r7, r7, 1 /* Revert back to Original TS */
+
+ li r8, 0 /* PageNumber */
+ li r6, 3 /* TLB Index, start at 3 */
+
+next_tlb:
+ rotlwi r3, r8, 28 /* Create EPN (bits 0-3) */
+ mr r4, r3 /* RPN = EPN */
+ ori r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */
+ insrwi r3, r7, 1, 23 /* Set TS from r7 */
+
+ tlbwe r3, r6, PPC44x_TLB_PAGEID /* PageID field : EPN, V, SIZE */
+ tlbwe r4, r6, PPC44x_TLB_XLAT /* Address translation : RPN */
+ tlbwe r5, r6, PPC44x_TLB_ATTRIB /* Attributes */
+
+ addi r8, r8, 1 /* Increment PN */
+ addi r6, r6, 1 /* Increment TLB Index */
+ cmpwi r8, 8 /* Are we done ? */
+ bne next_tlb
+ isync
+
+ /* Jump to the new mapping 1:1 */
+ li r9,0
+ insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */
+
+ bl 1f
+1: mflr r8
+ and r8, r8, r11 /* Get our offset within page */
+ addi r8, r8, (2f-1b)
+
+ and r5, r25, r10 /* Get our target PageNum */
+ or r8, r8, r5 /* Target jump address */
+
+ mtspr SPRN_SRR0, r8
+ mtspr SPRN_SRR1, r9
+ rfi
+2:
+ /* Invalidate the tmp entry we used */
+ li r3, 0
+ tlbwe r3, r24, PPC44x_TLB_PAGEID
+ sync
+
+ /* Restore the parameters */
+ mr r3, r29
+ mr r4, r30
+ mr r5, r31
+
+ li r0, 0
+#else
+ li r0, 0
+
+ /*
+ * Set Machine Status Register to a known status,
+ * switch the MMU off and jump to 1: in a single step.
+ */
+
+ mr r8, r0
+ ori r8, r8, MSR_RI|MSR_ME
+ mtspr SPRN_SRR1, r8
+ addi r8, r4, 1f - relocate_new_kernel
+ mtspr SPRN_SRR0, r8
+ sync
+ rfi
+
+1:
+#endif
+ /* from this point address translation is turned off */
+ /* and interrupts are disabled */
+
+ /* set a new stack at the bottom of our page... */
+ /* (not really needed now) */
+ addi r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */
+ stw r0, 0(r1)
+
+ /* Do the copies */
+ li r6, 0 /* checksum */
+ mr r0, r3
+ b 1f
+
+0: /* top, read another word for the indirection page */
+ lwzu r0, 4(r3)
+
+1:
+ /* is it a destination page? (r8) */
+ rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */
+ beq 2f
+
+ rlwinm r8, r0, 0, 0, 19 /* clear kexec flags, page align */
+ b 0b
+
+2: /* is it an indirection page? (r3) */
+ rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */
+ beq 2f
+
+ rlwinm r3, r0, 0, 0, 19 /* clear kexec flags, page align */
+ subi r3, r3, 4
+ b 0b
+
+2: /* are we done? */
+ rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */
+ beq 2f
+ b 3f
+
+2: /* is it a source page? (r9) */
+ rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */
+ beq 0b
+
+ rlwinm r9, r0, 0, 0, 19 /* clear kexec flags, page align */
+
+ li r7, PAGE_SIZE / 4
+ mtctr r7
+ subi r9, r9, 4
+ subi r8, r8, 4
+9:
+ lwzu r0, 4(r9) /* do the copy */
+ xor r6, r6, r0
+ stwu r0, 4(r8)
+ dcbst 0, r8
+ sync
+ icbi 0, r8
+ bdnz 9b
+
+ addi r9, r9, 4
+ addi r8, r8, 4
+ b 0b
+
+3:
+
+ /* To be certain of avoiding problems with self-modifying code
+ * execute a serializing instruction here.
+ */
+ isync
+ sync
+
+ mfspr r3, SPRN_PIR /* current core we are running on */
+ mr r4, r5 /* load physical address of chunk called */
+
+ /* jump to the entry point, usually the setup routine */
+ mtlr r5
+ blrl
+
+1: b 1b
+
+relocate_new_kernel_end:
+
+ .globl relocate_new_kernel_size
+relocate_new_kernel_size:
+ .long relocate_new_kernel_end - relocate_new_kernel
+#endif
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
new file mode 100644
index 00000000..616921ef
--- /dev/null
+++ b/arch/powerpc/kernel/misc_64.S
@@ -0,0 +1,633 @@
+/*
+ * This file contains miscellaneous low-level functions.
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras.
+ * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
+ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/sys.h>
+#include <asm/unistd.h>
+#include <asm/errno.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/kexec.h>
+#include <asm/ptrace.h>
+
+ .text
+
+_GLOBAL(call_do_softirq)
+ mflr r0
+ std r0,16(r1)
+ stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
+ mr r1,r3
+ bl .__do_softirq
+ ld r1,0(r1)
+ ld r0,16(r1)
+ mtlr r0
+ blr
+
+_GLOBAL(call_handle_irq)
+ ld r8,0(r6)
+ mflr r0
+ std r0,16(r1)
+ mtctr r8
+ stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5)
+ mr r1,r5
+ bctrl
+ ld r1,0(r1)
+ ld r0,16(r1)
+ mtlr r0
+ blr
+
+ .section ".toc","aw"
+PPC64_CACHES:
+ .tc ppc64_caches[TC],ppc64_caches
+ .section ".text"
+
+/*
+ * Write any modified data cache blocks out to memory
+ * and invalidate the corresponding instruction cache blocks.
+ *
+ * flush_icache_range(unsigned long start, unsigned long stop)
+ *
+ * flush all bytes from start through stop-1 inclusive
+ */
+
+_KPROBE(__flush_icache_range)
+
+/*
+ * Flush the data cache to memory
+ *
+ * Different systems have different cache line sizes
+ * and in some cases i-cache and d-cache line sizes differ from
+ * each other.
+ */
+ ld r10,PPC64_CACHES@toc(r2)
+ lwz r7,DCACHEL1LINESIZE(r10)/* Get cache line size */
+ addi r5,r7,-1
+ andc r6,r3,r5 /* round low to line bdy */
+ subf r8,r6,r4 /* compute length */
+ add r8,r8,r5 /* ensure we get enough */
+ lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of cache line size */
+ srw. r8,r8,r9 /* compute line count */
+ beqlr /* nothing to do? */
+ mtctr r8
+1: dcbst 0,r6
+ add r6,r6,r7
+ bdnz 1b
+ sync
+
+/* Now invalidate the instruction cache */
+
+ lwz r7,ICACHEL1LINESIZE(r10) /* Get Icache line size */
+ addi r5,r7,-1
+ andc r6,r3,r5 /* round low to line bdy */
+ subf r8,r6,r4 /* compute length */
+ add r8,r8,r5
+ lwz r9,ICACHEL1LOGLINESIZE(r10) /* Get log-2 of Icache line size */
+ srw. r8,r8,r9 /* compute line count */
+ beqlr /* nothing to do? */
+ mtctr r8
+2: icbi 0,r6
+ add r6,r6,r7
+ bdnz 2b
+ isync
+ blr
+ .previous .text
+/*
+ * Like above, but only do the D-cache.
+ *
+ * flush_dcache_range(unsigned long start, unsigned long stop)
+ *
+ * flush all bytes from start to stop-1 inclusive
+ */
+_GLOBAL(flush_dcache_range)
+
+/*
+ * Flush the data cache to memory
+ *
+ * Different systems have different cache line sizes
+ */
+ ld r10,PPC64_CACHES@toc(r2)
+ lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */
+ addi r5,r7,-1
+ andc r6,r3,r5 /* round low to line bdy */
+ subf r8,r6,r4 /* compute length */
+ add r8,r8,r5 /* ensure we get enough */
+ lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */
+ srw. r8,r8,r9 /* compute line count */
+ beqlr /* nothing to do? */
+ mtctr r8
+0: dcbst 0,r6
+ add r6,r6,r7
+ bdnz 0b
+ sync
+ blr
+
+/*
+ * Like above, but works on non-mapped physical addresses.
+ * Use only for non-LPAR setups ! It also assumes real mode
+ * is cacheable. Used for flushing out the DART before using
+ * it as uncacheable memory
+ *
+ * flush_dcache_phys_range(unsigned long start, unsigned long stop)
+ *
+ * flush all bytes from start to stop-1 inclusive
+ */
+_GLOBAL(flush_dcache_phys_range)
+ ld r10,PPC64_CACHES@toc(r2)
+ lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */
+ addi r5,r7,-1
+ andc r6,r3,r5 /* round low to line bdy */
+ subf r8,r6,r4 /* compute length */
+ add r8,r8,r5 /* ensure we get enough */
+ lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */
+ srw. r8,r8,r9 /* compute line count */
+ beqlr /* nothing to do? */
+ mfmsr r5 /* Disable MMU Data Relocation */
+ ori r0,r5,MSR_DR
+ xori r0,r0,MSR_DR
+ sync
+ mtmsr r0
+ sync
+ isync
+ mtctr r8
+0: dcbst 0,r6
+ add r6,r6,r7
+ bdnz 0b
+ sync
+ isync
+ mtmsr r5 /* Re-enable MMU Data Relocation */
+ sync
+ isync
+ blr
+
+_GLOBAL(flush_inval_dcache_range)
+ ld r10,PPC64_CACHES@toc(r2)
+ lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */
+ addi r5,r7,-1
+ andc r6,r3,r5 /* round low to line bdy */
+ subf r8,r6,r4 /* compute length */
+ add r8,r8,r5 /* ensure we get enough */
+ lwz r9,DCACHEL1LOGLINESIZE(r10)/* Get log-2 of dcache line size */
+ srw. r8,r8,r9 /* compute line count */
+ beqlr /* nothing to do? */
+ sync
+ isync
+ mtctr r8
+0: dcbf 0,r6
+ add r6,r6,r7
+ bdnz 0b
+ sync
+ isync
+ blr
+
+
+/*
+ * Flush a particular page from the data cache to RAM.
+ * Note: this is necessary because the instruction cache does *not*
+ * snoop from the data cache.
+ *
+ * void __flush_dcache_icache(void *page)
+ */
+_GLOBAL(__flush_dcache_icache)
+/*
+ * Flush the data cache to memory
+ *
+ * Different systems have different cache line sizes
+ */
+
+/* Flush the dcache */
+ ld r7,PPC64_CACHES@toc(r2)
+ clrrdi r3,r3,PAGE_SHIFT /* Page align */
+ lwz r4,DCACHEL1LINESPERPAGE(r7) /* Get # dcache lines per page */
+ lwz r5,DCACHEL1LINESIZE(r7) /* Get dcache line size */
+ mr r6,r3
+ mtctr r4
+0: dcbst 0,r6
+ add r6,r6,r5
+ bdnz 0b
+ sync
+
+/* Now invalidate the icache */
+
+ lwz r4,ICACHEL1LINESPERPAGE(r7) /* Get # icache lines per page */
+ lwz r5,ICACHEL1LINESIZE(r7) /* Get icache line size */
+ mtctr r4
+1: icbi 0,r3
+ add r3,r3,r5
+ bdnz 1b
+ isync
+ blr
+
+
+#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE)
+/*
+ * Do an IO access in real mode
+ */
+_GLOBAL(real_readb)
+ mfmsr r7
+ ori r0,r7,MSR_DR
+ xori r0,r0,MSR_DR
+ sync
+ mtmsrd r0
+ sync
+ isync
+ mfspr r6,SPRN_HID4
+ rldicl r5,r6,32,0
+ ori r5,r5,0x100
+ rldicl r5,r5,32,0
+ sync
+ mtspr SPRN_HID4,r5
+ isync
+ slbia
+ isync
+ lbz r3,0(r3)
+ sync
+ mtspr SPRN_HID4,r6
+ isync
+ slbia
+ isync
+ mtmsrd r7
+ sync
+ isync
+ blr
+
+ /*
+ * Do an IO access in real mode
+ */
+_GLOBAL(real_writeb)
+ mfmsr r7
+ ori r0,r7,MSR_DR
+ xori r0,r0,MSR_DR
+ sync
+ mtmsrd r0
+ sync
+ isync
+ mfspr r6,SPRN_HID4
+ rldicl r5,r6,32,0
+ ori r5,r5,0x100
+ rldicl r5,r5,32,0
+ sync
+ mtspr SPRN_HID4,r5
+ isync
+ slbia
+ isync
+ stb r3,0(r4)
+ sync
+ mtspr SPRN_HID4,r6
+ isync
+ slbia
+ isync
+ mtmsrd r7
+ sync
+ isync
+ blr
+#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */
+
+#ifdef CONFIG_PPC_PASEMI
+
+/* No support in all binutils for these yet, so use defines */
+#define LBZCIX(RT,RA,RB) .long (0x7c0006aa|(RT<<21)|(RA<<16)|(RB << 11))
+#define STBCIX(RS,RA,RB) .long (0x7c0007aa|(RS<<21)|(RA<<16)|(RB << 11))
+
+
+_GLOBAL(real_205_readb)
+ mfmsr r7
+ ori r0,r7,MSR_DR
+ xori r0,r0,MSR_DR
+ sync
+ mtmsrd r0
+ sync
+ isync
+ LBZCIX(r3,0,r3)
+ isync
+ mtmsrd r7
+ sync
+ isync
+ blr
+
+_GLOBAL(real_205_writeb)
+ mfmsr r7
+ ori r0,r7,MSR_DR
+ xori r0,r0,MSR_DR
+ sync
+ mtmsrd r0
+ sync
+ isync
+ STBCIX(r3,0,r4)
+ isync
+ mtmsrd r7
+ sync
+ isync
+ blr
+
+#endif /* CONFIG_PPC_PASEMI */
+
+
+#if defined(CONFIG_CPU_FREQ_PMAC64) || defined(CONFIG_CPU_FREQ_MAPLE)
+/*
+ * SCOM access functions for 970 (FX only for now)
+ *
+ * unsigned long scom970_read(unsigned int address);
+ * void scom970_write(unsigned int address, unsigned long value);
+ *
+ * The address passed in is the 24 bits register address. This code
+ * is 970 specific and will not check the status bits, so you should
+ * know what you are doing.
+ */
+_GLOBAL(scom970_read)
+ /* interrupts off */
+ mfmsr r4
+ ori r0,r4,MSR_EE
+ xori r0,r0,MSR_EE
+ mtmsrd r0,1
+
+ /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits
+ * (including parity). On current CPUs they must be 0'd,
+ * and finally or in RW bit
+ */
+ rlwinm r3,r3,8,0,15
+ ori r3,r3,0x8000
+
+ /* do the actual scom read */
+ sync
+ mtspr SPRN_SCOMC,r3
+ isync
+ mfspr r3,SPRN_SCOMD
+ isync
+ mfspr r0,SPRN_SCOMC
+ isync
+
+ /* XXX: fixup result on some buggy 970's (ouch ! we lost a bit, bah
+ * that's the best we can do). Not implemented yet as we don't use
+ * the scom on any of the bogus CPUs yet, but may have to be done
+ * ultimately
+ */
+
+ /* restore interrupts */
+ mtmsrd r4,1
+ blr
+
+
+_GLOBAL(scom970_write)
+ /* interrupts off */
+ mfmsr r5
+ ori r0,r5,MSR_EE
+ xori r0,r0,MSR_EE
+ mtmsrd r0,1
+
+ /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits
+ * (including parity). On current CPUs they must be 0'd.
+ */
+
+ rlwinm r3,r3,8,0,15
+
+ sync
+ mtspr SPRN_SCOMD,r4 /* write data */
+ isync
+ mtspr SPRN_SCOMC,r3 /* write command */
+ isync
+ mfspr 3,SPRN_SCOMC
+ isync
+
+ /* restore interrupts */
+ mtmsrd r5,1
+ blr
+#endif /* CONFIG_CPU_FREQ_PMAC64 || CONFIG_CPU_FREQ_MAPLE */
+
+
+/*
+ * Create a kernel thread
+ * kernel_thread(fn, arg, flags)
+ */
+_GLOBAL(kernel_thread)
+ std r29,-24(r1)
+ std r30,-16(r1)
+ stdu r1,-STACK_FRAME_OVERHEAD(r1)
+ mr r29,r3
+ mr r30,r4
+ ori r3,r5,CLONE_VM /* flags */
+ oris r3,r3,(CLONE_UNTRACED>>16)
+ li r4,0 /* new sp (unused) */
+ li r0,__NR_clone
+ sc
+ bns+ 1f /* did system call indicate error? */
+ neg r3,r3 /* if so, make return code negative */
+1: cmpdi 0,r3,0 /* parent or child? */
+ bne 2f /* return if parent */
+ li r0,0
+ stdu r0,-STACK_FRAME_OVERHEAD(r1)
+ ld r2,8(r29)
+ ld r29,0(r29)
+ mtlr r29 /* fn addr in lr */
+ mr r3,r30 /* load arg and call fn */
+ blrl
+ li r0,__NR_exit /* exit after child exits */
+ li r3,0
+ sc
+2: addi r1,r1,STACK_FRAME_OVERHEAD
+ ld r29,-24(r1)
+ ld r30,-16(r1)
+ blr
+
+/*
+ * disable_kernel_fp()
+ * Disable the FPU.
+ */
+_GLOBAL(disable_kernel_fp)
+ mfmsr r3
+ rldicl r0,r3,(63-MSR_FP_LG),1
+ rldicl r3,r0,(MSR_FP_LG+1),0
+ mtmsrd r3 /* disable use of fpu now */
+ isync
+ blr
+
+/* kexec_wait(phys_cpu)
+ *
+ * wait for the flag to change, indicating this kernel is going away but
+ * the slave code for the next one is at addresses 0 to 100.
+ *
+ * This is used by all slaves, even those that did not find a matching
+ * paca in the secondary startup code.
+ *
+ * Physical (hardware) cpu id should be in r3.
+ */
+_GLOBAL(kexec_wait)
+ bl 1f
+1: mflr r5
+ addi r5,r5,kexec_flag-1b
+
+99: HMT_LOW
+#ifdef CONFIG_KEXEC /* use no memory without kexec */
+ lwz r4,0(r5)
+ cmpwi 0,r4,0
+ bnea 0x60
+#endif
+ b 99b
+
+/* this can be in text because we won't change it until we are
+ * running in real anyways
+ */
+kexec_flag:
+ .long 0
+
+
+#ifdef CONFIG_KEXEC
+
+/* kexec_smp_wait(void)
+ *
+ * call with interrupts off
+ * note: this is a terminal routine, it does not save lr
+ *
+ * get phys id from paca
+ * switch to real mode
+ * mark the paca as no longer used
+ * join other cpus in kexec_wait(phys_id)
+ */
+_GLOBAL(kexec_smp_wait)
+ lhz r3,PACAHWCPUID(r13)
+ bl real_mode
+
+ li r4,KEXEC_STATE_REAL_MODE
+ stb r4,PACAKEXECSTATE(r13)
+ SYNC
+
+ b .kexec_wait
+
+/*
+ * switch to real mode (turn mmu off)
+ * we use the early kernel trick that the hardware ignores bits
+ * 0 and 1 (big endian) of the effective address in real mode
+ *
+ * don't overwrite r3 here, it is live for kexec_wait above.
+ */
+real_mode: /* assume normal blr return */
+1: li r9,MSR_RI
+ li r10,MSR_DR|MSR_IR
+ mflr r11 /* return address to SRR0 */
+ mfmsr r12
+ andc r9,r12,r9
+ andc r10,r12,r10
+
+ mtmsrd r9,1
+ mtspr SPRN_SRR1,r10
+ mtspr SPRN_SRR0,r11
+ rfid
+
+
+/*
+ * kexec_sequence(newstack, start, image, control, clear_all())
+ *
+ * does the grungy work with stack switching and real mode switches
+ * also does simple calls to other code
+ */
+
+_GLOBAL(kexec_sequence)
+ mflr r0
+ std r0,16(r1)
+
+ /* switch stacks to newstack -- &kexec_stack.stack */
+ stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
+ mr r1,r3
+
+ li r0,0
+ std r0,16(r1)
+
+ /* save regs for local vars on new stack.
+ * yes, we won't go back, but ...
+ */
+ std r31,-8(r1)
+ std r30,-16(r1)
+ std r29,-24(r1)
+ std r28,-32(r1)
+ std r27,-40(r1)
+ std r26,-48(r1)
+ std r25,-56(r1)
+
+ stdu r1,-STACK_FRAME_OVERHEAD-64(r1)
+
+ /* save args into preserved regs */
+ mr r31,r3 /* newstack (both) */
+ mr r30,r4 /* start (real) */
+ mr r29,r5 /* image (virt) */
+ mr r28,r6 /* control, unused */
+ mr r27,r7 /* clear_all() fn desc */
+ mr r26,r8 /* spare */
+ lhz r25,PACAHWCPUID(r13) /* get our phys cpu from paca */
+
+ /* disable interrupts, we are overwriting kernel data next */
+ mfmsr r3
+ rlwinm r3,r3,0,17,15
+ mtmsrd r3,1
+
+ /* copy dest pages, flush whole dest image */
+ mr r3,r29
+ bl .kexec_copy_flush /* (image) */
+
+ /* turn off mmu */
+ bl real_mode
+
+ /* copy 0x100 bytes starting at start to 0 */
+ li r3,0
+ mr r4,r30 /* start, aka phys mem offset */
+ li r5,0x100
+ li r6,0
+ bl .copy_and_flush /* (dest, src, copy limit, start offset) */
+1: /* assume normal blr return */
+
+ /* release other cpus to the new kernel secondary start at 0x60 */
+ mflr r5
+ li r6,1
+ stw r6,kexec_flag-1b(5)
+
+ /* clear out hardware hash page table and tlb */
+ ld r5,0(r27) /* deref function descriptor */
+ mtctr r5
+ bctrl /* ppc_md.hpte_clear_all(void); */
+
+/*
+ * kexec image calling is:
+ * the first 0x100 bytes of the entry point are copied to 0
+ *
+ * all slaves branch to slave = 0x60 (absolute)
+ * slave(phys_cpu_id);
+ *
+ * master goes to start = entry point
+ * start(phys_cpu_id, start, 0);
+ *
+ *
+ * a wrapper is needed to call existing kernels, here is an approximate
+ * description of one method:
+ *
+ * v2: (2.6.10)
+ * start will be near the boot_block (maybe 0x100 bytes before it?)
+ * it will have a 0x60, which will b to boot_block, where it will wait
+ * and 0 will store phys into struct boot-block and load r3 from there,
+ * copy kernel 0-0x100 and tell slaves to back down to 0x60 again
+ *
+ * v1: (2.6.9)
+ * boot block will have all cpus scanning device tree to see if they
+ * are the boot cpu ?????
+ * other device tree differences (prop sizes, va vs pa, etc)...
+ */
+ mr r3,r25 # my phys cpu
+ mr r4,r30 # start, aka phys mem offset
+ mtlr 4
+ li r5,0
+ blr /* image->start(physid, image->start, 0); */
+#endif /* CONFIG_KEXEC */
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
new file mode 100644
index 00000000..2d275707
--- /dev/null
+++ b/arch/powerpc/kernel/module.c
@@ -0,0 +1,80 @@
+/* Kernel module help for powerpc.
+ Copyright (C) 2001, 2003 Rusty Russell IBM Corporation.
+ Copyright (C) 2008 Freescale Semiconductor, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#include <linux/elf.h>
+#include <linux/moduleloader.h>
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+#include <linux/bug.h>
+#include <asm/module.h>
+#include <asm/uaccess.h>
+#include <asm/firmware.h>
+#include <linux/sort.h>
+
+#include "setup.h"
+
+LIST_HEAD(module_bug_list);
+
+static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ const char *name)
+{
+ char *secstrings;
+ unsigned int i;
+
+ secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+ for (i = 1; i < hdr->e_shnum; i++)
+ if (strcmp(secstrings+sechdrs[i].sh_name, name) == 0)
+ return &sechdrs[i];
+ return NULL;
+}
+
+int module_finalize(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs, struct module *me)
+{
+ const Elf_Shdr *sect;
+
+ /* Apply feature fixups */
+ sect = find_section(hdr, sechdrs, "__ftr_fixup");
+ if (sect != NULL)
+ do_feature_fixups(cur_cpu_spec->cpu_features,
+ (void *)sect->sh_addr,
+ (void *)sect->sh_addr + sect->sh_size);
+
+ sect = find_section(hdr, sechdrs, "__mmu_ftr_fixup");
+ if (sect != NULL)
+ do_feature_fixups(cur_cpu_spec->mmu_features,
+ (void *)sect->sh_addr,
+ (void *)sect->sh_addr + sect->sh_size);
+
+#ifdef CONFIG_PPC64
+ sect = find_section(hdr, sechdrs, "__fw_ftr_fixup");
+ if (sect != NULL)
+ do_feature_fixups(powerpc_firmware_features,
+ (void *)sect->sh_addr,
+ (void *)sect->sh_addr + sect->sh_size);
+#endif
+
+ sect = find_section(hdr, sechdrs, "__lwsync_fixup");
+ if (sect != NULL)
+ do_lwsync_fixups(cur_cpu_spec->cpu_features,
+ (void *)sect->sh_addr,
+ (void *)sect->sh_addr + sect->sh_size);
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
new file mode 100644
index 00000000..2e3200ca
--- /dev/null
+++ b/arch/powerpc/kernel/module_32.c
@@ -0,0 +1,308 @@
+/* Kernel module help for PPC.
+ Copyright (C) 2001 Rusty Russell.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#include <linux/module.h>
+#include <linux/moduleloader.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ftrace.h>
+#include <linux/cache.h>
+#include <linux/bug.h>
+#include <linux/sort.h>
+
+#include "setup.h"
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt , ...)
+#endif
+
+/* Count how many different relocations (different symbol, different
+ addend) */
+static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num)
+{
+ unsigned int i, r_info, r_addend, _count_relocs;
+
+ _count_relocs = 0;
+ r_info = 0;
+ r_addend = 0;
+ for (i = 0; i < num; i++)
+ /* Only count 24-bit relocs, others don't need stubs */
+ if (ELF32_R_TYPE(rela[i].r_info) == R_PPC_REL24 &&
+ (r_info != ELF32_R_SYM(rela[i].r_info) ||
+ r_addend != rela[i].r_addend)) {
+ _count_relocs++;
+ r_info = ELF32_R_SYM(rela[i].r_info);
+ r_addend = rela[i].r_addend;
+ }
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+ _count_relocs++; /* add one for ftrace_caller */
+#endif
+ return _count_relocs;
+}
+
+static int relacmp(const void *_x, const void *_y)
+{
+ const Elf32_Rela *x, *y;
+
+ y = (Elf32_Rela *)_x;
+ x = (Elf32_Rela *)_y;
+
+ /* Compare the entire r_info (as opposed to ELF32_R_SYM(r_info) only) to
+ * make the comparison cheaper/faster. It won't affect the sorting or
+ * the counting algorithms' performance
+ */
+ if (x->r_info < y->r_info)
+ return -1;
+ else if (x->r_info > y->r_info)
+ return 1;
+ else if (x->r_addend < y->r_addend)
+ return -1;
+ else if (x->r_addend > y->r_addend)
+ return 1;
+ else
+ return 0;
+}
+
+static void relaswap(void *_x, void *_y, int size)
+{
+ uint32_t *x, *y, tmp;
+ int i;
+
+ y = (uint32_t *)_x;
+ x = (uint32_t *)_y;
+
+ for (i = 0; i < sizeof(Elf32_Rela) / sizeof(uint32_t); i++) {
+ tmp = x[i];
+ x[i] = y[i];
+ y[i] = tmp;
+ }
+}
+
+/* Get the potential trampolines size required of the init and
+ non-init sections */
+static unsigned long get_plt_size(const Elf32_Ehdr *hdr,
+ const Elf32_Shdr *sechdrs,
+ const char *secstrings,
+ int is_init)
+{
+ unsigned long ret = 0;
+ unsigned i;
+
+ /* Everything marked ALLOC (this includes the exported
+ symbols) */
+ for (i = 1; i < hdr->e_shnum; i++) {
+ /* If it's called *.init*, and we're not init, we're
+ not interested */
+ if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0)
+ != is_init)
+ continue;
+
+ /* We don't want to look at debug sections. */
+ if (strstr(secstrings + sechdrs[i].sh_name, ".debug") != 0)
+ continue;
+
+ if (sechdrs[i].sh_type == SHT_RELA) {
+ DEBUGP("Found relocations in section %u\n", i);
+ DEBUGP("Ptr: %p. Number: %u\n",
+ (void *)hdr + sechdrs[i].sh_offset,
+ sechdrs[i].sh_size / sizeof(Elf32_Rela));
+
+ /* Sort the relocation information based on a symbol and
+ * addend key. This is a stable O(n*log n) complexity
+ * alogrithm but it will reduce the complexity of
+ * count_relocs() to linear complexity O(n)
+ */
+ sort((void *)hdr + sechdrs[i].sh_offset,
+ sechdrs[i].sh_size / sizeof(Elf32_Rela),
+ sizeof(Elf32_Rela), relacmp, relaswap);
+
+ ret += count_relocs((void *)hdr
+ + sechdrs[i].sh_offset,
+ sechdrs[i].sh_size
+ / sizeof(Elf32_Rela))
+ * sizeof(struct ppc_plt_entry);
+ }
+ }
+
+ return ret;
+}
+
+int module_frob_arch_sections(Elf32_Ehdr *hdr,
+ Elf32_Shdr *sechdrs,
+ char *secstrings,
+ struct module *me)
+{
+ unsigned int i;
+
+ /* Find .plt and .init.plt sections */
+ for (i = 0; i < hdr->e_shnum; i++) {
+ if (strcmp(secstrings + sechdrs[i].sh_name, ".init.plt") == 0)
+ me->arch.init_plt_section = i;
+ else if (strcmp(secstrings + sechdrs[i].sh_name, ".plt") == 0)
+ me->arch.core_plt_section = i;
+ }
+ if (!me->arch.core_plt_section || !me->arch.init_plt_section) {
+ printk("Module doesn't contain .plt or .init.plt sections.\n");
+ return -ENOEXEC;
+ }
+
+ /* Override their sizes */
+ sechdrs[me->arch.core_plt_section].sh_size
+ = get_plt_size(hdr, sechdrs, secstrings, 0);
+ sechdrs[me->arch.init_plt_section].sh_size
+ = get_plt_size(hdr, sechdrs, secstrings, 1);
+ return 0;
+}
+
+static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val)
+{
+ if (entry->jump[0] == 0x3d800000 + ((val + 0x8000) >> 16)
+ && entry->jump[1] == 0x398c0000 + (val & 0xffff))
+ return 1;
+ return 0;
+}
+
+/* Set up a trampoline in the PLT to bounce us to the distant function */
+static uint32_t do_plt_call(void *location,
+ Elf32_Addr val,
+ Elf32_Shdr *sechdrs,
+ struct module *mod)
+{
+ struct ppc_plt_entry *entry;
+
+ DEBUGP("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location);
+ /* Init, or core PLT? */
+ if (location >= mod->module_core
+ && location < mod->module_core + mod->core_size)
+ entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr;
+ else
+ entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr;
+
+ /* Find this entry, or if that fails, the next avail. entry */
+ while (entry->jump[0]) {
+ if (entry_matches(entry, val)) return (uint32_t)entry;
+ entry++;
+ }
+
+ entry->jump[0] = 0x3d800000+((val+0x8000)>>16); /* lis r12,sym@ha */
+ entry->jump[1] = 0x398c0000 + (val&0xffff); /* addi r12,r12,sym@l*/
+ entry->jump[2] = 0x7d8903a6; /* mtctr r12 */
+ entry->jump[3] = 0x4e800420; /* bctr */
+
+ DEBUGP("Initialized plt for 0x%x at %p\n", val, entry);
+ return (uint32_t)entry;
+}
+
+int apply_relocate_add(Elf32_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex,
+ unsigned int relsec,
+ struct module *module)
+{
+ unsigned int i;
+ Elf32_Rela *rela = (void *)sechdrs[relsec].sh_addr;
+ Elf32_Sym *sym;
+ uint32_t *location;
+ uint32_t value;
+
+ DEBUGP("Applying ADD relocate section %u to %u\n", relsec,
+ sechdrs[relsec].sh_info);
+ for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
+ /* This is where to make the change */
+ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+ + rela[i].r_offset;
+ /* This is the symbol it is referring to. Note that all
+ undefined symbols have been resolved. */
+ sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
+ + ELF32_R_SYM(rela[i].r_info);
+ /* `Everything is relative'. */
+ value = sym->st_value + rela[i].r_addend;
+
+ switch (ELF32_R_TYPE(rela[i].r_info)) {
+ case R_PPC_ADDR32:
+ /* Simply set it */
+ *(uint32_t *)location = value;
+ break;
+
+ case R_PPC_ADDR16_LO:
+ /* Low half of the symbol */
+ *(uint16_t *)location = value;
+ break;
+
+ case R_PPC_ADDR16_HI:
+ /* Higher half of the symbol */
+ *(uint16_t *)location = (value >> 16);
+ break;
+
+ case R_PPC_ADDR16_HA:
+ /* Sign-adjusted lower 16 bits: PPC ELF ABI says:
+ (((x >> 16) + ((x & 0x8000) ? 1 : 0))) & 0xFFFF.
+ This is the same, only sane.
+ */
+ *(uint16_t *)location = (value + 0x8000) >> 16;
+ break;
+
+ case R_PPC_REL24:
+ if ((int)(value - (uint32_t)location) < -0x02000000
+ || (int)(value - (uint32_t)location) >= 0x02000000)
+ value = do_plt_call(location, value,
+ sechdrs, module);
+
+ /* Only replace bits 2 through 26 */
+ DEBUGP("REL24 value = %08X. location = %08X\n",
+ value, (uint32_t)location);
+ DEBUGP("Location before: %08X.\n",
+ *(uint32_t *)location);
+ *(uint32_t *)location
+ = (*(uint32_t *)location & ~0x03fffffc)
+ | ((value - (uint32_t)location)
+ & 0x03fffffc);
+ DEBUGP("Location after: %08X.\n",
+ *(uint32_t *)location);
+ DEBUGP("ie. jump to %08X+%08X = %08X\n",
+ *(uint32_t *)location & 0x03fffffc,
+ (uint32_t)location,
+ (*(uint32_t *)location & 0x03fffffc)
+ + (uint32_t)location);
+ break;
+
+ case R_PPC_REL32:
+ /* 32-bit relative jump. */
+ *(uint32_t *)location = value - (uint32_t)location;
+ break;
+
+ default:
+ printk("%s: unknown ADD relocation: %u\n",
+ module->name,
+ ELF32_R_TYPE(rela[i].r_info));
+ return -ENOEXEC;
+ }
+ }
+#ifdef CONFIG_DYNAMIC_FTRACE
+ module->arch.tramp =
+ do_plt_call(module->module_core,
+ (unsigned long)ftrace_caller,
+ sechdrs, module);
+#endif
+ return 0;
+}
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
new file mode 100644
index 00000000..9f44a775
--- /dev/null
+++ b/arch/powerpc/kernel/module_64.c
@@ -0,0 +1,448 @@
+/* Kernel module help for PPC64.
+ Copyright (C) 2001, 2003 Rusty Russell IBM Corporation.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#include <linux/module.h>
+#include <linux/elf.h>
+#include <linux/moduleloader.h>
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+#include <linux/ftrace.h>
+#include <linux/bug.h>
+#include <asm/module.h>
+#include <asm/firmware.h>
+#include <asm/code-patching.h>
+#include <linux/sort.h>
+
+#include "setup.h"
+
+/* FIXME: We don't do .init separately. To do this, we'd need to have
+ a separate r2 value in the init and core section, and stub between
+ them, too.
+
+ Using a magic allocator which places modules within 32MB solves
+ this, and makes other things simpler. Anton?
+ --RR. */
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt , ...)
+#endif
+
+/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
+ the kernel itself). But on PPC64, these need to be used for every
+ jump, actually, to reset r2 (TOC+0x8000). */
+struct ppc64_stub_entry
+{
+ /* 28 byte jump instruction sequence (7 instructions) */
+ unsigned char jump[28];
+ unsigned char unused[4];
+ /* Data for the above code */
+ struct ppc64_opd_entry opd;
+};
+
+/* We use a stub to fix up r2 (TOC ptr) and to jump to the (external)
+ function which may be more than 24-bits away. We could simply
+ patch the new r2 value and function pointer into the stub, but it's
+ significantly shorter to put these values at the end of the stub
+ code, and patch the stub address (32-bits relative to the TOC ptr,
+ r2) into the stub. */
+static struct ppc64_stub_entry ppc64_stub =
+{ .jump = {
+ 0x3d, 0x82, 0x00, 0x00, /* addis r12,r2, <high> */
+ 0x39, 0x8c, 0x00, 0x00, /* addi r12,r12, <low> */
+ /* Save current r2 value in magic place on the stack. */
+ 0xf8, 0x41, 0x00, 0x28, /* std r2,40(r1) */
+ 0xe9, 0x6c, 0x00, 0x20, /* ld r11,32(r12) */
+ 0xe8, 0x4c, 0x00, 0x28, /* ld r2,40(r12) */
+ 0x7d, 0x69, 0x03, 0xa6, /* mtctr r11 */
+ 0x4e, 0x80, 0x04, 0x20 /* bctr */
+} };
+
+/* Count how many different 24-bit relocations (different symbol,
+ different addend) */
+static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
+{
+ unsigned int i, r_info, r_addend, _count_relocs;
+
+ /* FIXME: Only count external ones --RR */
+ _count_relocs = 0;
+ r_info = 0;
+ r_addend = 0;
+ for (i = 0; i < num; i++)
+ /* Only count 24-bit relocs, others don't need stubs */
+ if (ELF64_R_TYPE(rela[i].r_info) == R_PPC_REL24 &&
+ (r_info != ELF64_R_SYM(rela[i].r_info) ||
+ r_addend != rela[i].r_addend)) {
+ _count_relocs++;
+ r_info = ELF64_R_SYM(rela[i].r_info);
+ r_addend = rela[i].r_addend;
+ }
+
+ return _count_relocs;
+}
+
+static int relacmp(const void *_x, const void *_y)
+{
+ const Elf64_Rela *x, *y;
+
+ y = (Elf64_Rela *)_x;
+ x = (Elf64_Rela *)_y;
+
+ /* Compare the entire r_info (as opposed to ELF64_R_SYM(r_info) only) to
+ * make the comparison cheaper/faster. It won't affect the sorting or
+ * the counting algorithms' performance
+ */
+ if (x->r_info < y->r_info)
+ return -1;
+ else if (x->r_info > y->r_info)
+ return 1;
+ else if (x->r_addend < y->r_addend)
+ return -1;
+ else if (x->r_addend > y->r_addend)
+ return 1;
+ else
+ return 0;
+}
+
+static void relaswap(void *_x, void *_y, int size)
+{
+ uint64_t *x, *y, tmp;
+ int i;
+
+ y = (uint64_t *)_x;
+ x = (uint64_t *)_y;
+
+ for (i = 0; i < sizeof(Elf64_Rela) / sizeof(uint64_t); i++) {
+ tmp = x[i];
+ x[i] = y[i];
+ y[i] = tmp;
+ }
+}
+
+/* Get size of potential trampolines required. */
+static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
+ const Elf64_Shdr *sechdrs)
+{
+ /* One extra reloc so it's always 0-funcaddr terminated */
+ unsigned long relocs = 1;
+ unsigned i;
+
+ /* Every relocated section... */
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type == SHT_RELA) {
+ DEBUGP("Found relocations in section %u\n", i);
+ DEBUGP("Ptr: %p. Number: %lu\n",
+ (void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size / sizeof(Elf64_Rela));
+
+ /* Sort the relocation information based on a symbol and
+ * addend key. This is a stable O(n*log n) complexity
+ * alogrithm but it will reduce the complexity of
+ * count_relocs() to linear complexity O(n)
+ */
+ sort((void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size / sizeof(Elf64_Rela),
+ sizeof(Elf64_Rela), relacmp, relaswap);
+
+ relocs += count_relocs((void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size
+ / sizeof(Elf64_Rela));
+ }
+ }
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+ /* make the trampoline to the ftrace_caller */
+ relocs++;
+#endif
+
+ DEBUGP("Looks like a total of %lu stubs, max\n", relocs);
+ return relocs * sizeof(struct ppc64_stub_entry);
+}
+
+static void dedotify_versions(struct modversion_info *vers,
+ unsigned long size)
+{
+ struct modversion_info *end;
+
+ for (end = (void *)vers + size; vers < end; vers++)
+ if (vers->name[0] == '.')
+ memmove(vers->name, vers->name+1, strlen(vers->name));
+}
+
+/* Undefined symbols which refer to .funcname, hack to funcname */
+static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
+{
+ unsigned int i;
+
+ for (i = 1; i < numsyms; i++) {
+ if (syms[i].st_shndx == SHN_UNDEF) {
+ char *name = strtab + syms[i].st_name;
+ if (name[0] == '.')
+ memmove(name, name+1, strlen(name));
+ }
+ }
+}
+
+int module_frob_arch_sections(Elf64_Ehdr *hdr,
+ Elf64_Shdr *sechdrs,
+ char *secstrings,
+ struct module *me)
+{
+ unsigned int i;
+
+ /* Find .toc and .stubs sections, symtab and strtab */
+ for (i = 1; i < hdr->e_shnum; i++) {
+ char *p;
+ if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
+ me->arch.stubs_section = i;
+ else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0)
+ me->arch.toc_section = i;
+ else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
+ dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
+ sechdrs[i].sh_size);
+
+ /* We don't handle .init for the moment: rename to _init */
+ while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init")))
+ p[0] = '_';
+
+ if (sechdrs[i].sh_type == SHT_SYMTAB)
+ dedotify((void *)hdr + sechdrs[i].sh_offset,
+ sechdrs[i].sh_size / sizeof(Elf64_Sym),
+ (void *)hdr
+ + sechdrs[sechdrs[i].sh_link].sh_offset);
+ }
+
+ if (!me->arch.stubs_section) {
+ printk("%s: doesn't contain .stubs.\n", me->name);
+ return -ENOEXEC;
+ }
+
+ /* If we don't have a .toc, just use .stubs. We need to set r2
+ to some reasonable value in case the module calls out to
+ other functions via a stub, or if a function pointer escapes
+ the module by some means. */
+ if (!me->arch.toc_section)
+ me->arch.toc_section = me->arch.stubs_section;
+
+ /* Override the stubs size */
+ sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs);
+ return 0;
+}
+
+/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
+ gives the value maximum span in an instruction which uses a signed
+ offset) */
+static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me)
+{
+ return sechdrs[me->arch.toc_section].sh_addr + 0x8000;
+}
+
+/* Both low and high 16 bits are added as SIGNED additions, so if low
+ 16 bits has high bit set, high 16 bits must be adjusted. These
+ macros do that (stolen from binutils). */
+#define PPC_LO(v) ((v) & 0xffff)
+#define PPC_HI(v) (((v) >> 16) & 0xffff)
+#define PPC_HA(v) PPC_HI ((v) + 0x8000)
+
+/* Patch stub to reference function and correct r2 value. */
+static inline int create_stub(Elf64_Shdr *sechdrs,
+ struct ppc64_stub_entry *entry,
+ struct ppc64_opd_entry *opd,
+ struct module *me)
+{
+ Elf64_Half *loc1, *loc2;
+ long reladdr;
+
+ *entry = ppc64_stub;
+
+ loc1 = (Elf64_Half *)&entry->jump[2];
+ loc2 = (Elf64_Half *)&entry->jump[6];
+
+ /* Stub uses address relative to r2. */
+ reladdr = (unsigned long)entry - my_r2(sechdrs, me);
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+ printk("%s: Address %p of stub out of range of %p.\n",
+ me->name, (void *)reladdr, (void *)my_r2);
+ return 0;
+ }
+ DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr);
+
+ *loc1 = PPC_HA(reladdr);
+ *loc2 = PPC_LO(reladdr);
+ entry->opd.funcaddr = opd->funcaddr;
+ entry->opd.r2 = opd->r2;
+ return 1;
+}
+
+/* Create stub to jump to function described in this OPD: we need the
+ stub to set up the TOC ptr (r2) for the function. */
+static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
+ unsigned long opdaddr,
+ struct module *me)
+{
+ struct ppc64_stub_entry *stubs;
+ struct ppc64_opd_entry *opd = (void *)opdaddr;
+ unsigned int i, num_stubs;
+
+ num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs);
+
+ /* Find this stub, or if that fails, the next avail. entry */
+ stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr;
+ for (i = 0; stubs[i].opd.funcaddr; i++) {
+ BUG_ON(i >= num_stubs);
+
+ if (stubs[i].opd.funcaddr == opd->funcaddr)
+ return (unsigned long)&stubs[i];
+ }
+
+ if (!create_stub(sechdrs, &stubs[i], opd, me))
+ return 0;
+
+ return (unsigned long)&stubs[i];
+}
+
+/* We expect a noop next: if it is, replace it with instruction to
+ restore r2. */
+static int restore_r2(u32 *instruction, struct module *me)
+{
+ if (*instruction != PPC_INST_NOP) {
+ printk("%s: Expect noop after relocate, got %08x\n",
+ me->name, *instruction);
+ return 0;
+ }
+ *instruction = 0xe8410028; /* ld r2,40(r1) */
+ return 1;
+}
+
+int apply_relocate_add(Elf64_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex,
+ unsigned int relsec,
+ struct module *me)
+{
+ unsigned int i;
+ Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr;
+ Elf64_Sym *sym;
+ unsigned long *location;
+ unsigned long value;
+
+ DEBUGP("Applying ADD relocate section %u to %u\n", relsec,
+ sechdrs[relsec].sh_info);
+ for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
+ /* This is where to make the change */
+ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+ + rela[i].r_offset;
+ /* This is the symbol it is referring to */
+ sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+ + ELF64_R_SYM(rela[i].r_info);
+
+ DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n",
+ location, (long)ELF64_R_TYPE(rela[i].r_info),
+ strtab + sym->st_name, (unsigned long)sym->st_value,
+ (long)rela[i].r_addend);
+
+ /* `Everything is relative'. */
+ value = sym->st_value + rela[i].r_addend;
+
+ switch (ELF64_R_TYPE(rela[i].r_info)) {
+ case R_PPC64_ADDR32:
+ /* Simply set it */
+ *(u32 *)location = value;
+ break;
+
+ case R_PPC64_ADDR64:
+ /* Simply set it */
+ *(unsigned long *)location = value;
+ break;
+
+ case R_PPC64_TOC:
+ *(unsigned long *)location = my_r2(sechdrs, me);
+ break;
+
+ case R_PPC64_TOC16:
+ /* Subtract TOC pointer */
+ value -= my_r2(sechdrs, me);
+ if (value + 0x8000 > 0xffff) {
+ printk("%s: bad TOC16 relocation (%lu)\n",
+ me->name, value);
+ return -ENOEXEC;
+ }
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xffff)
+ | (value & 0xffff);
+ break;
+
+ case R_PPC64_TOC16_DS:
+ /* Subtract TOC pointer */
+ value -= my_r2(sechdrs, me);
+ if ((value & 3) != 0 || value + 0x8000 > 0xffff) {
+ printk("%s: bad TOC16_DS relocation (%lu)\n",
+ me->name, value);
+ return -ENOEXEC;
+ }
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xfffc)
+ | (value & 0xfffc);
+ break;
+
+ case R_PPC_REL24:
+ /* FIXME: Handle weak symbols here --RR */
+ if (sym->st_shndx == SHN_UNDEF) {
+ /* External: go via stub */
+ value = stub_for_addr(sechdrs, value, me);
+ if (!value)
+ return -ENOENT;
+ if (!restore_r2((u32 *)location + 1, me))
+ return -ENOEXEC;
+ }
+
+ /* Convert value to relative */
+ value -= (unsigned long)location;
+ if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){
+ printk("%s: REL24 %li out of range!\n",
+ me->name, (long int)value);
+ return -ENOEXEC;
+ }
+
+ /* Only replace bits 2 through 26 */
+ *(uint32_t *)location
+ = (*(uint32_t *)location & ~0x03fffffc)
+ | (value & 0x03fffffc);
+ break;
+
+ case R_PPC64_REL64:
+ /* 64 bits relative (used by features fixups) */
+ *location = value - (unsigned long)location;
+ break;
+
+ default:
+ printk("%s: Unknown ADD relocation: %lu\n",
+ me->name,
+ (unsigned long)ELF64_R_TYPE(rela[i].r_info));
+ return -ENOEXEC;
+ }
+ }
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+ me->arch.toc = my_r2(sechdrs, me);
+ me->arch.tramp = stub_for_addr(sechdrs,
+ (unsigned long)ftrace_caller,
+ me);
+#endif
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c
new file mode 100644
index 00000000..8bbc12d2
--- /dev/null
+++ b/arch/powerpc/kernel/msi.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2006-2007, Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
+
+#include <asm/machdep.h>
+
+int arch_msi_check_device(struct pci_dev* dev, int nvec, int type)
+{
+ if (!ppc_md.setup_msi_irqs || !ppc_md.teardown_msi_irqs) {
+ pr_debug("msi: Platform doesn't provide MSI callbacks.\n");
+ return -ENOSYS;
+ }
+
+ /* PowerPC doesn't support multiple MSI yet */
+ if (type == PCI_CAP_ID_MSI && nvec > 1)
+ return 1;
+
+ if (ppc_md.msi_check_device) {
+ pr_debug("msi: Using platform check routine.\n");
+ return ppc_md.msi_check_device(dev, nvec, type);
+ }
+
+ return 0;
+}
+
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+ return ppc_md.setup_msi_irqs(dev, nvec, type);
+}
+
+void arch_teardown_msi_irqs(struct pci_dev *dev)
+{
+ ppc_md.teardown_msi_irqs(dev);
+}
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
new file mode 100644
index 00000000..bec1e930
--- /dev/null
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -0,0 +1,564 @@
+/*
+ * c 2001 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * /dev/nvram driver for PPC64
+ *
+ * This perhaps should live in drivers/char
+ *
+ * TODO: Split the /dev/nvram part (that one can use
+ * drivers/char/generic_nvram.c) from the arch & partition
+ * parsing code.
+ */
+
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/fcntl.h>
+#include <linux/nvram.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <asm/uaccess.h>
+#include <asm/nvram.h>
+#include <asm/rtas.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+
+#undef DEBUG_NVRAM
+
+#define NVRAM_HEADER_LEN sizeof(struct nvram_header)
+#define NVRAM_BLOCK_LEN NVRAM_HEADER_LEN
+
+/* If change this size, then change the size of NVNAME_LEN */
+struct nvram_header {
+ unsigned char signature;
+ unsigned char checksum;
+ unsigned short length;
+ /* Terminating null required only for names < 12 chars. */
+ char name[12];
+};
+
+struct nvram_partition {
+ struct list_head partition;
+ struct nvram_header header;
+ unsigned int index;
+};
+
+static LIST_HEAD(nvram_partitions);
+
+static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin)
+{
+ int size;
+
+ if (ppc_md.nvram_size == NULL)
+ return -ENODEV;
+ size = ppc_md.nvram_size();
+
+ switch (origin) {
+ case 1:
+ offset += file->f_pos;
+ break;
+ case 2:
+ offset += size;
+ break;
+ }
+ if (offset < 0)
+ return -EINVAL;
+ file->f_pos = offset;
+ return file->f_pos;
+}
+
+
+static ssize_t dev_nvram_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ ssize_t ret;
+ char *tmp = NULL;
+ ssize_t size;
+
+ ret = -ENODEV;
+ if (!ppc_md.nvram_size)
+ goto out;
+
+ ret = 0;
+ size = ppc_md.nvram_size();
+ if (*ppos >= size || size < 0)
+ goto out;
+
+ count = min_t(size_t, count, size - *ppos);
+ count = min(count, PAGE_SIZE);
+
+ ret = -ENOMEM;
+ tmp = kmalloc(count, GFP_KERNEL);
+ if (!tmp)
+ goto out;
+
+ ret = ppc_md.nvram_read(tmp, count, ppos);
+ if (ret <= 0)
+ goto out;
+
+ if (copy_to_user(buf, tmp, ret))
+ ret = -EFAULT;
+
+out:
+ kfree(tmp);
+ return ret;
+
+}
+
+static ssize_t dev_nvram_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ ssize_t ret;
+ char *tmp = NULL;
+ ssize_t size;
+
+ ret = -ENODEV;
+ if (!ppc_md.nvram_size)
+ goto out;
+
+ ret = 0;
+ size = ppc_md.nvram_size();
+ if (*ppos >= size || size < 0)
+ goto out;
+
+ count = min_t(size_t, count, size - *ppos);
+ count = min(count, PAGE_SIZE);
+
+ ret = -ENOMEM;
+ tmp = kmalloc(count, GFP_KERNEL);
+ if (!tmp)
+ goto out;
+
+ ret = -EFAULT;
+ if (copy_from_user(tmp, buf, count))
+ goto out;
+
+ ret = ppc_md.nvram_write(tmp, count, ppos);
+
+out:
+ kfree(tmp);
+ return ret;
+
+}
+
+static long dev_nvram_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ switch(cmd) {
+#ifdef CONFIG_PPC_PMAC
+ case OBSOLETE_PMAC_NVRAM_GET_OFFSET:
+ printk(KERN_WARNING "nvram: Using obsolete PMAC_NVRAM_GET_OFFSET ioctl\n");
+ case IOC_NVRAM_GET_OFFSET: {
+ int part, offset;
+
+ if (!machine_is(powermac))
+ return -EINVAL;
+ if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0)
+ return -EFAULT;
+ if (part < pmac_nvram_OF || part > pmac_nvram_NR)
+ return -EINVAL;
+ offset = pmac_get_partition(part);
+ if (offset < 0)
+ return offset;
+ if (copy_to_user((void __user*)arg, &offset, sizeof(offset)) != 0)
+ return -EFAULT;
+ return 0;
+ }
+#endif /* CONFIG_PPC_PMAC */
+ default:
+ return -EINVAL;
+ }
+}
+
+const struct file_operations nvram_fops = {
+ .owner = THIS_MODULE,
+ .llseek = dev_nvram_llseek,
+ .read = dev_nvram_read,
+ .write = dev_nvram_write,
+ .unlocked_ioctl = dev_nvram_ioctl,
+};
+
+static struct miscdevice nvram_dev = {
+ NVRAM_MINOR,
+ "nvram",
+ &nvram_fops
+};
+
+
+#ifdef DEBUG_NVRAM
+static void __init nvram_print_partitions(char * label)
+{
+ struct nvram_partition * tmp_part;
+
+ printk(KERN_WARNING "--------%s---------\n", label);
+ printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n");
+ list_for_each_entry(tmp_part, &nvram_partitions, partition) {
+ printk(KERN_WARNING "%4d \t%02x\t%02x\t%d\t%12s\n",
+ tmp_part->index, tmp_part->header.signature,
+ tmp_part->header.checksum, tmp_part->header.length,
+ tmp_part->header.name);
+ }
+}
+#endif
+
+
+static int __init nvram_write_header(struct nvram_partition * part)
+{
+ loff_t tmp_index;
+ int rc;
+
+ tmp_index = part->index;
+ rc = ppc_md.nvram_write((char *)&part->header, NVRAM_HEADER_LEN, &tmp_index);
+
+ return rc;
+}
+
+
+static unsigned char __init nvram_checksum(struct nvram_header *p)
+{
+ unsigned int c_sum, c_sum2;
+ unsigned short *sp = (unsigned short *)p->name; /* assume 6 shorts */
+ c_sum = p->signature + p->length + sp[0] + sp[1] + sp[2] + sp[3] + sp[4] + sp[5];
+
+ /* The sum may have spilled into the 3rd byte. Fold it back. */
+ c_sum = ((c_sum & 0xffff) + (c_sum >> 16)) & 0xffff;
+ /* The sum cannot exceed 2 bytes. Fold it into a checksum */
+ c_sum2 = (c_sum >> 8) + (c_sum << 8);
+ c_sum = ((c_sum + c_sum2) >> 8) & 0xff;
+ return c_sum;
+}
+
+/*
+ * Per the criteria passed via nvram_remove_partition(), should this
+ * partition be removed? 1=remove, 0=keep
+ */
+static int nvram_can_remove_partition(struct nvram_partition *part,
+ const char *name, int sig, const char *exceptions[])
+{
+ if (part->header.signature != sig)
+ return 0;
+ if (name) {
+ if (strncmp(name, part->header.name, 12))
+ return 0;
+ } else if (exceptions) {
+ const char **except;
+ for (except = exceptions; *except; except++) {
+ if (!strncmp(*except, part->header.name, 12))
+ return 0;
+ }
+ }
+ return 1;
+}
+
+/**
+ * nvram_remove_partition - Remove one or more partitions in nvram
+ * @name: name of the partition to remove, or NULL for a
+ * signature only match
+ * @sig: signature of the partition(s) to remove
+ * @exceptions: When removing all partitions with a matching signature,
+ * leave these alone.
+ */
+
+int __init nvram_remove_partition(const char *name, int sig,
+ const char *exceptions[])
+{
+ struct nvram_partition *part, *prev, *tmp;
+ int rc;
+
+ list_for_each_entry(part, &nvram_partitions, partition) {
+ if (!nvram_can_remove_partition(part, name, sig, exceptions))
+ continue;
+
+ /* Make partition a free partition */
+ part->header.signature = NVRAM_SIG_FREE;
+ strncpy(part->header.name, "wwwwwwwwwwww", 12);
+ part->header.checksum = nvram_checksum(&part->header);
+ rc = nvram_write_header(part);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_remove_partition: nvram_write failed (%d)\n", rc);
+ return rc;
+ }
+ }
+
+ /* Merge contiguous ones */
+ prev = NULL;
+ list_for_each_entry_safe(part, tmp, &nvram_partitions, partition) {
+ if (part->header.signature != NVRAM_SIG_FREE) {
+ prev = NULL;
+ continue;
+ }
+ if (prev) {
+ prev->header.length += part->header.length;
+ prev->header.checksum = nvram_checksum(&part->header);
+ rc = nvram_write_header(part);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_remove_partition: nvram_write failed (%d)\n", rc);
+ return rc;
+ }
+ list_del(&part->partition);
+ kfree(part);
+ } else
+ prev = part;
+ }
+
+ return 0;
+}
+
+/**
+ * nvram_create_partition - Create a partition in nvram
+ * @name: name of the partition to create
+ * @sig: signature of the partition to create
+ * @req_size: size of data to allocate in bytes
+ * @min_size: minimum acceptable size (0 means req_size)
+ *
+ * Returns a negative error code or a positive nvram index
+ * of the beginning of the data area of the newly created
+ * partition. If you provided a min_size smaller than req_size
+ * you need to query for the actual size yourself after the
+ * call using nvram_partition_get_size().
+ */
+loff_t __init nvram_create_partition(const char *name, int sig,
+ int req_size, int min_size)
+{
+ struct nvram_partition *part;
+ struct nvram_partition *new_part;
+ struct nvram_partition *free_part = NULL;
+ static char nv_init_vals[16];
+ loff_t tmp_index;
+ long size = 0;
+ int rc;
+
+ /* Convert sizes from bytes to blocks */
+ req_size = _ALIGN_UP(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
+ min_size = _ALIGN_UP(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
+
+ /* If no minimum size specified, make it the same as the
+ * requested size
+ */
+ if (min_size == 0)
+ min_size = req_size;
+ if (min_size > req_size)
+ return -EINVAL;
+
+ /* Now add one block to each for the header */
+ req_size += 1;
+ min_size += 1;
+
+ /* Find a free partition that will give us the maximum needed size
+ If can't find one that will give us the minimum size needed */
+ list_for_each_entry(part, &nvram_partitions, partition) {
+ if (part->header.signature != NVRAM_SIG_FREE)
+ continue;
+
+ if (part->header.length >= req_size) {
+ size = req_size;
+ free_part = part;
+ break;
+ }
+ if (part->header.length > size &&
+ part->header.length >= min_size) {
+ size = part->header.length;
+ free_part = part;
+ }
+ }
+ if (!size)
+ return -ENOSPC;
+
+ /* Create our OS partition */
+ new_part = kmalloc(sizeof(*new_part), GFP_KERNEL);
+ if (!new_part) {
+ pr_err("nvram_create_os_partition: kmalloc failed\n");
+ return -ENOMEM;
+ }
+
+ new_part->index = free_part->index;
+ new_part->header.signature = sig;
+ new_part->header.length = size;
+ strncpy(new_part->header.name, name, 12);
+ new_part->header.checksum = nvram_checksum(&new_part->header);
+
+ rc = nvram_write_header(new_part);
+ if (rc <= 0) {
+ pr_err("nvram_create_os_partition: nvram_write_header "
+ "failed (%d)\n", rc);
+ return rc;
+ }
+ list_add_tail(&new_part->partition, &free_part->partition);
+
+ /* Adjust or remove the partition we stole the space from */
+ if (free_part->header.length > size) {
+ free_part->index += size * NVRAM_BLOCK_LEN;
+ free_part->header.length -= size;
+ free_part->header.checksum = nvram_checksum(&free_part->header);
+ rc = nvram_write_header(free_part);
+ if (rc <= 0) {
+ pr_err("nvram_create_os_partition: nvram_write_header "
+ "failed (%d)\n", rc);
+ return rc;
+ }
+ } else {
+ list_del(&free_part->partition);
+ kfree(free_part);
+ }
+
+ /* Clear the new partition */
+ for (tmp_index = new_part->index + NVRAM_HEADER_LEN;
+ tmp_index < ((size - 1) * NVRAM_BLOCK_LEN);
+ tmp_index += NVRAM_BLOCK_LEN) {
+ rc = ppc_md.nvram_write(nv_init_vals, NVRAM_BLOCK_LEN, &tmp_index);
+ if (rc <= 0) {
+ pr_err("nvram_create_partition: nvram_write failed (%d)\n", rc);
+ return rc;
+ }
+ }
+
+ return new_part->index + NVRAM_HEADER_LEN;
+}
+
+/**
+ * nvram_get_partition_size - Get the data size of an nvram partition
+ * @data_index: This is the offset of the start of the data of
+ * the partition. The same value that is returned by
+ * nvram_create_partition().
+ */
+int nvram_get_partition_size(loff_t data_index)
+{
+ struct nvram_partition *part;
+
+ list_for_each_entry(part, &nvram_partitions, partition) {
+ if (part->index + NVRAM_HEADER_LEN == data_index)
+ return (part->header.length - 1) * NVRAM_BLOCK_LEN;
+ }
+ return -1;
+}
+
+
+/**
+ * nvram_find_partition - Find an nvram partition by signature and name
+ * @name: Name of the partition or NULL for any name
+ * @sig: Signature to test against
+ * @out_size: if non-NULL, returns the size of the data part of the partition
+ */
+loff_t nvram_find_partition(const char *name, int sig, int *out_size)
+{
+ struct nvram_partition *p;
+
+ list_for_each_entry(p, &nvram_partitions, partition) {
+ if (p->header.signature == sig &&
+ (!name || !strncmp(p->header.name, name, 12))) {
+ if (out_size)
+ *out_size = (p->header.length - 1) *
+ NVRAM_BLOCK_LEN;
+ return p->index + NVRAM_HEADER_LEN;
+ }
+ }
+ return 0;
+}
+
+int __init nvram_scan_partitions(void)
+{
+ loff_t cur_index = 0;
+ struct nvram_header phead;
+ struct nvram_partition * tmp_part;
+ unsigned char c_sum;
+ char * header;
+ int total_size;
+ int err;
+
+ if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0)
+ return -ENODEV;
+ total_size = ppc_md.nvram_size();
+
+ header = kmalloc(NVRAM_HEADER_LEN, GFP_KERNEL);
+ if (!header) {
+ printk(KERN_ERR "nvram_scan_partitions: Failed kmalloc\n");
+ return -ENOMEM;
+ }
+
+ while (cur_index < total_size) {
+
+ err = ppc_md.nvram_read(header, NVRAM_HEADER_LEN, &cur_index);
+ if (err != NVRAM_HEADER_LEN) {
+ printk(KERN_ERR "nvram_scan_partitions: Error parsing "
+ "nvram partitions\n");
+ goto out;
+ }
+
+ cur_index -= NVRAM_HEADER_LEN; /* nvram_read will advance us */
+
+ memcpy(&phead, header, NVRAM_HEADER_LEN);
+
+ err = 0;
+ c_sum = nvram_checksum(&phead);
+ if (c_sum != phead.checksum) {
+ printk(KERN_WARNING "WARNING: nvram partition checksum"
+ " was %02x, should be %02x!\n",
+ phead.checksum, c_sum);
+ printk(KERN_WARNING "Terminating nvram partition scan\n");
+ goto out;
+ }
+ if (!phead.length) {
+ printk(KERN_WARNING "WARNING: nvram corruption "
+ "detected: 0-length partition\n");
+ goto out;
+ }
+ tmp_part = (struct nvram_partition *)
+ kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
+ err = -ENOMEM;
+ if (!tmp_part) {
+ printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n");
+ goto out;
+ }
+
+ memcpy(&tmp_part->header, &phead, NVRAM_HEADER_LEN);
+ tmp_part->index = cur_index;
+ list_add_tail(&tmp_part->partition, &nvram_partitions);
+
+ cur_index += phead.length * NVRAM_BLOCK_LEN;
+ }
+ err = 0;
+
+#ifdef DEBUG_NVRAM
+ nvram_print_partitions("NVRAM Partitions");
+#endif
+
+ out:
+ kfree(header);
+ return err;
+}
+
+static int __init nvram_init(void)
+{
+ int rc;
+
+ BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16);
+
+ if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0)
+ return -ENODEV;
+
+ rc = misc_register(&nvram_dev);
+ if (rc != 0) {
+ printk(KERN_ERR "nvram_init: failed to register device\n");
+ return rc;
+ }
+
+ return rc;
+}
+
+void __exit nvram_cleanup(void)
+{
+ misc_deregister( &nvram_dev );
+}
+
+module_init(nvram_init);
+module_exit(nvram_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
new file mode 100644
index 00000000..2049f2d0
--- /dev/null
+++ b/arch/powerpc/kernel/of_platform.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corp.
+ * <benh@kernel.crashing.org>
+ * and Arnd Bergmann, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#undef DEBUG
+
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/mod_devicetable.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/atomic.h>
+
+#include <asm/errno.h>
+#include <asm/topology.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/eeh.h>
+
+#ifdef CONFIG_PPC_OF_PLATFORM_PCI
+
+/* The probing of PCI controllers from of_platform is currently
+ * 64 bits only, mostly due to gratuitous differences between
+ * the 32 and 64 bits PCI code on PowerPC and the 32 bits one
+ * lacking some bits needed here.
+ */
+
+static int __devinit of_pci_phb_probe(struct platform_device *dev)
+{
+ struct pci_controller *phb;
+
+ /* Check if we can do that ... */
+ if (ppc_md.pci_setup_phb == NULL)
+ return -ENODEV;
+
+ pr_info("Setting up PCI bus %s\n", dev->dev.of_node->full_name);
+
+ /* Alloc and setup PHB data structure */
+ phb = pcibios_alloc_controller(dev->dev.of_node);
+ if (!phb)
+ return -ENODEV;
+
+ /* Setup parent in sysfs */
+ phb->parent = &dev->dev;
+
+ /* Setup the PHB using arch provided callback */
+ if (ppc_md.pci_setup_phb(phb)) {
+ pcibios_free_controller(phb);
+ return -ENODEV;
+ }
+
+ /* Process "ranges" property */
+ pci_process_bridge_OF_ranges(phb, dev->dev.of_node, 0);
+
+ /* Init pci_dn data structures */
+ pci_devs_phb_init_dynamic(phb);
+
+ /* Create EEH devices for the PHB */
+ eeh_dev_phb_init_dynamic(phb);
+
+ /* Register devices with EEH */
+#ifdef CONFIG_EEH
+ if (dev->dev.of_node->child)
+ eeh_add_device_tree_early(dev->dev.of_node);
+#endif /* CONFIG_EEH */
+
+ /* Scan the bus */
+ pcibios_scan_phb(phb);
+ if (phb->bus == NULL)
+ return -ENXIO;
+
+ /* Claim resources. This might need some rework as well depending
+ * wether we are doing probe-only or not, like assigning unassigned
+ * resources etc...
+ */
+ pcibios_claim_one_bus(phb->bus);
+
+ /* Finish EEH setup */
+#ifdef CONFIG_EEH
+ eeh_add_device_tree_late(phb->bus);
+#endif
+
+ /* Add probed PCI devices to the device model */
+ pci_bus_add_devices(phb->bus);
+
+ return 0;
+}
+
+static struct of_device_id of_pci_phb_ids[] = {
+ { .type = "pci", },
+ { .type = "pcix", },
+ { .type = "pcie", },
+ { .type = "pciex", },
+ { .type = "ht", },
+ {}
+};
+
+static struct platform_driver of_pci_phb_driver = {
+ .probe = of_pci_phb_probe,
+ .driver = {
+ .name = "of-pci",
+ .owner = THIS_MODULE,
+ .of_match_table = of_pci_phb_ids,
+ },
+};
+
+static __init int of_pci_phb_init(void)
+{
+ return platform_driver_register(&of_pci_phb_driver);
+}
+
+device_initcall(of_pci_phb_init);
+
+#endif /* CONFIG_PPC_OF_PLATFORM_PCI */
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
new file mode 100644
index 00000000..0bb1f986
--- /dev/null
+++ b/arch/powerpc/kernel/paca.c
@@ -0,0 +1,219 @@
+/*
+ * c 2001 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/smp.h>
+#include <linux/export.h>
+#include <linux/memblock.h>
+
+#include <asm/lppaca.h>
+#include <asm/paca.h>
+#include <asm/sections.h>
+#include <asm/pgtable.h>
+#include <asm/kexec.h>
+
+/* This symbol is provided by the linker - let it fill in the paca
+ * field correctly */
+extern unsigned long __toc_start;
+
+#ifdef CONFIG_PPC_BOOK3S
+
+/*
+ * The structure which the hypervisor knows about - this structure
+ * should not cross a page boundary. The vpa_init/register_vpa call
+ * is now known to fail if the lppaca structure crosses a page
+ * boundary. The lppaca is also used on POWER5 pSeries boxes.
+ * The lppaca is 640 bytes long, and cannot readily
+ * change since the hypervisor knows its layout, so a 1kB alignment
+ * will suffice to ensure that it doesn't cross a page boundary.
+ */
+struct lppaca lppaca[] = {
+ [0 ... (NR_LPPACAS-1)] = {
+ .desc = 0xd397d781, /* "LpPa" */
+ .size = sizeof(struct lppaca),
+ .dyn_proc_status = 2,
+ .decr_val = 0x00ff0000,
+ .fpregs_in_use = 1,
+ .end_of_quantum = 0xfffffffffffffffful,
+ .slb_count = 64,
+ .vmxregs_in_use = 0,
+ .page_ins = 0,
+ },
+};
+
+static struct lppaca *extra_lppacas;
+static long __initdata lppaca_size;
+
+static void allocate_lppacas(int nr_cpus, unsigned long limit)
+{
+ if (nr_cpus <= NR_LPPACAS)
+ return;
+
+ lppaca_size = PAGE_ALIGN(sizeof(struct lppaca) *
+ (nr_cpus - NR_LPPACAS));
+ extra_lppacas = __va(memblock_alloc_base(lppaca_size,
+ PAGE_SIZE, limit));
+}
+
+static struct lppaca *new_lppaca(int cpu)
+{
+ struct lppaca *lp;
+
+ if (cpu < NR_LPPACAS)
+ return &lppaca[cpu];
+
+ lp = extra_lppacas + (cpu - NR_LPPACAS);
+ *lp = lppaca[0];
+
+ return lp;
+}
+
+static void free_lppacas(void)
+{
+ long new_size = 0, nr;
+
+ if (!lppaca_size)
+ return;
+ nr = num_possible_cpus() - NR_LPPACAS;
+ if (nr > 0)
+ new_size = PAGE_ALIGN(nr * sizeof(struct lppaca));
+ if (new_size >= lppaca_size)
+ return;
+
+ memblock_free(__pa(extra_lppacas) + new_size, lppaca_size - new_size);
+ lppaca_size = new_size;
+}
+
+#else
+
+static inline void allocate_lppacas(int nr_cpus, unsigned long limit) { }
+static inline void free_lppacas(void) { }
+
+#endif /* CONFIG_PPC_BOOK3S */
+
+#ifdef CONFIG_PPC_STD_MMU_64
+
+/*
+ * 3 persistent SLBs are registered here. The buffer will be zero
+ * initially, hence will all be invaild until we actually write them.
+ */
+struct slb_shadow slb_shadow[] __cacheline_aligned = {
+ [0 ... (NR_CPUS-1)] = {
+ .persistent = SLB_NUM_BOLTED,
+ .buffer_length = sizeof(struct slb_shadow),
+ },
+};
+
+#endif /* CONFIG_PPC_STD_MMU_64 */
+
+/* The Paca is an array with one entry per processor. Each contains an
+ * lppaca, which contains the information shared between the
+ * hypervisor and Linux.
+ * On systems with hardware multi-threading, there are two threads
+ * per processor. The Paca array must contain an entry for each thread.
+ * The VPD Areas will give a max logical processors = 2 * max physical
+ * processors. The processor VPD array needs one entry per physical
+ * processor (not thread).
+ */
+struct paca_struct *paca;
+EXPORT_SYMBOL(paca);
+
+struct paca_struct boot_paca;
+
+void __init initialise_paca(struct paca_struct *new_paca, int cpu)
+{
+ /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB
+ * of the TOC can be addressed using a single machine instruction.
+ */
+ unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL;
+
+#ifdef CONFIG_PPC_BOOK3S
+ new_paca->lppaca_ptr = new_lppaca(cpu);
+#else
+ new_paca->kernel_pgd = swapper_pg_dir;
+#endif
+ new_paca->lock_token = 0x8000;
+ new_paca->paca_index = cpu;
+ new_paca->kernel_toc = kernel_toc;
+ new_paca->kernelbase = (unsigned long) _stext;
+ new_paca->kernel_msr = MSR_KERNEL;
+ new_paca->hw_cpu_id = 0xffff;
+ new_paca->kexec_state = KEXEC_STATE_NONE;
+ new_paca->__current = &init_task;
+#ifdef CONFIG_PPC_STD_MMU_64
+ new_paca->slb_shadow_ptr = &slb_shadow[cpu];
+#endif /* CONFIG_PPC_STD_MMU_64 */
+}
+
+/* Put the paca pointer into r13 and SPRG_PACA */
+void setup_paca(struct paca_struct *new_paca)
+{
+ /* Setup r13 */
+ local_paca = new_paca;
+
+#ifdef CONFIG_PPC_BOOK3E
+ /* On Book3E, initialize the TLB miss exception frames */
+ mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb);
+#else
+ /* In HV mode, we setup both HPACA and PACA to avoid problems
+ * if we do a GET_PACA() before the feature fixups have been
+ * applied
+ */
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ mtspr(SPRN_SPRG_HPACA, local_paca);
+#endif
+ mtspr(SPRN_SPRG_PACA, local_paca);
+
+}
+
+static int __initdata paca_size;
+
+void __init allocate_pacas(void)
+{
+ int cpu, limit;
+
+ /*
+ * We can't take SLB misses on the paca, and we want to access them
+ * in real mode, so allocate them within the RMA and also within
+ * the first segment.
+ */
+ limit = min(0x10000000ULL, ppc64_rma_size);
+
+ paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
+
+ paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit));
+ memset(paca, 0, paca_size);
+
+ printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n",
+ paca_size, nr_cpu_ids, paca);
+
+ allocate_lppacas(nr_cpu_ids, limit);
+
+ /* Can't use for_each_*_cpu, as they aren't functional yet */
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+ initialise_paca(&paca[cpu], cpu);
+}
+
+void __init free_unused_pacas(void)
+{
+ int new_size;
+
+ new_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
+
+ if (new_size >= paca_size)
+ return;
+
+ memblock_free(__pa(paca) + new_size, paca_size - new_size);
+
+ printk(KERN_DEBUG "Freed %u bytes for unused pacas\n",
+ paca_size - new_size);
+
+ paca_size = new_size;
+
+ free_lppacas();
+}
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
new file mode 100644
index 00000000..8e78e93c
--- /dev/null
+++ b/arch/powerpc/kernel/pci-common.c
@@ -0,0 +1,1702 @@
+/*
+ * Contains common pci routines for ALL ppc platform
+ * (based on pci_32.c and pci_64.c)
+ *
+ * Port for PPC64 David Engebretsen, IBM Corp.
+ * Contains common pci routines for ppc64 platform, pSeries and iSeries brands.
+ *
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Rework, based on alpha PCI code.
+ *
+ * Common pmac/prep/chrp pci routines. -- Cort
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/export.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/syscalls.h>
+#include <linux/irq.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/byteorder.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+#include <asm/eeh.h>
+
+static DEFINE_SPINLOCK(hose_spinlock);
+LIST_HEAD(hose_list);
+
+/* XXX kill that some day ... */
+static int global_phb_number; /* Global phb counter */
+
+/* ISA Memory physical address */
+resource_size_t isa_mem_base;
+
+
+static struct dma_map_ops *pci_dma_ops = &dma_direct_ops;
+
+void set_pci_dma_ops(struct dma_map_ops *dma_ops)
+{
+ pci_dma_ops = dma_ops;
+}
+
+struct dma_map_ops *get_pci_dma_ops(void)
+{
+ return pci_dma_ops;
+}
+EXPORT_SYMBOL(get_pci_dma_ops);
+
+struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
+{
+ struct pci_controller *phb;
+
+ phb = zalloc_maybe_bootmem(sizeof(struct pci_controller), GFP_KERNEL);
+ if (phb == NULL)
+ return NULL;
+ spin_lock(&hose_spinlock);
+ phb->global_number = global_phb_number++;
+ list_add_tail(&phb->list_node, &hose_list);
+ spin_unlock(&hose_spinlock);
+ phb->dn = dev;
+ phb->is_dynamic = mem_init_done;
+#ifdef CONFIG_PPC64
+ if (dev) {
+ int nid = of_node_to_nid(dev);
+
+ if (nid < 0 || !node_online(nid))
+ nid = -1;
+
+ PHB_SET_NODE(phb, nid);
+ }
+#endif
+ return phb;
+}
+
+void pcibios_free_controller(struct pci_controller *phb)
+{
+ spin_lock(&hose_spinlock);
+ list_del(&phb->list_node);
+ spin_unlock(&hose_spinlock);
+
+ if (phb->is_dynamic)
+ kfree(phb);
+}
+
+static resource_size_t pcibios_io_size(const struct pci_controller *hose)
+{
+#ifdef CONFIG_PPC64
+ return hose->pci_io_size;
+#else
+ return resource_size(&hose->io_resource);
+#endif
+}
+
+int pcibios_vaddr_is_ioport(void __iomem *address)
+{
+ int ret = 0;
+ struct pci_controller *hose;
+ resource_size_t size;
+
+ spin_lock(&hose_spinlock);
+ list_for_each_entry(hose, &hose_list, list_node) {
+ size = pcibios_io_size(hose);
+ if (address >= hose->io_base_virt &&
+ address < (hose->io_base_virt + size)) {
+ ret = 1;
+ break;
+ }
+ }
+ spin_unlock(&hose_spinlock);
+ return ret;
+}
+
+unsigned long pci_address_to_pio(phys_addr_t address)
+{
+ struct pci_controller *hose;
+ resource_size_t size;
+ unsigned long ret = ~0;
+
+ spin_lock(&hose_spinlock);
+ list_for_each_entry(hose, &hose_list, list_node) {
+ size = pcibios_io_size(hose);
+ if (address >= hose->io_base_phys &&
+ address < (hose->io_base_phys + size)) {
+ unsigned long base =
+ (unsigned long)hose->io_base_virt - _IO_BASE;
+ ret = base + (address - hose->io_base_phys);
+ break;
+ }
+ }
+ spin_unlock(&hose_spinlock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(pci_address_to_pio);
+
+/*
+ * Return the domain number for this bus.
+ */
+int pci_domain_nr(struct pci_bus *bus)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+
+ return hose->global_number;
+}
+EXPORT_SYMBOL(pci_domain_nr);
+
+/* This routine is meant to be used early during boot, when the
+ * PCI bus numbers have not yet been assigned, and you need to
+ * issue PCI config cycles to an OF device.
+ * It could also be used to "fix" RTAS config cycles if you want
+ * to set pci_assign_all_buses to 1 and still use RTAS for PCI
+ * config cycles.
+ */
+struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node)
+{
+ while(node) {
+ struct pci_controller *hose, *tmp;
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ if (hose->dn == node)
+ return hose;
+ node = node->parent;
+ }
+ return NULL;
+}
+
+static ssize_t pci_show_devspec(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pci_dev *pdev;
+ struct device_node *np;
+
+ pdev = to_pci_dev (dev);
+ np = pci_device_to_OF_node(pdev);
+ if (np == NULL || np->full_name == NULL)
+ return 0;
+ return sprintf(buf, "%s", np->full_name);
+}
+static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL);
+
+/* Add sysfs properties */
+int pcibios_add_platform_entries(struct pci_dev *pdev)
+{
+ return device_create_file(&pdev->dev, &dev_attr_devspec);
+}
+
+char __devinit *pcibios_setup(char *str)
+{
+ return str;
+}
+
+/*
+ * Reads the interrupt pin to determine if interrupt is use by card.
+ * If the interrupt is used, then gets the interrupt line from the
+ * openfirmware and sets it in the pci_dev and pci_config line.
+ */
+static int pci_read_irq_line(struct pci_dev *pci_dev)
+{
+ struct of_irq oirq;
+ unsigned int virq;
+
+ pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev));
+
+#ifdef DEBUG
+ memset(&oirq, 0xff, sizeof(oirq));
+#endif
+ /* Try to get a mapping from the device-tree */
+ if (of_irq_map_pci(pci_dev, &oirq)) {
+ u8 line, pin;
+
+ /* If that fails, lets fallback to what is in the config
+ * space and map that through the default controller. We
+ * also set the type to level low since that's what PCI
+ * interrupts are. If your platform does differently, then
+ * either provide a proper interrupt tree or don't use this
+ * function.
+ */
+ if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin))
+ return -1;
+ if (pin == 0)
+ return -1;
+ if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) ||
+ line == 0xff || line == 0) {
+ return -1;
+ }
+ pr_debug(" No map ! Using line %d (pin %d) from PCI config\n",
+ line, pin);
+
+ virq = irq_create_mapping(NULL, line);
+ if (virq != NO_IRQ)
+ irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW);
+ } else {
+ pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n",
+ oirq.size, oirq.specifier[0], oirq.specifier[1],
+ oirq.controller ? oirq.controller->full_name :
+ "<default>");
+
+ virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
+ oirq.size);
+ }
+ if(virq == NO_IRQ) {
+ pr_debug(" Failed to map !\n");
+ return -1;
+ }
+
+ pr_debug(" Mapped to linux irq %d\n", virq);
+
+ pci_dev->irq = virq;
+
+ return 0;
+}
+
+/*
+ * Platform support for /proc/bus/pci/X/Y mmap()s,
+ * modelled on the sparc64 implementation by Dave Miller.
+ * -- paulus.
+ */
+
+/*
+ * Adjust vm_pgoff of VMA such that it is the physical page offset
+ * corresponding to the 32-bit pci bus offset for DEV requested by the user.
+ *
+ * Basically, the user finds the base address for his device which he wishes
+ * to mmap. They read the 32-bit value from the config space base register,
+ * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
+ * offset parameter of mmap on /proc/bus/pci/XXX for that device.
+ *
+ * Returns negative error code on failure, zero on success.
+ */
+static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
+ resource_size_t *offset,
+ enum pci_mmap_state mmap_state)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ unsigned long io_offset = 0;
+ int i, res_bit;
+
+ if (hose == 0)
+ return NULL; /* should never happen */
+
+ /* If memory, add on the PCI bridge address offset */
+ if (mmap_state == pci_mmap_mem) {
+#if 0 /* See comment in pci_resource_to_user() for why this is disabled */
+ *offset += hose->pci_mem_offset;
+#endif
+ res_bit = IORESOURCE_MEM;
+ } else {
+ io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ *offset += io_offset;
+ res_bit = IORESOURCE_IO;
+ }
+
+ /*
+ * Check that the offset requested corresponds to one of the
+ * resources of the device.
+ */
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+ struct resource *rp = &dev->resource[i];
+ int flags = rp->flags;
+
+ /* treat ROM as memory (should be already) */
+ if (i == PCI_ROM_RESOURCE)
+ flags |= IORESOURCE_MEM;
+
+ /* Active and same type? */
+ if ((flags & res_bit) == 0)
+ continue;
+
+ /* In the range of this resource? */
+ if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
+ continue;
+
+ /* found it! construct the final physical address */
+ if (mmap_state == pci_mmap_io)
+ *offset += hose->io_base_phys - io_offset;
+ return rp;
+ }
+
+ return NULL;
+}
+
+/*
+ * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
+ * device mapping.
+ */
+static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
+ pgprot_t protection,
+ enum pci_mmap_state mmap_state,
+ int write_combine)
+{
+ unsigned long prot = pgprot_val(protection);
+
+ /* Write combine is always 0 on non-memory space mappings. On
+ * memory space, if the user didn't pass 1, we check for a
+ * "prefetchable" resource. This is a bit hackish, but we use
+ * this to workaround the inability of /sysfs to provide a write
+ * combine bit
+ */
+ if (mmap_state != pci_mmap_mem)
+ write_combine = 0;
+ else if (write_combine == 0) {
+ if (rp->flags & IORESOURCE_PREFETCH)
+ write_combine = 1;
+ }
+
+ /* XXX would be nice to have a way to ask for write-through */
+ if (write_combine)
+ return pgprot_noncached_wc(prot);
+ else
+ return pgprot_noncached(prot);
+}
+
+/*
+ * This one is used by /dev/mem and fbdev who have no clue about the
+ * PCI device, it tries to find the PCI device first and calls the
+ * above routine
+ */
+pgprot_t pci_phys_mem_access_prot(struct file *file,
+ unsigned long pfn,
+ unsigned long size,
+ pgprot_t prot)
+{
+ struct pci_dev *pdev = NULL;
+ struct resource *found = NULL;
+ resource_size_t offset = ((resource_size_t)pfn) << PAGE_SHIFT;
+ int i;
+
+ if (page_is_ram(pfn))
+ return prot;
+
+ prot = pgprot_noncached(prot);
+ for_each_pci_dev(pdev) {
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+ struct resource *rp = &pdev->resource[i];
+ int flags = rp->flags;
+
+ /* Active and same type? */
+ if ((flags & IORESOURCE_MEM) == 0)
+ continue;
+ /* In the range of this resource? */
+ if (offset < (rp->start & PAGE_MASK) ||
+ offset > rp->end)
+ continue;
+ found = rp;
+ break;
+ }
+ if (found)
+ break;
+ }
+ if (found) {
+ if (found->flags & IORESOURCE_PREFETCH)
+ prot = pgprot_noncached_wc(prot);
+ pci_dev_put(pdev);
+ }
+
+ pr_debug("PCI: Non-PCI map for %llx, prot: %lx\n",
+ (unsigned long long)offset, pgprot_val(prot));
+
+ return prot;
+}
+
+
+/*
+ * Perform the actual remap of the pages for a PCI device mapping, as
+ * appropriate for this architecture. The region in the process to map
+ * is described by vm_start and vm_end members of VMA, the base physical
+ * address is found in vm_pgoff.
+ * The pci device structure is provided so that architectures may make mapping
+ * decisions on a per-device or per-bus basis.
+ *
+ * Returns a negative error code on failure, zero on success.
+ */
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state, int write_combine)
+{
+ resource_size_t offset =
+ ((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
+ struct resource *rp;
+ int ret;
+
+ rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
+ if (rp == NULL)
+ return -EINVAL;
+
+ vma->vm_pgoff = offset >> PAGE_SHIFT;
+ vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp,
+ vma->vm_page_prot,
+ mmap_state, write_combine);
+
+ ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ vma->vm_end - vma->vm_start, vma->vm_page_prot);
+
+ return ret;
+}
+
+/* This provides legacy IO read access on a bus */
+int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
+{
+ unsigned long offset;
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ struct resource *rp = &hose->io_resource;
+ void __iomem *addr;
+
+ /* Check if port can be supported by that bus. We only check
+ * the ranges of the PHB though, not the bus itself as the rules
+ * for forwarding legacy cycles down bridges are not our problem
+ * here. So if the host bridge supports it, we do it.
+ */
+ offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ offset += port;
+
+ if (!(rp->flags & IORESOURCE_IO))
+ return -ENXIO;
+ if (offset < rp->start || (offset + size) > rp->end)
+ return -ENXIO;
+ addr = hose->io_base_virt + port;
+
+ switch(size) {
+ case 1:
+ *((u8 *)val) = in_8(addr);
+ return 1;
+ case 2:
+ if (port & 1)
+ return -EINVAL;
+ *((u16 *)val) = in_le16(addr);
+ return 2;
+ case 4:
+ if (port & 3)
+ return -EINVAL;
+ *((u32 *)val) = in_le32(addr);
+ return 4;
+ }
+ return -EINVAL;
+}
+
+/* This provides legacy IO write access on a bus */
+int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size)
+{
+ unsigned long offset;
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ struct resource *rp = &hose->io_resource;
+ void __iomem *addr;
+
+ /* Check if port can be supported by that bus. We only check
+ * the ranges of the PHB though, not the bus itself as the rules
+ * for forwarding legacy cycles down bridges are not our problem
+ * here. So if the host bridge supports it, we do it.
+ */
+ offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ offset += port;
+
+ if (!(rp->flags & IORESOURCE_IO))
+ return -ENXIO;
+ if (offset < rp->start || (offset + size) > rp->end)
+ return -ENXIO;
+ addr = hose->io_base_virt + port;
+
+ /* WARNING: The generic code is idiotic. It gets passed a pointer
+ * to what can be a 1, 2 or 4 byte quantity and always reads that
+ * as a u32, which means that we have to correct the location of
+ * the data read within those 32 bits for size 1 and 2
+ */
+ switch(size) {
+ case 1:
+ out_8(addr, val >> 24);
+ return 1;
+ case 2:
+ if (port & 1)
+ return -EINVAL;
+ out_le16(addr, val >> 16);
+ return 2;
+ case 4:
+ if (port & 3)
+ return -EINVAL;
+ out_le32(addr, val);
+ return 4;
+ }
+ return -EINVAL;
+}
+
+/* This provides legacy IO or memory mmap access on a bus */
+int pci_mmap_legacy_page_range(struct pci_bus *bus,
+ struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ resource_size_t offset =
+ ((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
+ resource_size_t size = vma->vm_end - vma->vm_start;
+ struct resource *rp;
+
+ pr_debug("pci_mmap_legacy_page_range(%04x:%02x, %s @%llx..%llx)\n",
+ pci_domain_nr(bus), bus->number,
+ mmap_state == pci_mmap_mem ? "MEM" : "IO",
+ (unsigned long long)offset,
+ (unsigned long long)(offset + size - 1));
+
+ if (mmap_state == pci_mmap_mem) {
+ /* Hack alert !
+ *
+ * Because X is lame and can fail starting if it gets an error trying
+ * to mmap legacy_mem (instead of just moving on without legacy memory
+ * access) we fake it here by giving it anonymous memory, effectively
+ * behaving just like /dev/zero
+ */
+ if ((offset + size) > hose->isa_mem_size) {
+ printk(KERN_DEBUG
+ "Process %s (pid:%d) mapped non-existing PCI legacy memory for 0%04x:%02x\n",
+ current->comm, current->pid, pci_domain_nr(bus), bus->number);
+ if (vma->vm_flags & VM_SHARED)
+ return shmem_zero_setup(vma);
+ return 0;
+ }
+ offset += hose->isa_mem_phys;
+ } else {
+ unsigned long io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ unsigned long roffset = offset + io_offset;
+ rp = &hose->io_resource;
+ if (!(rp->flags & IORESOURCE_IO))
+ return -ENXIO;
+ if (roffset < rp->start || (roffset + size) > rp->end)
+ return -ENXIO;
+ offset += hose->io_base_phys;
+ }
+ pr_debug(" -> mapping phys %llx\n", (unsigned long long)offset);
+
+ vma->vm_pgoff = offset >> PAGE_SHIFT;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+}
+
+void pci_resource_to_user(const struct pci_dev *dev, int bar,
+ const struct resource *rsrc,
+ resource_size_t *start, resource_size_t *end)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ resource_size_t offset = 0;
+
+ if (hose == NULL)
+ return;
+
+ if (rsrc->flags & IORESOURCE_IO)
+ offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+
+ /* We pass a fully fixed up address to userland for MMIO instead of
+ * a BAR value because X is lame and expects to be able to use that
+ * to pass to /dev/mem !
+ *
+ * That means that we'll have potentially 64 bits values where some
+ * userland apps only expect 32 (like X itself since it thinks only
+ * Sparc has 64 bits MMIO) but if we don't do that, we break it on
+ * 32 bits CHRPs :-(
+ *
+ * Hopefully, the sysfs insterface is immune to that gunk. Once X
+ * has been fixed (and the fix spread enough), we can re-enable the
+ * 2 lines below and pass down a BAR value to userland. In that case
+ * we'll also have to re-enable the matching code in
+ * __pci_mmap_make_offset().
+ *
+ * BenH.
+ */
+#if 0
+ else if (rsrc->flags & IORESOURCE_MEM)
+ offset = hose->pci_mem_offset;
+#endif
+
+ *start = rsrc->start - offset;
+ *end = rsrc->end - offset;
+}
+
+/**
+ * pci_process_bridge_OF_ranges - Parse PCI bridge resources from device tree
+ * @hose: newly allocated pci_controller to be setup
+ * @dev: device node of the host bridge
+ * @primary: set if primary bus (32 bits only, soon to be deprecated)
+ *
+ * This function will parse the "ranges" property of a PCI host bridge device
+ * node and setup the resource mapping of a pci controller based on its
+ * content.
+ *
+ * Life would be boring if it wasn't for a few issues that we have to deal
+ * with here:
+ *
+ * - We can only cope with one IO space range and up to 3 Memory space
+ * ranges. However, some machines (thanks Apple !) tend to split their
+ * space into lots of small contiguous ranges. So we have to coalesce.
+ *
+ * - We can only cope with all memory ranges having the same offset
+ * between CPU addresses and PCI addresses. Unfortunately, some bridges
+ * are setup for a large 1:1 mapping along with a small "window" which
+ * maps PCI address 0 to some arbitrary high address of the CPU space in
+ * order to give access to the ISA memory hole.
+ * The way out of here that I've chosen for now is to always set the
+ * offset based on the first resource found, then override it if we
+ * have a different offset and the previous was set by an ISA hole.
+ *
+ * - Some busses have IO space not starting at 0, which causes trouble with
+ * the way we do our IO resource renumbering. The code somewhat deals with
+ * it for 64 bits but I would expect problems on 32 bits.
+ *
+ * - Some 32 bits platforms such as 4xx can have physical space larger than
+ * 32 bits so we need to use 64 bits values for the parsing
+ */
+void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
+ struct device_node *dev,
+ int primary)
+{
+ const u32 *ranges;
+ int rlen;
+ int pna = of_n_addr_cells(dev);
+ int np = pna + 5;
+ int memno = 0, isa_hole = -1;
+ u32 pci_space;
+ unsigned long long pci_addr, cpu_addr, pci_next, cpu_next, size;
+ unsigned long long isa_mb = 0;
+ struct resource *res;
+
+ printk(KERN_INFO "PCI host bridge %s %s ranges:\n",
+ dev->full_name, primary ? "(primary)" : "");
+
+ /* Get ranges property */
+ ranges = of_get_property(dev, "ranges", &rlen);
+ if (ranges == NULL)
+ return;
+
+ /* Parse it */
+ while ((rlen -= np * 4) >= 0) {
+ /* Read next ranges element */
+ pci_space = ranges[0];
+ pci_addr = of_read_number(ranges + 1, 2);
+ cpu_addr = of_translate_address(dev, ranges + 3);
+ size = of_read_number(ranges + pna + 3, 2);
+ ranges += np;
+
+ /* If we failed translation or got a zero-sized region
+ * (some FW try to feed us with non sensical zero sized regions
+ * such as power3 which look like some kind of attempt at exposing
+ * the VGA memory hole)
+ */
+ if (cpu_addr == OF_BAD_ADDR || size == 0)
+ continue;
+
+ /* Now consume following elements while they are contiguous */
+ for (; rlen >= np * sizeof(u32);
+ ranges += np, rlen -= np * 4) {
+ if (ranges[0] != pci_space)
+ break;
+ pci_next = of_read_number(ranges + 1, 2);
+ cpu_next = of_translate_address(dev, ranges + 3);
+ if (pci_next != pci_addr + size ||
+ cpu_next != cpu_addr + size)
+ break;
+ size += of_read_number(ranges + pna + 3, 2);
+ }
+
+ /* Act based on address space type */
+ res = NULL;
+ switch ((pci_space >> 24) & 0x3) {
+ case 1: /* PCI IO space */
+ printk(KERN_INFO
+ " IO 0x%016llx..0x%016llx -> 0x%016llx\n",
+ cpu_addr, cpu_addr + size - 1, pci_addr);
+
+ /* We support only one IO range */
+ if (hose->pci_io_size) {
+ printk(KERN_INFO
+ " \\--> Skipped (too many) !\n");
+ continue;
+ }
+#ifdef CONFIG_PPC32
+ /* On 32 bits, limit I/O space to 16MB */
+ if (size > 0x01000000)
+ size = 0x01000000;
+
+ /* 32 bits needs to map IOs here */
+ hose->io_base_virt = ioremap(cpu_addr, size);
+
+ /* Expect trouble if pci_addr is not 0 */
+ if (primary)
+ isa_io_base =
+ (unsigned long)hose->io_base_virt;
+#endif /* CONFIG_PPC32 */
+ /* pci_io_size and io_base_phys always represent IO
+ * space starting at 0 so we factor in pci_addr
+ */
+ hose->pci_io_size = pci_addr + size;
+ hose->io_base_phys = cpu_addr - pci_addr;
+
+ /* Build resource */
+ res = &hose->io_resource;
+ res->flags = IORESOURCE_IO;
+ res->start = pci_addr;
+ break;
+ case 2: /* PCI Memory space */
+ case 3: /* PCI 64 bits Memory space */
+ printk(KERN_INFO
+ " MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n",
+ cpu_addr, cpu_addr + size - 1, pci_addr,
+ (pci_space & 0x40000000) ? "Prefetch" : "");
+
+ /* We support only 3 memory ranges */
+ if (memno >= 3) {
+ printk(KERN_INFO
+ " \\--> Skipped (too many) !\n");
+ continue;
+ }
+ /* Handles ISA memory hole space here */
+ if (pci_addr == 0) {
+ isa_mb = cpu_addr;
+ isa_hole = memno;
+ if (primary || isa_mem_base == 0)
+ isa_mem_base = cpu_addr;
+ hose->isa_mem_phys = cpu_addr;
+ hose->isa_mem_size = size;
+ }
+
+ /* We get the PCI/Mem offset from the first range or
+ * the, current one if the offset came from an ISA
+ * hole. If they don't match, bugger.
+ */
+ if (memno == 0 ||
+ (isa_hole >= 0 && pci_addr != 0 &&
+ hose->pci_mem_offset == isa_mb))
+ hose->pci_mem_offset = cpu_addr - pci_addr;
+ else if (pci_addr != 0 &&
+ hose->pci_mem_offset != cpu_addr - pci_addr) {
+ printk(KERN_INFO
+ " \\--> Skipped (offset mismatch) !\n");
+ continue;
+ }
+
+ /* Build resource */
+ res = &hose->mem_resources[memno++];
+ res->flags = IORESOURCE_MEM;
+ if (pci_space & 0x40000000)
+ res->flags |= IORESOURCE_PREFETCH;
+ res->start = cpu_addr;
+ break;
+ }
+ if (res != NULL) {
+ res->name = dev->full_name;
+ res->end = res->start + size - 1;
+ res->parent = NULL;
+ res->sibling = NULL;
+ res->child = NULL;
+ }
+ }
+
+ /* If there's an ISA hole and the pci_mem_offset is -not- matching
+ * the ISA hole offset, then we need to remove the ISA hole from
+ * the resource list for that brige
+ */
+ if (isa_hole >= 0 && hose->pci_mem_offset != isa_mb) {
+ unsigned int next = isa_hole + 1;
+ printk(KERN_INFO " Removing ISA hole at 0x%016llx\n", isa_mb);
+ if (next < memno)
+ memmove(&hose->mem_resources[isa_hole],
+ &hose->mem_resources[next],
+ sizeof(struct resource) * (memno - next));
+ hose->mem_resources[--memno].flags = 0;
+ }
+}
+
+/* Decide whether to display the domain number in /proc */
+int pci_proc_domain(struct pci_bus *bus)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+
+ if (!pci_has_flag(PCI_ENABLE_PROC_DOMAINS))
+ return 0;
+ if (pci_has_flag(PCI_COMPAT_DOMAIN_0))
+ return hose->global_number != 0;
+ return 1;
+}
+
+/* This header fixup will do the resource fixup for all devices as they are
+ * probed, but not for bridge ranges
+ */
+static void __devinit pcibios_fixup_resources(struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ int i;
+
+ if (!hose) {
+ printk(KERN_ERR "No host bridge for PCI dev %s !\n",
+ pci_name(dev));
+ return;
+ }
+ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+ struct resource *res = dev->resource + i;
+ if (!res->flags)
+ continue;
+
+ /* If we're going to re-assign everything, we mark all resources
+ * as unset (and 0-base them). In addition, we mark BARs starting
+ * at 0 as unset as well, except if PCI_PROBE_ONLY is also set
+ * since in that case, we don't want to re-assign anything
+ */
+ if (pci_has_flag(PCI_REASSIGN_ALL_RSRC) ||
+ (res->start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) {
+ /* Only print message if not re-assigning */
+ if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC))
+ pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] "
+ "is unassigned\n",
+ pci_name(dev), i,
+ (unsigned long long)res->start,
+ (unsigned long long)res->end,
+ (unsigned int)res->flags);
+ res->end -= res->start;
+ res->start = 0;
+ res->flags |= IORESOURCE_UNSET;
+ continue;
+ }
+
+ pr_debug("PCI:%s Resource %d %016llx-%016llx [%x]\n",
+ pci_name(dev), i,
+ (unsigned long long)res->start,\
+ (unsigned long long)res->end,
+ (unsigned int)res->flags);
+ }
+
+ /* Call machine specific resource fixup */
+ if (ppc_md.pcibios_fixup_resources)
+ ppc_md.pcibios_fixup_resources(dev);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources);
+
+/* This function tries to figure out if a bridge resource has been initialized
+ * by the firmware or not. It doesn't have to be absolutely bullet proof, but
+ * things go more smoothly when it gets it right. It should covers cases such
+ * as Apple "closed" bridge resources and bare-metal pSeries unassigned bridges
+ */
+static int __devinit pcibios_uninitialized_bridge_resource(struct pci_bus *bus,
+ struct resource *res)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ struct pci_dev *dev = bus->self;
+ resource_size_t offset;
+ u16 command;
+ int i;
+
+ /* We don't do anything if PCI_PROBE_ONLY is set */
+ if (pci_has_flag(PCI_PROBE_ONLY))
+ return 0;
+
+ /* Job is a bit different between memory and IO */
+ if (res->flags & IORESOURCE_MEM) {
+ /* If the BAR is non-0 (res != pci_mem_offset) then it's probably been
+ * initialized by somebody
+ */
+ if (res->start != hose->pci_mem_offset)
+ return 0;
+
+ /* The BAR is 0, let's check if memory decoding is enabled on
+ * the bridge. If not, we consider it unassigned
+ */
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ if ((command & PCI_COMMAND_MEMORY) == 0)
+ return 1;
+
+ /* Memory decoding is enabled and the BAR is 0. If any of the bridge
+ * resources covers that starting address (0 then it's good enough for
+ * us for memory
+ */
+ for (i = 0; i < 3; i++) {
+ if ((hose->mem_resources[i].flags & IORESOURCE_MEM) &&
+ hose->mem_resources[i].start == hose->pci_mem_offset)
+ return 0;
+ }
+
+ /* Well, it starts at 0 and we know it will collide so we may as
+ * well consider it as unassigned. That covers the Apple case.
+ */
+ return 1;
+ } else {
+ /* If the BAR is non-0, then we consider it assigned */
+ offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ if (((res->start - offset) & 0xfffffffful) != 0)
+ return 0;
+
+ /* Here, we are a bit different than memory as typically IO space
+ * starting at low addresses -is- valid. What we do instead if that
+ * we consider as unassigned anything that doesn't have IO enabled
+ * in the PCI command register, and that's it.
+ */
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ if (command & PCI_COMMAND_IO)
+ return 0;
+
+ /* It's starting at 0 and IO is disabled in the bridge, consider
+ * it unassigned
+ */
+ return 1;
+ }
+}
+
+/* Fixup resources of a PCI<->PCI bridge */
+static void __devinit pcibios_fixup_bridge(struct pci_bus *bus)
+{
+ struct resource *res;
+ int i;
+
+ struct pci_dev *dev = bus->self;
+
+ pci_bus_for_each_resource(bus, res, i) {
+ if (!res || !res->flags)
+ continue;
+ if (i >= 3 && bus->self->transparent)
+ continue;
+
+ /* If we are going to re-assign everything, mark the resource
+ * as unset and move it down to 0
+ */
+ if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) {
+ res->flags |= IORESOURCE_UNSET;
+ res->end -= res->start;
+ res->start = 0;
+ continue;
+ }
+
+ pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x]\n",
+ pci_name(dev), i,
+ (unsigned long long)res->start,\
+ (unsigned long long)res->end,
+ (unsigned int)res->flags);
+
+ /* Try to detect uninitialized P2P bridge resources,
+ * and clear them out so they get re-assigned later
+ */
+ if (pcibios_uninitialized_bridge_resource(bus, res)) {
+ res->flags = 0;
+ pr_debug("PCI:%s (unassigned)\n", pci_name(dev));
+ }
+ }
+}
+
+void __devinit pcibios_setup_bus_self(struct pci_bus *bus)
+{
+ /* Fix up the bus resources for P2P bridges */
+ if (bus->self != NULL)
+ pcibios_fixup_bridge(bus);
+
+ /* Platform specific bus fixups. This is currently only used
+ * by fsl_pci and I'm hoping to get rid of it at some point
+ */
+ if (ppc_md.pcibios_fixup_bus)
+ ppc_md.pcibios_fixup_bus(bus);
+
+ /* Setup bus DMA mappings */
+ if (ppc_md.pci_dma_bus_setup)
+ ppc_md.pci_dma_bus_setup(bus);
+}
+
+void __devinit pcibios_setup_bus_devices(struct pci_bus *bus)
+{
+ struct pci_dev *dev;
+
+ pr_debug("PCI: Fixup bus devices %d (%s)\n",
+ bus->number, bus->self ? pci_name(bus->self) : "PHB");
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ /* Cardbus can call us to add new devices to a bus, so ignore
+ * those who are already fully discovered
+ */
+ if (dev->is_added)
+ continue;
+
+ /* Fixup NUMA node as it may not be setup yet by the generic
+ * code and is needed by the DMA init
+ */
+ set_dev_node(&dev->dev, pcibus_to_node(dev->bus));
+
+ /* Hook up default DMA ops */
+ set_dma_ops(&dev->dev, pci_dma_ops);
+ set_dma_offset(&dev->dev, PCI_DRAM_OFFSET);
+
+ /* Additional platform DMA/iommu setup */
+ if (ppc_md.pci_dma_dev_setup)
+ ppc_md.pci_dma_dev_setup(dev);
+
+ /* Read default IRQs and fixup if necessary */
+ pci_read_irq_line(dev);
+ if (ppc_md.pci_irq_fixup)
+ ppc_md.pci_irq_fixup(dev);
+ }
+}
+
+void pcibios_set_master(struct pci_dev *dev)
+{
+ /* No special bus mastering setup handling */
+}
+
+void __devinit pcibios_fixup_bus(struct pci_bus *bus)
+{
+ /* When called from the generic PCI probe, read PCI<->PCI bridge
+ * bases. This is -not- called when generating the PCI tree from
+ * the OF device-tree.
+ */
+ if (bus->self != NULL)
+ pci_read_bridge_bases(bus);
+
+ /* Now fixup the bus bus */
+ pcibios_setup_bus_self(bus);
+
+ /* Now fixup devices on that bus */
+ pcibios_setup_bus_devices(bus);
+}
+EXPORT_SYMBOL(pcibios_fixup_bus);
+
+void __devinit pci_fixup_cardbus(struct pci_bus *bus)
+{
+ /* Now fixup devices on that bus */
+ pcibios_setup_bus_devices(bus);
+}
+
+
+static int skip_isa_ioresource_align(struct pci_dev *dev)
+{
+ if (pci_has_flag(PCI_CAN_SKIP_ISA_ALIGN) &&
+ !(dev->bus->bridge_ctl & PCI_BRIDGE_CTL_ISA))
+ return 1;
+ return 0;
+}
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ *
+ * Why? Because some silly external IO cards only decode
+ * the low 10 bits of the IO address. The 0x00-0xff region
+ * is reserved for motherboard devices that decode all 16
+ * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
+ * but we want to try to avoid allocating at 0x2900-0x2bff
+ * which might have be mirrored at 0x0100-0x03ff..
+ */
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+ resource_size_t size, resource_size_t align)
+{
+ struct pci_dev *dev = data;
+ resource_size_t start = res->start;
+
+ if (res->flags & IORESOURCE_IO) {
+ if (skip_isa_ioresource_align(dev))
+ return start;
+ if (start & 0x300)
+ start = (start + 0x3ff) & ~0x3ff;
+ }
+
+ return start;
+}
+EXPORT_SYMBOL(pcibios_align_resource);
+
+/*
+ * Reparent resource children of pr that conflict with res
+ * under res, and make res replace those children.
+ */
+static int reparent_resources(struct resource *parent,
+ struct resource *res)
+{
+ struct resource *p, **pp;
+ struct resource **firstpp = NULL;
+
+ for (pp = &parent->child; (p = *pp) != NULL; pp = &p->sibling) {
+ if (p->end < res->start)
+ continue;
+ if (res->end < p->start)
+ break;
+ if (p->start < res->start || p->end > res->end)
+ return -1; /* not completely contained */
+ if (firstpp == NULL)
+ firstpp = pp;
+ }
+ if (firstpp == NULL)
+ return -1; /* didn't find any conflicting entries? */
+ res->parent = parent;
+ res->child = *firstpp;
+ res->sibling = *pp;
+ *firstpp = res;
+ *pp = NULL;
+ for (p = res->child; p != NULL; p = p->sibling) {
+ p->parent = res;
+ pr_debug("PCI: Reparented %s [%llx..%llx] under %s\n",
+ p->name,
+ (unsigned long long)p->start,
+ (unsigned long long)p->end, res->name);
+ }
+ return 0;
+}
+
+/*
+ * Handle resources of PCI devices. If the world were perfect, we could
+ * just allocate all the resource regions and do nothing more. It isn't.
+ * On the other hand, we cannot just re-allocate all devices, as it would
+ * require us to know lots of host bridge internals. So we attempt to
+ * keep as much of the original configuration as possible, but tweak it
+ * when it's found to be wrong.
+ *
+ * Known BIOS problems we have to work around:
+ * - I/O or memory regions not configured
+ * - regions configured, but not enabled in the command register
+ * - bogus I/O addresses above 64K used
+ * - expansion ROMs left enabled (this may sound harmless, but given
+ * the fact the PCI specs explicitly allow address decoders to be
+ * shared between expansion ROMs and other resource regions, it's
+ * at least dangerous)
+ *
+ * Our solution:
+ * (1) Allocate resources for all buses behind PCI-to-PCI bridges.
+ * This gives us fixed barriers on where we can allocate.
+ * (2) Allocate resources for all enabled devices. If there is
+ * a collision, just mark the resource as unallocated. Also
+ * disable expansion ROMs during this step.
+ * (3) Try to allocate resources for disabled devices. If the
+ * resources were assigned correctly, everything goes well,
+ * if they weren't, they won't disturb allocation of other
+ * resources.
+ * (4) Assign new addresses to resources which were either
+ * not configured at all or misconfigured. If explicitly
+ * requested by the user, configure expansion ROM address
+ * as well.
+ */
+
+void pcibios_allocate_bus_resources(struct pci_bus *bus)
+{
+ struct pci_bus *b;
+ int i;
+ struct resource *res, *pr;
+
+ pr_debug("PCI: Allocating bus resources for %04x:%02x...\n",
+ pci_domain_nr(bus), bus->number);
+
+ pci_bus_for_each_resource(bus, res, i) {
+ if (!res || !res->flags || res->start > res->end || res->parent)
+ continue;
+
+ /* If the resource was left unset at this point, we clear it */
+ if (res->flags & IORESOURCE_UNSET)
+ goto clear_resource;
+
+ if (bus->parent == NULL)
+ pr = (res->flags & IORESOURCE_IO) ?
+ &ioport_resource : &iomem_resource;
+ else {
+ pr = pci_find_parent_resource(bus->self, res);
+ if (pr == res) {
+ /* this happens when the generic PCI
+ * code (wrongly) decides that this
+ * bridge is transparent -- paulus
+ */
+ continue;
+ }
+ }
+
+ pr_debug("PCI: %s (bus %d) bridge rsrc %d: %016llx-%016llx "
+ "[0x%x], parent %p (%s)\n",
+ bus->self ? pci_name(bus->self) : "PHB",
+ bus->number, i,
+ (unsigned long long)res->start,
+ (unsigned long long)res->end,
+ (unsigned int)res->flags,
+ pr, (pr && pr->name) ? pr->name : "nil");
+
+ if (pr && !(pr->flags & IORESOURCE_UNSET)) {
+ if (request_resource(pr, res) == 0)
+ continue;
+ /*
+ * Must be a conflict with an existing entry.
+ * Move that entry (or entries) under the
+ * bridge resource and try again.
+ */
+ if (reparent_resources(pr, res) == 0)
+ continue;
+ }
+ pr_warning("PCI: Cannot allocate resource region "
+ "%d of PCI bridge %d, will remap\n", i, bus->number);
+ clear_resource:
+ res->start = res->end = 0;
+ res->flags = 0;
+ }
+
+ list_for_each_entry(b, &bus->children, node)
+ pcibios_allocate_bus_resources(b);
+}
+
+static inline void __devinit alloc_resource(struct pci_dev *dev, int idx)
+{
+ struct resource *pr, *r = &dev->resource[idx];
+
+ pr_debug("PCI: Allocating %s: Resource %d: %016llx..%016llx [%x]\n",
+ pci_name(dev), idx,
+ (unsigned long long)r->start,
+ (unsigned long long)r->end,
+ (unsigned int)r->flags);
+
+ pr = pci_find_parent_resource(dev, r);
+ if (!pr || (pr->flags & IORESOURCE_UNSET) ||
+ request_resource(pr, r) < 0) {
+ printk(KERN_WARNING "PCI: Cannot allocate resource region %d"
+ " of device %s, will remap\n", idx, pci_name(dev));
+ if (pr)
+ pr_debug("PCI: parent is %p: %016llx-%016llx [%x]\n",
+ pr,
+ (unsigned long long)pr->start,
+ (unsigned long long)pr->end,
+ (unsigned int)pr->flags);
+ /* We'll assign a new address later */
+ r->flags |= IORESOURCE_UNSET;
+ r->end -= r->start;
+ r->start = 0;
+ }
+}
+
+static void __init pcibios_allocate_resources(int pass)
+{
+ struct pci_dev *dev = NULL;
+ int idx, disabled;
+ u16 command;
+ struct resource *r;
+
+ for_each_pci_dev(dev) {
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) {
+ r = &dev->resource[idx];
+ if (r->parent) /* Already allocated */
+ continue;
+ if (!r->flags || (r->flags & IORESOURCE_UNSET))
+ continue; /* Not assigned at all */
+ /* We only allocate ROMs on pass 1 just in case they
+ * have been screwed up by firmware
+ */
+ if (idx == PCI_ROM_RESOURCE )
+ disabled = 1;
+ if (r->flags & IORESOURCE_IO)
+ disabled = !(command & PCI_COMMAND_IO);
+ else
+ disabled = !(command & PCI_COMMAND_MEMORY);
+ if (pass == disabled)
+ alloc_resource(dev, idx);
+ }
+ if (pass)
+ continue;
+ r = &dev->resource[PCI_ROM_RESOURCE];
+ if (r->flags) {
+ /* Turn the ROM off, leave the resource region,
+ * but keep it unregistered.
+ */
+ u32 reg;
+ pci_read_config_dword(dev, dev->rom_base_reg, &reg);
+ if (reg & PCI_ROM_ADDRESS_ENABLE) {
+ pr_debug("PCI: Switching off ROM of %s\n",
+ pci_name(dev));
+ r->flags &= ~IORESOURCE_ROM_ENABLE;
+ pci_write_config_dword(dev, dev->rom_base_reg,
+ reg & ~PCI_ROM_ADDRESS_ENABLE);
+ }
+ }
+ }
+}
+
+static void __init pcibios_reserve_legacy_regions(struct pci_bus *bus)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ resource_size_t offset;
+ struct resource *res, *pres;
+ int i;
+
+ pr_debug("Reserving legacy ranges for domain %04x\n", pci_domain_nr(bus));
+
+ /* Check for IO */
+ if (!(hose->io_resource.flags & IORESOURCE_IO))
+ goto no_io;
+ offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+ BUG_ON(res == NULL);
+ res->name = "Legacy IO";
+ res->flags = IORESOURCE_IO;
+ res->start = offset;
+ res->end = (offset + 0xfff) & 0xfffffffful;
+ pr_debug("Candidate legacy IO: %pR\n", res);
+ if (request_resource(&hose->io_resource, res)) {
+ printk(KERN_DEBUG
+ "PCI %04x:%02x Cannot reserve Legacy IO %pR\n",
+ pci_domain_nr(bus), bus->number, res);
+ kfree(res);
+ }
+
+ no_io:
+ /* Check for memory */
+ offset = hose->pci_mem_offset;
+ pr_debug("hose mem offset: %016llx\n", (unsigned long long)offset);
+ for (i = 0; i < 3; i++) {
+ pres = &hose->mem_resources[i];
+ if (!(pres->flags & IORESOURCE_MEM))
+ continue;
+ pr_debug("hose mem res: %pR\n", pres);
+ if ((pres->start - offset) <= 0xa0000 &&
+ (pres->end - offset) >= 0xbffff)
+ break;
+ }
+ if (i >= 3)
+ return;
+ res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+ BUG_ON(res == NULL);
+ res->name = "Legacy VGA memory";
+ res->flags = IORESOURCE_MEM;
+ res->start = 0xa0000 + offset;
+ res->end = 0xbffff + offset;
+ pr_debug("Candidate VGA memory: %pR\n", res);
+ if (request_resource(pres, res)) {
+ printk(KERN_DEBUG
+ "PCI %04x:%02x Cannot reserve VGA memory %pR\n",
+ pci_domain_nr(bus), bus->number, res);
+ kfree(res);
+ }
+}
+
+void __init pcibios_resource_survey(void)
+{
+ struct pci_bus *b;
+
+ /* Allocate and assign resources */
+ list_for_each_entry(b, &pci_root_buses, node)
+ pcibios_allocate_bus_resources(b);
+ pcibios_allocate_resources(0);
+ pcibios_allocate_resources(1);
+
+ /* Before we start assigning unassigned resource, we try to reserve
+ * the low IO area and the VGA memory area if they intersect the
+ * bus available resources to avoid allocating things on top of them
+ */
+ if (!pci_has_flag(PCI_PROBE_ONLY)) {
+ list_for_each_entry(b, &pci_root_buses, node)
+ pcibios_reserve_legacy_regions(b);
+ }
+
+ /* Now, if the platform didn't decide to blindly trust the firmware,
+ * we proceed to assigning things that were left unassigned
+ */
+ if (!pci_has_flag(PCI_PROBE_ONLY)) {
+ pr_debug("PCI: Assigning unassigned resources...\n");
+ pci_assign_unassigned_resources();
+ }
+
+ /* Call machine dependent fixup */
+ if (ppc_md.pcibios_fixup)
+ ppc_md.pcibios_fixup();
+}
+
+#ifdef CONFIG_HOTPLUG
+
+/* This is used by the PCI hotplug driver to allocate resource
+ * of newly plugged busses. We can try to consolidate with the
+ * rest of the code later, for now, keep it as-is as our main
+ * resource allocation function doesn't deal with sub-trees yet.
+ */
+void pcibios_claim_one_bus(struct pci_bus *bus)
+{
+ struct pci_dev *dev;
+ struct pci_bus *child_bus;
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ int i;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *r = &dev->resource[i];
+
+ if (r->parent || !r->start || !r->flags)
+ continue;
+
+ pr_debug("PCI: Claiming %s: "
+ "Resource %d: %016llx..%016llx [%x]\n",
+ pci_name(dev), i,
+ (unsigned long long)r->start,
+ (unsigned long long)r->end,
+ (unsigned int)r->flags);
+
+ pci_claim_resource(dev, i);
+ }
+ }
+
+ list_for_each_entry(child_bus, &bus->children, node)
+ pcibios_claim_one_bus(child_bus);
+}
+
+
+/* pcibios_finish_adding_to_bus
+ *
+ * This is to be called by the hotplug code after devices have been
+ * added to a bus, this include calling it for a PHB that is just
+ * being added
+ */
+void pcibios_finish_adding_to_bus(struct pci_bus *bus)
+{
+ pr_debug("PCI: Finishing adding to hotplug bus %04x:%02x\n",
+ pci_domain_nr(bus), bus->number);
+
+ /* Allocate bus and devices resources */
+ pcibios_allocate_bus_resources(bus);
+ pcibios_claim_one_bus(bus);
+
+ /* Add new devices to global lists. Register in proc, sysfs. */
+ pci_bus_add_devices(bus);
+
+ /* Fixup EEH */
+ eeh_add_device_tree_late(bus);
+}
+EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus);
+
+#endif /* CONFIG_HOTPLUG */
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+ if (ppc_md.pcibios_enable_device_hook)
+ if (ppc_md.pcibios_enable_device_hook(dev))
+ return -EINVAL;
+
+ return pci_enable_resources(dev, mask);
+}
+
+resource_size_t pcibios_io_space_offset(struct pci_controller *hose)
+{
+ return (unsigned long) hose->io_base_virt - _IO_BASE;
+}
+
+static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, struct list_head *resources)
+{
+ struct resource *res;
+ int i;
+
+ /* Hookup PHB IO resource */
+ res = &hose->io_resource;
+
+ if (!res->flags) {
+ printk(KERN_WARNING "PCI: I/O resource not set for host"
+ " bridge %s (domain %d)\n",
+ hose->dn->full_name, hose->global_number);
+#ifdef CONFIG_PPC32
+ /* Workaround for lack of IO resource only on 32-bit */
+ res->start = (unsigned long)hose->io_base_virt - isa_io_base;
+ res->end = res->start + IO_SPACE_LIMIT;
+ res->flags = IORESOURCE_IO;
+#endif /* CONFIG_PPC32 */
+ }
+
+ pr_debug("PCI: PHB IO resource = %016llx-%016llx [%lx]\n",
+ (unsigned long long)res->start,
+ (unsigned long long)res->end,
+ (unsigned long)res->flags);
+ pci_add_resource_offset(resources, res, pcibios_io_space_offset(hose));
+
+ /* Hookup PHB Memory resources */
+ for (i = 0; i < 3; ++i) {
+ res = &hose->mem_resources[i];
+ if (!res->flags) {
+ if (i > 0)
+ continue;
+ printk(KERN_ERR "PCI: Memory resource 0 not set for "
+ "host bridge %s (domain %d)\n",
+ hose->dn->full_name, hose->global_number);
+#ifdef CONFIG_PPC32
+ /* Workaround for lack of MEM resource only on 32-bit */
+ res->start = hose->pci_mem_offset;
+ res->end = (resource_size_t)-1LL;
+ res->flags = IORESOURCE_MEM;
+#endif /* CONFIG_PPC32 */
+ }
+
+ pr_debug("PCI: PHB MEM resource %d = %016llx-%016llx [%lx]\n", i,
+ (unsigned long long)res->start,
+ (unsigned long long)res->end,
+ (unsigned long)res->flags);
+ pci_add_resource_offset(resources, res, hose->pci_mem_offset);
+ }
+
+ pr_debug("PCI: PHB MEM offset = %016llx\n",
+ (unsigned long long)hose->pci_mem_offset);
+ pr_debug("PCI: PHB IO offset = %08lx\n",
+ (unsigned long)hose->io_base_virt - _IO_BASE);
+
+}
+
+/*
+ * Null PCI config access functions, for the case when we can't
+ * find a hose.
+ */
+#define NULL_PCI_OP(rw, size, type) \
+static int \
+null_##rw##_config_##size(struct pci_dev *dev, int offset, type val) \
+{ \
+ return PCIBIOS_DEVICE_NOT_FOUND; \
+}
+
+static int
+null_read_config(struct pci_bus *bus, unsigned int devfn, int offset,
+ int len, u32 *val)
+{
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+static int
+null_write_config(struct pci_bus *bus, unsigned int devfn, int offset,
+ int len, u32 val)
+{
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+static struct pci_ops null_pci_ops =
+{
+ .read = null_read_config,
+ .write = null_write_config,
+};
+
+/*
+ * These functions are used early on before PCI scanning is done
+ * and all of the pci_dev and pci_bus structures have been created.
+ */
+static struct pci_bus *
+fake_pci_bus(struct pci_controller *hose, int busnr)
+{
+ static struct pci_bus bus;
+
+ if (hose == 0) {
+ printk(KERN_ERR "Can't find hose for PCI bus %d!\n", busnr);
+ }
+ bus.number = busnr;
+ bus.sysdata = hose;
+ bus.ops = hose? hose->ops: &null_pci_ops;
+ return &bus;
+}
+
+#define EARLY_PCI_OP(rw, size, type) \
+int early_##rw##_config_##size(struct pci_controller *hose, int bus, \
+ int devfn, int offset, type value) \
+{ \
+ return pci_bus_##rw##_config_##size(fake_pci_bus(hose, bus), \
+ devfn, offset, value); \
+}
+
+EARLY_PCI_OP(read, byte, u8 *)
+EARLY_PCI_OP(read, word, u16 *)
+EARLY_PCI_OP(read, dword, u32 *)
+EARLY_PCI_OP(write, byte, u8)
+EARLY_PCI_OP(write, word, u16)
+EARLY_PCI_OP(write, dword, u32)
+
+extern int pci_bus_find_capability (struct pci_bus *bus, unsigned int devfn, int cap);
+int early_find_capability(struct pci_controller *hose, int bus, int devfn,
+ int cap)
+{
+ return pci_bus_find_capability(fake_pci_bus(hose, bus), devfn, cap);
+}
+
+struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus)
+{
+ struct pci_controller *hose = bus->sysdata;
+
+ return of_node_get(hose->dn);
+}
+
+/**
+ * pci_scan_phb - Given a pci_controller, setup and scan the PCI bus
+ * @hose: Pointer to the PCI host controller instance structure
+ */
+void __devinit pcibios_scan_phb(struct pci_controller *hose)
+{
+ LIST_HEAD(resources);
+ struct pci_bus *bus;
+ struct device_node *node = hose->dn;
+ int mode;
+
+ pr_debug("PCI: Scanning PHB %s\n",
+ node ? node->full_name : "<NO NAME>");
+
+ /* Get some IO space for the new PHB */
+ pcibios_setup_phb_io_space(hose);
+
+ /* Wire up PHB bus resources */
+ pcibios_setup_phb_resources(hose, &resources);
+
+ /* Create an empty bus for the toplevel */
+ bus = pci_create_root_bus(hose->parent, hose->first_busno,
+ hose->ops, hose, &resources);
+ if (bus == NULL) {
+ pr_err("Failed to create bus for PCI domain %04x\n",
+ hose->global_number);
+ pci_free_resource_list(&resources);
+ return;
+ }
+ bus->secondary = hose->first_busno;
+ hose->bus = bus;
+
+ /* Get probe mode and perform scan */
+ mode = PCI_PROBE_NORMAL;
+ if (node && ppc_md.pci_probe_mode)
+ mode = ppc_md.pci_probe_mode(bus);
+ pr_debug(" probe mode: %d\n", mode);
+ if (mode == PCI_PROBE_DEVTREE) {
+ bus->subordinate = hose->last_busno;
+ of_scan_bus(node, bus);
+ }
+
+ if (mode == PCI_PROBE_NORMAL)
+ hose->last_busno = bus->subordinate = pci_scan_child_bus(bus);
+
+ /* Platform gets a chance to do some global fixups before
+ * we proceed to resource allocation
+ */
+ if (ppc_md.pcibios_fixup_phb)
+ ppc_md.pcibios_fixup_phb(hose);
+
+ /* Configure PCI Express settings */
+ if (bus && !pci_has_flag(PCI_PROBE_ONLY)) {
+ struct pci_bus *child;
+ list_for_each_entry(child, &bus->children, node) {
+ struct pci_dev *self = child->self;
+ if (!self)
+ continue;
+ pcie_bus_configure_settings(child, self->pcie_mpss);
+ }
+ }
+}
+
+static void fixup_hide_host_resource_fsl(struct pci_dev *dev)
+{
+ int i, class = dev->class >> 8;
+ /* When configured as agent, programing interface = 1 */
+ int prog_if = dev->class & 0xf;
+
+ if ((class == PCI_CLASS_PROCESSOR_POWERPC ||
+ class == PCI_CLASS_BRIDGE_OTHER) &&
+ (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) &&
+ (prog_if == 0) &&
+ (dev->bus->parent == NULL)) {
+ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+ dev->resource[i].start = 0;
+ dev->resource[i].end = 0;
+ dev->resource[i].flags = 0;
+ }
+ }
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl);
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
new file mode 100644
index 00000000..4b06ec5a
--- /dev/null
+++ b/arch/powerpc/kernel/pci_32.c
@@ -0,0 +1,310 @@
+/*
+ * Common pmac/prep/chrp pci routines. -- Cort
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/capability.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/bootmem.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/sections.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/byteorder.h>
+#include <asm/uaccess.h>
+#include <asm/machdep.h>
+
+#undef DEBUG
+
+unsigned long isa_io_base = 0;
+unsigned long pci_dram_offset = 0;
+int pcibios_assign_bus_offset = 1;
+
+void pcibios_make_OF_bus_map(void);
+
+static void fixup_cpc710_pci64(struct pci_dev* dev);
+static u8* pci_to_OF_bus_map;
+
+/* By default, we don't re-assign bus numbers. We do this only on
+ * some pmacs
+ */
+static int pci_assign_all_buses;
+
+static int pci_bus_count;
+
+/* This will remain NULL for now, until isa-bridge.c is made common
+ * to both 32-bit and 64-bit.
+ */
+struct pci_dev *isa_bridge_pcidev;
+EXPORT_SYMBOL_GPL(isa_bridge_pcidev);
+
+static void
+fixup_cpc710_pci64(struct pci_dev* dev)
+{
+ /* Hide the PCI64 BARs from the kernel as their content doesn't
+ * fit well in the resource management
+ */
+ dev->resource[0].start = dev->resource[0].end = 0;
+ dev->resource[0].flags = 0;
+ dev->resource[1].start = dev->resource[1].end = 0;
+ dev->resource[1].flags = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CPC710_PCI64, fixup_cpc710_pci64);
+
+/*
+ * Functions below are used on OpenFirmware machines.
+ */
+static void
+make_one_node_map(struct device_node* node, u8 pci_bus)
+{
+ const int *bus_range;
+ int len;
+
+ if (pci_bus >= pci_bus_count)
+ return;
+ bus_range = of_get_property(node, "bus-range", &len);
+ if (bus_range == NULL || len < 2 * sizeof(int)) {
+ printk(KERN_WARNING "Can't get bus-range for %s, "
+ "assuming it starts at 0\n", node->full_name);
+ pci_to_OF_bus_map[pci_bus] = 0;
+ } else
+ pci_to_OF_bus_map[pci_bus] = bus_range[0];
+
+ for_each_child_of_node(node, node) {
+ struct pci_dev* dev;
+ const unsigned int *class_code, *reg;
+
+ class_code = of_get_property(node, "class-code", NULL);
+ if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
+ (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS))
+ continue;
+ reg = of_get_property(node, "reg", NULL);
+ if (!reg)
+ continue;
+ dev = pci_get_bus_and_slot(pci_bus, ((reg[0] >> 8) & 0xff));
+ if (!dev || !dev->subordinate) {
+ pci_dev_put(dev);
+ continue;
+ }
+ make_one_node_map(node, dev->subordinate->number);
+ pci_dev_put(dev);
+ }
+}
+
+void
+pcibios_make_OF_bus_map(void)
+{
+ int i;
+ struct pci_controller *hose, *tmp;
+ struct property *map_prop;
+ struct device_node *dn;
+
+ pci_to_OF_bus_map = kmalloc(pci_bus_count, GFP_KERNEL);
+ if (!pci_to_OF_bus_map) {
+ printk(KERN_ERR "Can't allocate OF bus map !\n");
+ return;
+ }
+
+ /* We fill the bus map with invalid values, that helps
+ * debugging.
+ */
+ for (i=0; i<pci_bus_count; i++)
+ pci_to_OF_bus_map[i] = 0xff;
+
+ /* For each hose, we begin searching bridges */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ struct device_node* node = hose->dn;
+
+ if (!node)
+ continue;
+ make_one_node_map(node, hose->first_busno);
+ }
+ dn = of_find_node_by_path("/");
+ map_prop = of_find_property(dn, "pci-OF-bus-map", NULL);
+ if (map_prop) {
+ BUG_ON(pci_bus_count > map_prop->length);
+ memcpy(map_prop->value, pci_to_OF_bus_map, pci_bus_count);
+ }
+ of_node_put(dn);
+#ifdef DEBUG
+ printk("PCI->OF bus map:\n");
+ for (i=0; i<pci_bus_count; i++) {
+ if (pci_to_OF_bus_map[i] == 0xff)
+ continue;
+ printk("%d -> %d\n", i, pci_to_OF_bus_map[i]);
+ }
+#endif
+}
+
+
+/*
+ * Returns the PCI device matching a given OF node
+ */
+int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn)
+{
+ struct pci_dev *dev = NULL;
+ const __be32 *reg;
+ int size;
+
+ /* Check if it might have a chance to be a PCI device */
+ if (!pci_find_hose_for_OF_device(node))
+ return -ENODEV;
+
+ reg = of_get_property(node, "reg", &size);
+ if (!reg || size < 5 * sizeof(u32))
+ return -ENODEV;
+
+ *bus = (be32_to_cpup(&reg[0]) >> 16) & 0xff;
+ *devfn = (be32_to_cpup(&reg[0]) >> 8) & 0xff;
+
+ /* Ok, here we need some tweak. If we have already renumbered
+ * all busses, we can't rely on the OF bus number any more.
+ * the pci_to_OF_bus_map is not enough as several PCI busses
+ * may match the same OF bus number.
+ */
+ if (!pci_to_OF_bus_map)
+ return 0;
+
+ for_each_pci_dev(dev)
+ if (pci_to_OF_bus_map[dev->bus->number] == *bus &&
+ dev->devfn == *devfn) {
+ *bus = dev->bus->number;
+ pci_dev_put(dev);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+EXPORT_SYMBOL(pci_device_from_OF_node);
+
+/* We create the "pci-OF-bus-map" property now so it appears in the
+ * /proc device tree
+ */
+void __init
+pci_create_OF_bus_map(void)
+{
+ struct property* of_prop;
+ struct device_node *dn;
+
+ of_prop = (struct property*) alloc_bootmem(sizeof(struct property) + 256);
+ if (!of_prop)
+ return;
+ dn = of_find_node_by_path("/");
+ if (dn) {
+ memset(of_prop, -1, sizeof(struct property) + 256);
+ of_prop->name = "pci-OF-bus-map";
+ of_prop->length = 256;
+ of_prop->value = &of_prop[1];
+ prom_add_property(dn, of_prop);
+ of_node_put(dn);
+ }
+}
+
+void __devinit pcibios_setup_phb_io_space(struct pci_controller *hose)
+{
+ unsigned long io_offset;
+ struct resource *res = &hose->io_resource;
+
+ /* Fixup IO space offset */
+ io_offset = pcibios_io_space_offset(hose);
+ res->start += io_offset;
+ res->end += io_offset;
+}
+
+static int __init pcibios_init(void)
+{
+ struct pci_controller *hose, *tmp;
+ int next_busno = 0;
+
+ printk(KERN_INFO "PCI: Probing PCI hardware\n");
+
+ if (pci_has_flag(PCI_REASSIGN_ALL_BUS))
+ pci_assign_all_buses = 1;
+
+ /* Scan all of the recorded PCI controllers. */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ if (pci_assign_all_buses)
+ hose->first_busno = next_busno;
+ hose->last_busno = 0xff;
+ pcibios_scan_phb(hose);
+ pci_bus_add_devices(hose->bus);
+ if (pci_assign_all_buses || next_busno <= hose->last_busno)
+ next_busno = hose->last_busno + pcibios_assign_bus_offset;
+ }
+ pci_bus_count = next_busno;
+
+ /* OpenFirmware based machines need a map of OF bus
+ * numbers vs. kernel bus numbers since we may have to
+ * remap them.
+ */
+ if (pci_assign_all_buses)
+ pcibios_make_OF_bus_map();
+
+ /* Call common code to handle resource allocation */
+ pcibios_resource_survey();
+
+ /* Call machine dependent post-init code */
+ if (ppc_md.pcibios_after_init)
+ ppc_md.pcibios_after_init();
+
+ return 0;
+}
+
+subsys_initcall(pcibios_init);
+
+static struct pci_controller*
+pci_bus_to_hose(int bus)
+{
+ struct pci_controller *hose, *tmp;
+
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ if (bus >= hose->first_busno && bus <= hose->last_busno)
+ return hose;
+ return NULL;
+}
+
+/* Provide information on locations of various I/O regions in physical
+ * memory. Do this on a per-card basis so that we choose the right
+ * root bridge.
+ * Note that the returned IO or memory base is a physical address
+ */
+
+long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn)
+{
+ struct pci_controller* hose;
+ long result = -EOPNOTSUPP;
+
+ hose = pci_bus_to_hose(bus);
+ if (!hose)
+ return -ENODEV;
+
+ switch (which) {
+ case IOBASE_BRIDGE_NUMBER:
+ return (long)hose->first_busno;
+ case IOBASE_MEMORY:
+ return (long)hose->pci_mem_offset;
+ case IOBASE_IO:
+ return (long)hose->io_base_phys;
+ case IOBASE_ISA_IO:
+ return (long)isa_io_base;
+ case IOBASE_ISA_MEM:
+ return (long)isa_mem_base;
+ }
+
+ return result;
+}
+
+
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
new file mode 100644
index 00000000..94a54f61
--- /dev/null
+++ b/arch/powerpc/kernel/pci_64.c
@@ -0,0 +1,272 @@
+/*
+ * Port for PPC64 David Engebretsen, IBM Corp.
+ * Contains common pci routines for ppc64 platform, pSeries and iSeries brands.
+ *
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Rework, based on alpha PCI code.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/syscalls.h>
+#include <linux/irq.h>
+#include <linux/vmalloc.h>
+
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/byteorder.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+
+/* pci_io_base -- the base address from which io bars are offsets.
+ * This is the lowest I/O base address (so bar values are always positive),
+ * and it *must* be the start of ISA space if an ISA bus exists because
+ * ISA drivers use hard coded offsets. If no ISA bus exists nothing
+ * is mapped on the first 64K of IO space
+ */
+unsigned long pci_io_base = ISA_IO_BASE;
+EXPORT_SYMBOL(pci_io_base);
+
+static int __init pcibios_init(void)
+{
+ struct pci_controller *hose, *tmp;
+
+ printk(KERN_INFO "PCI: Probing PCI hardware\n");
+
+ /* For now, override phys_mem_access_prot. If we need it,g
+ * later, we may move that initialization to each ppc_md
+ */
+ ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot;
+
+ /* On ppc64, we always enable PCI domains and we keep domain 0
+ * backward compatible in /proc for video cards
+ */
+ pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
+
+ /* Scan all of the recorded PCI controllers. */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ pcibios_scan_phb(hose);
+ pci_bus_add_devices(hose->bus);
+ }
+
+ /* Call common code to handle resource allocation */
+ pcibios_resource_survey();
+
+ printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");
+
+ return 0;
+}
+
+subsys_initcall(pcibios_init);
+
+#ifdef CONFIG_HOTPLUG
+
+int pcibios_unmap_io_space(struct pci_bus *bus)
+{
+ struct pci_controller *hose;
+
+ WARN_ON(bus == NULL);
+
+ /* If this is not a PHB, we only flush the hash table over
+ * the area mapped by this bridge. We don't play with the PTE
+ * mappings since we might have to deal with sub-page alignemnts
+ * so flushing the hash table is the only sane way to make sure
+ * that no hash entries are covering that removed bridge area
+ * while still allowing other busses overlapping those pages
+ *
+ * Note: If we ever support P2P hotplug on Book3E, we'll have
+ * to do an appropriate TLB flush here too
+ */
+ if (bus->self) {
+#ifdef CONFIG_PPC_STD_MMU_64
+ struct resource *res = bus->resource[0];
+#endif
+
+ pr_debug("IO unmapping for PCI-PCI bridge %s\n",
+ pci_name(bus->self));
+
+#ifdef CONFIG_PPC_STD_MMU_64
+ __flush_hash_table_range(&init_mm, res->start + _IO_BASE,
+ res->end + _IO_BASE + 1);
+#endif
+ return 0;
+ }
+
+ /* Get the host bridge */
+ hose = pci_bus_to_host(bus);
+
+ /* Check if we have IOs allocated */
+ if (hose->io_base_alloc == 0)
+ return 0;
+
+ pr_debug("IO unmapping for PHB %s\n", hose->dn->full_name);
+ pr_debug(" alloc=0x%p\n", hose->io_base_alloc);
+
+ /* This is a PHB, we fully unmap the IO area */
+ vunmap(hose->io_base_alloc);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pcibios_unmap_io_space);
+
+#endif /* CONFIG_HOTPLUG */
+
+static int __devinit pcibios_map_phb_io_space(struct pci_controller *hose)
+{
+ struct vm_struct *area;
+ unsigned long phys_page;
+ unsigned long size_page;
+ unsigned long io_virt_offset;
+
+ phys_page = _ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE);
+ size_page = _ALIGN_UP(hose->pci_io_size, PAGE_SIZE);
+
+ /* Make sure IO area address is clear */
+ hose->io_base_alloc = NULL;
+
+ /* If there's no IO to map on that bus, get away too */
+ if (hose->pci_io_size == 0 || hose->io_base_phys == 0)
+ return 0;
+
+ /* Let's allocate some IO space for that guy. We don't pass
+ * VM_IOREMAP because we don't care about alignment tricks that
+ * the core does in that case. Maybe we should due to stupid card
+ * with incomplete address decoding but I'd rather not deal with
+ * those outside of the reserved 64K legacy region.
+ */
+ area = __get_vm_area(size_page, 0, PHB_IO_BASE, PHB_IO_END);
+ if (area == NULL)
+ return -ENOMEM;
+ hose->io_base_alloc = area->addr;
+ hose->io_base_virt = (void __iomem *)(area->addr +
+ hose->io_base_phys - phys_page);
+
+ pr_debug("IO mapping for PHB %s\n", hose->dn->full_name);
+ pr_debug(" phys=0x%016llx, virt=0x%p (alloc=0x%p)\n",
+ hose->io_base_phys, hose->io_base_virt, hose->io_base_alloc);
+ pr_debug(" size=0x%016llx (alloc=0x%016lx)\n",
+ hose->pci_io_size, size_page);
+
+ /* Establish the mapping */
+ if (__ioremap_at(phys_page, area->addr, size_page,
+ _PAGE_NO_CACHE | _PAGE_GUARDED) == NULL)
+ return -ENOMEM;
+
+ /* Fixup hose IO resource */
+ io_virt_offset = pcibios_io_space_offset(hose);
+ hose->io_resource.start += io_virt_offset;
+ hose->io_resource.end += io_virt_offset;
+
+ pr_debug(" hose->io_resource=%pR\n", &hose->io_resource);
+
+ return 0;
+}
+
+int __devinit pcibios_map_io_space(struct pci_bus *bus)
+{
+ WARN_ON(bus == NULL);
+
+ /* If this not a PHB, nothing to do, page tables still exist and
+ * thus HPTEs will be faulted in when needed
+ */
+ if (bus->self) {
+ pr_debug("IO mapping for PCI-PCI bridge %s\n",
+ pci_name(bus->self));
+ pr_debug(" virt=0x%016llx...0x%016llx\n",
+ bus->resource[0]->start + _IO_BASE,
+ bus->resource[0]->end + _IO_BASE);
+ return 0;
+ }
+
+ return pcibios_map_phb_io_space(pci_bus_to_host(bus));
+}
+EXPORT_SYMBOL_GPL(pcibios_map_io_space);
+
+void __devinit pcibios_setup_phb_io_space(struct pci_controller *hose)
+{
+ pcibios_map_phb_io_space(hose);
+}
+
+#define IOBASE_BRIDGE_NUMBER 0
+#define IOBASE_MEMORY 1
+#define IOBASE_IO 2
+#define IOBASE_ISA_IO 3
+#define IOBASE_ISA_MEM 4
+
+long sys_pciconfig_iobase(long which, unsigned long in_bus,
+ unsigned long in_devfn)
+{
+ struct pci_controller* hose;
+ struct list_head *ln;
+ struct pci_bus *bus = NULL;
+ struct device_node *hose_node;
+
+ /* Argh ! Please forgive me for that hack, but that's the
+ * simplest way to get existing XFree to not lockup on some
+ * G5 machines... So when something asks for bus 0 io base
+ * (bus 0 is HT root), we return the AGP one instead.
+ */
+ if (in_bus == 0 && of_machine_is_compatible("MacRISC4")) {
+ struct device_node *agp;
+
+ agp = of_find_compatible_node(NULL, NULL, "u3-agp");
+ if (agp)
+ in_bus = 0xf0;
+ of_node_put(agp);
+ }
+
+ /* That syscall isn't quite compatible with PCI domains, but it's
+ * used on pre-domains setup. We return the first match
+ */
+
+ for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) {
+ bus = pci_bus_b(ln);
+ if (in_bus >= bus->number && in_bus <= bus->subordinate)
+ break;
+ bus = NULL;
+ }
+ if (bus == NULL || bus->dev.of_node == NULL)
+ return -ENODEV;
+
+ hose_node = bus->dev.of_node;
+ hose = PCI_DN(hose_node)->phb;
+
+ switch (which) {
+ case IOBASE_BRIDGE_NUMBER:
+ return (long)hose->first_busno;
+ case IOBASE_MEMORY:
+ return (long)hose->pci_mem_offset;
+ case IOBASE_IO:
+ return (long)hose->io_base_phys;
+ case IOBASE_ISA_IO:
+ return (long)isa_io_base;
+ case IOBASE_ISA_MEM:
+ return -EINVAL;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+#ifdef CONFIG_NUMA
+int pcibus_to_node(struct pci_bus *bus)
+{
+ struct pci_controller *phb = pci_bus_to_host(bus);
+ return phb->node;
+}
+EXPORT_SYMBOL(pcibus_to_node);
+#endif
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
new file mode 100644
index 00000000..dd9e4a04
--- /dev/null
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -0,0 +1,165 @@
+/*
+ * pci_dn.c
+ *
+ * Copyright (C) 2001 Todd Inglett, IBM Corporation
+ *
+ * PCI manipulation via device_nodes.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/gfp.h>
+
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/firmware.h>
+
+/*
+ * Traverse_func that inits the PCI fields of the device node.
+ * NOTE: this *must* be done before read/write config to the device.
+ */
+void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
+{
+ struct pci_controller *phb = data;
+ const int *type =
+ of_get_property(dn, "ibm,pci-config-space-type", NULL);
+ const u32 *regs;
+ struct pci_dn *pdn;
+
+ pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL);
+ if (pdn == NULL)
+ return NULL;
+ dn->data = pdn;
+ pdn->node = dn;
+ pdn->phb = phb;
+#ifdef CONFIG_PPC_POWERNV
+ pdn->pe_number = IODA_INVALID_PE;
+#endif
+ regs = of_get_property(dn, "reg", NULL);
+ if (regs) {
+ /* First register entry is addr (00BBSS00) */
+ pdn->busno = (regs[0] >> 16) & 0xff;
+ pdn->devfn = (regs[0] >> 8) & 0xff;
+ }
+
+ pdn->pci_ext_config_space = (type && *type == 1);
+ return NULL;
+}
+
+/*
+ * Traverse a device tree stopping each PCI device in the tree.
+ * This is done depth first. As each node is processed, a "pre"
+ * function is called and the children are processed recursively.
+ *
+ * The "pre" func returns a value. If non-zero is returned from
+ * the "pre" func, the traversal stops and this value is returned.
+ * This return value is useful when using traverse as a method of
+ * finding a device.
+ *
+ * NOTE: we do not run the func for devices that do not appear to
+ * be PCI except for the start node which we assume (this is good
+ * because the start node is often a phb which may be missing PCI
+ * properties).
+ * We use the class-code as an indicator. If we run into
+ * one of these nodes we also assume its siblings are non-pci for
+ * performance.
+ */
+void *traverse_pci_devices(struct device_node *start, traverse_func pre,
+ void *data)
+{
+ struct device_node *dn, *nextdn;
+ void *ret;
+
+ /* We started with a phb, iterate all childs */
+ for (dn = start->child; dn; dn = nextdn) {
+ const u32 *classp;
+ u32 class;
+
+ nextdn = NULL;
+ classp = of_get_property(dn, "class-code", NULL);
+ class = classp ? *classp : 0;
+
+ if (pre && ((ret = pre(dn, data)) != NULL))
+ return ret;
+
+ /* If we are a PCI bridge, go down */
+ if (dn->child && ((class >> 8) == PCI_CLASS_BRIDGE_PCI ||
+ (class >> 8) == PCI_CLASS_BRIDGE_CARDBUS))
+ /* Depth first...do children */
+ nextdn = dn->child;
+ else if (dn->sibling)
+ /* ok, try next sibling instead. */
+ nextdn = dn->sibling;
+ if (!nextdn) {
+ /* Walk up to next valid sibling. */
+ do {
+ dn = dn->parent;
+ if (dn == start)
+ return NULL;
+ } while (dn->sibling == NULL);
+ nextdn = dn->sibling;
+ }
+ }
+ return NULL;
+}
+
+/**
+ * pci_devs_phb_init_dynamic - setup pci devices under this PHB
+ * phb: pci-to-host bridge (top-level bridge connecting to cpu)
+ *
+ * This routine is called both during boot, (before the memory
+ * subsystem is set up, before kmalloc is valid) and during the
+ * dynamic lpar operation of adding a PHB to a running system.
+ */
+void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb)
+{
+ struct device_node *dn = phb->dn;
+ struct pci_dn *pdn;
+
+ /* PHB nodes themselves must not match */
+ update_dn_pci_info(dn, phb);
+ pdn = dn->data;
+ if (pdn) {
+ pdn->devfn = pdn->busno = -1;
+ pdn->phb = phb;
+ }
+
+ /* Update dn->phb ptrs for new phb and children devices */
+ traverse_pci_devices(dn, update_dn_pci_info, phb);
+}
+
+/**
+ * pci_devs_phb_init - Initialize phbs and pci devs under them.
+ *
+ * This routine walks over all phb's (pci-host bridges) on the
+ * system, and sets up assorted pci-related structures
+ * (including pci info in the device node structs) for each
+ * pci device found underneath. This routine runs once,
+ * early in the boot sequence.
+ */
+void __init pci_devs_phb_init(void)
+{
+ struct pci_controller *phb, *tmp;
+
+ /* This must be done first so the device nodes have valid pci info! */
+ list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
+ pci_devs_phb_init_dynamic(phb);
+}
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
new file mode 100644
index 00000000..89dde171
--- /dev/null
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -0,0 +1,373 @@
+/*
+ * Helper routines to scan the device tree for PCI devices and busses
+ *
+ * Migrated out of PowerPC architecture pci_64.c file by Grant Likely
+ * <grant.likely@secretlab.ca> so that these routines are available for
+ * 32 bit also.
+ *
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Rework, based on alpha PCI code.
+ * Copyright (c) 2009 Secret Lab Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/pci.h>
+#include <linux/export.h>
+#include <asm/pci-bridge.h>
+#include <asm/prom.h>
+
+/**
+ * get_int_prop - Decode a u32 from a device tree property
+ */
+static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
+{
+ const u32 *prop;
+ int len;
+
+ prop = of_get_property(np, name, &len);
+ if (prop && len >= 4)
+ return *prop;
+ return def;
+}
+
+/**
+ * pci_parse_of_flags - Parse the flags cell of a device tree PCI address
+ * @addr0: value of 1st cell of a device tree PCI address.
+ * @bridge: Set this flag if the address is from a bridge 'ranges' property
+ */
+unsigned int pci_parse_of_flags(u32 addr0, int bridge)
+{
+ unsigned int flags = 0;
+
+ if (addr0 & 0x02000000) {
+ flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
+ flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
+ flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
+ if (addr0 & 0x40000000)
+ flags |= IORESOURCE_PREFETCH
+ | PCI_BASE_ADDRESS_MEM_PREFETCH;
+ /* Note: We don't know whether the ROM has been left enabled
+ * by the firmware or not. We mark it as disabled (ie, we do
+ * not set the IORESOURCE_ROM_ENABLE flag) for now rather than
+ * do a config space read, it will be force-enabled if needed
+ */
+ if (!bridge && (addr0 & 0xff) == 0x30)
+ flags |= IORESOURCE_READONLY;
+ } else if (addr0 & 0x01000000)
+ flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
+ if (flags)
+ flags |= IORESOURCE_SIZEALIGN;
+ return flags;
+}
+
+/**
+ * of_pci_parse_addrs - Parse PCI addresses assigned in the device tree node
+ * @node: device tree node for the PCI device
+ * @dev: pci_dev structure for the device
+ *
+ * This function parses the 'assigned-addresses' property of a PCI devices'
+ * device tree node and writes them into the associated pci_dev structure.
+ */
+static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
+{
+ u64 base, size;
+ unsigned int flags;
+ struct pci_bus_region region;
+ struct resource *res;
+ const u32 *addrs;
+ u32 i;
+ int proplen;
+
+ addrs = of_get_property(node, "assigned-addresses", &proplen);
+ if (!addrs)
+ return;
+ pr_debug(" parse addresses (%d bytes) @ %p\n", proplen, addrs);
+ for (; proplen >= 20; proplen -= 20, addrs += 5) {
+ flags = pci_parse_of_flags(addrs[0], 0);
+ if (!flags)
+ continue;
+ base = of_read_number(&addrs[1], 2);
+ size = of_read_number(&addrs[3], 2);
+ if (!size)
+ continue;
+ i = addrs[0] & 0xff;
+ pr_debug(" base: %llx, size: %llx, i: %x\n",
+ (unsigned long long)base,
+ (unsigned long long)size, i);
+
+ if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) {
+ res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
+ } else if (i == dev->rom_base_reg) {
+ res = &dev->resource[PCI_ROM_RESOURCE];
+ flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+ } else {
+ printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
+ continue;
+ }
+ res->flags = flags;
+ res->name = pci_name(dev);
+ region.start = base;
+ region.end = base + size - 1;
+ pcibios_bus_to_resource(dev, res, &region);
+ }
+}
+
+/**
+ * of_create_pci_dev - Given a device tree node on a pci bus, create a pci_dev
+ * @node: device tree node pointer
+ * @bus: bus the device is sitting on
+ * @devfn: PCI function number, extracted from device tree by caller.
+ */
+struct pci_dev *of_create_pci_dev(struct device_node *node,
+ struct pci_bus *bus, int devfn)
+{
+ struct pci_dev *dev;
+ const char *type;
+ struct pci_slot *slot;
+
+ dev = alloc_pci_dev();
+ if (!dev)
+ return NULL;
+ type = of_get_property(node, "device_type", NULL);
+ if (type == NULL)
+ type = "";
+
+ pr_debug(" create device, devfn: %x, type: %s\n", devfn, type);
+
+ dev->bus = bus;
+ dev->dev.of_node = of_node_get(node);
+ dev->dev.parent = bus->bridge;
+ dev->dev.bus = &pci_bus_type;
+ dev->devfn = devfn;
+ dev->multifunction = 0; /* maybe a lie? */
+ dev->needs_freset = 0; /* pcie fundamental reset required */
+ set_pcie_port_type(dev);
+
+ list_for_each_entry(slot, &dev->bus->slots, list)
+ if (PCI_SLOT(dev->devfn) == slot->number)
+ dev->slot = slot;
+
+ dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
+ dev->device = get_int_prop(node, "device-id", 0xffff);
+ dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0);
+ dev->subsystem_device = get_int_prop(node, "subsystem-id", 0);
+
+ dev->cfg_size = pci_cfg_space_size(dev);
+
+ dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(bus),
+ dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ dev->class = get_int_prop(node, "class-code", 0);
+ dev->revision = get_int_prop(node, "revision-id", 0);
+
+ pr_debug(" class: 0x%x\n", dev->class);
+ pr_debug(" revision: 0x%x\n", dev->revision);
+
+ dev->current_state = 4; /* unknown power state */
+ dev->error_state = pci_channel_io_normal;
+ dev->dma_mask = 0xffffffff;
+
+ /* Early fixups, before probing the BARs */
+ pci_fixup_device(pci_fixup_early, dev);
+
+ if (!strcmp(type, "pci") || !strcmp(type, "pciex")) {
+ /* a PCI-PCI bridge */
+ dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
+ dev->rom_base_reg = PCI_ROM_ADDRESS1;
+ set_pcie_hotplug_bridge(dev);
+ } else if (!strcmp(type, "cardbus")) {
+ dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
+ } else {
+ dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
+ dev->rom_base_reg = PCI_ROM_ADDRESS;
+ /* Maybe do a default OF mapping here */
+ dev->irq = NO_IRQ;
+ }
+
+ of_pci_parse_addrs(node, dev);
+
+ pr_debug(" adding to system ...\n");
+
+ pci_device_add(dev, bus);
+
+ return dev;
+}
+EXPORT_SYMBOL(of_create_pci_dev);
+
+/**
+ * of_scan_pci_bridge - Set up a PCI bridge and scan for child nodes
+ * @node: device tree node of bridge
+ * @dev: pci_dev structure for the bridge
+ *
+ * of_scan_bus() calls this routine for each PCI bridge that it finds, and
+ * this routine in turn call of_scan_bus() recusively to scan for more child
+ * devices.
+ */
+void __devinit of_scan_pci_bridge(struct pci_dev *dev)
+{
+ struct device_node *node = dev->dev.of_node;
+ struct pci_bus *bus;
+ const u32 *busrange, *ranges;
+ int len, i, mode;
+ struct pci_bus_region region;
+ struct resource *res;
+ unsigned int flags;
+ u64 size;
+
+ pr_debug("of_scan_pci_bridge(%s)\n", node->full_name);
+
+ /* parse bus-range property */
+ busrange = of_get_property(node, "bus-range", &len);
+ if (busrange == NULL || len != 8) {
+ printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n",
+ node->full_name);
+ return;
+ }
+ ranges = of_get_property(node, "ranges", &len);
+ if (ranges == NULL) {
+ printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %s\n",
+ node->full_name);
+ return;
+ }
+
+ bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+ if (!bus) {
+ printk(KERN_ERR "Failed to create pci bus for %s\n",
+ node->full_name);
+ return;
+ }
+
+ bus->primary = dev->bus->number;
+ bus->subordinate = busrange[1];
+ bus->bridge_ctl = 0;
+
+ /* parse ranges property */
+ /* PCI #address-cells == 3 and #size-cells == 2 always */
+ res = &dev->resource[PCI_BRIDGE_RESOURCES];
+ for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) {
+ res->flags = 0;
+ bus->resource[i] = res;
+ ++res;
+ }
+ i = 1;
+ for (; len >= 32; len -= 32, ranges += 8) {
+ flags = pci_parse_of_flags(ranges[0], 1);
+ size = of_read_number(&ranges[6], 2);
+ if (flags == 0 || size == 0)
+ continue;
+ if (flags & IORESOURCE_IO) {
+ res = bus->resource[0];
+ if (res->flags) {
+ printk(KERN_ERR "PCI: ignoring extra I/O range"
+ " for bridge %s\n", node->full_name);
+ continue;
+ }
+ } else {
+ if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
+ printk(KERN_ERR "PCI: too many memory ranges"
+ " for bridge %s\n", node->full_name);
+ continue;
+ }
+ res = bus->resource[i];
+ ++i;
+ }
+ res->flags = flags;
+ region.start = of_read_number(&ranges[1], 2);
+ region.end = region.start + size - 1;
+ pcibios_bus_to_resource(dev, res, &region);
+ }
+ sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
+ bus->number);
+ pr_debug(" bus name: %s\n", bus->name);
+
+ mode = PCI_PROBE_NORMAL;
+ if (ppc_md.pci_probe_mode)
+ mode = ppc_md.pci_probe_mode(bus);
+ pr_debug(" probe mode: %d\n", mode);
+
+ if (mode == PCI_PROBE_DEVTREE)
+ of_scan_bus(node, bus);
+ else if (mode == PCI_PROBE_NORMAL)
+ pci_scan_child_bus(bus);
+}
+EXPORT_SYMBOL(of_scan_pci_bridge);
+
+/**
+ * __of_scan_bus - given a PCI bus node, setup bus and scan for child devices
+ * @node: device tree node for the PCI bus
+ * @bus: pci_bus structure for the PCI bus
+ * @rescan_existing: Flag indicating bus has already been set up
+ */
+static void __devinit __of_scan_bus(struct device_node *node,
+ struct pci_bus *bus, int rescan_existing)
+{
+ struct device_node *child;
+ const u32 *reg;
+ int reglen, devfn;
+ struct pci_dev *dev;
+
+ pr_debug("of_scan_bus(%s) bus no %d...\n",
+ node->full_name, bus->number);
+
+ /* Scan direct children */
+ for_each_child_of_node(node, child) {
+ pr_debug(" * %s\n", child->full_name);
+ if (!of_device_is_available(child))
+ continue;
+ reg = of_get_property(child, "reg", &reglen);
+ if (reg == NULL || reglen < 20)
+ continue;
+ devfn = (reg[0] >> 8) & 0xff;
+
+ /* create a new pci_dev for this device */
+ dev = of_create_pci_dev(child, bus, devfn);
+ if (!dev)
+ continue;
+ pr_debug(" dev header type: %x\n", dev->hdr_type);
+ }
+
+ /* Apply all fixups necessary. We don't fixup the bus "self"
+ * for an existing bridge that is being rescanned
+ */
+ if (!rescan_existing)
+ pcibios_setup_bus_self(bus);
+ pcibios_setup_bus_devices(bus);
+
+ /* Now scan child busses */
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
+ dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) {
+ of_scan_pci_bridge(dev);
+ }
+ }
+}
+
+/**
+ * of_scan_bus - given a PCI bus node, setup bus and scan for child devices
+ * @node: device tree node for the PCI bus
+ * @bus: pci_bus structure for the PCI bus
+ */
+void __devinit of_scan_bus(struct device_node *node,
+ struct pci_bus *bus)
+{
+ __of_scan_bus(node, bus, 0);
+}
+EXPORT_SYMBOL_GPL(of_scan_bus);
+
+/**
+ * of_rescan_bus - given a PCI bus node, scan for child devices
+ * @node: device tree node for the PCI bus
+ * @bus: pci_bus structure for the PCI bus
+ *
+ * Same as of_scan_bus, but for a pci_bus structure that has already been
+ * setup.
+ */
+void __devinit of_rescan_bus(struct device_node *node,
+ struct pci_bus *bus)
+{
+ __of_scan_bus(node, bus, 1);
+}
+EXPORT_SYMBOL_GPL(of_rescan_bus);
+
diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c
new file mode 100644
index 00000000..58eaa3dd
--- /dev/null
+++ b/arch/powerpc/kernel/pmc.c
@@ -0,0 +1,102 @@
+/*
+ * arch/powerpc/kernel/pmc.c
+ *
+ * Copyright (C) 2004 David Gibson, IBM Corporation.
+ * Includes code formerly from arch/ppc/kernel/perfmon.c:
+ * Author: Andy Fleming
+ * Copyright (c) 2004 Freescale Semiconductor, Inc
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/bug.h>
+#include <linux/spinlock.h>
+#include <linux/export.h>
+
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/pmc.h>
+
+#ifndef MMCR0_PMAO
+#define MMCR0_PMAO 0
+#endif
+
+static void dummy_perf(struct pt_regs *regs)
+{
+#if defined(CONFIG_FSL_EMB_PERFMON)
+ mtpmr(PMRN_PMGC0, mfpmr(PMRN_PMGC0) & ~PMGC0_PMIE);
+#elif defined(CONFIG_PPC64) || defined(CONFIG_6xx)
+ if (cur_cpu_spec->pmc_type == PPC_PMC_IBM)
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~(MMCR0_PMXE|MMCR0_PMAO));
+#else
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMXE);
+#endif
+}
+
+
+static DEFINE_RAW_SPINLOCK(pmc_owner_lock);
+static void *pmc_owner_caller; /* mostly for debugging */
+perf_irq_t perf_irq = dummy_perf;
+
+int reserve_pmc_hardware(perf_irq_t new_perf_irq)
+{
+ int err = 0;
+
+ raw_spin_lock(&pmc_owner_lock);
+
+ if (pmc_owner_caller) {
+ printk(KERN_WARNING "reserve_pmc_hardware: "
+ "PMC hardware busy (reserved by caller %p)\n",
+ pmc_owner_caller);
+ err = -EBUSY;
+ goto out;
+ }
+
+ pmc_owner_caller = __builtin_return_address(0);
+ perf_irq = new_perf_irq ? new_perf_irq : dummy_perf;
+
+ out:
+ raw_spin_unlock(&pmc_owner_lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(reserve_pmc_hardware);
+
+void release_pmc_hardware(void)
+{
+ raw_spin_lock(&pmc_owner_lock);
+
+ WARN_ON(! pmc_owner_caller);
+
+ pmc_owner_caller = NULL;
+ perf_irq = dummy_perf;
+
+ raw_spin_unlock(&pmc_owner_lock);
+}
+EXPORT_SYMBOL_GPL(release_pmc_hardware);
+
+#ifdef CONFIG_PPC64
+void power4_enable_pmcs(void)
+{
+ unsigned long hid0;
+
+ hid0 = mfspr(SPRN_HID0);
+ hid0 |= 1UL << (63 - 20);
+
+ /* POWER4 requires the following sequence */
+ asm volatile(
+ "sync\n"
+ "mtspr %1, %0\n"
+ "mfspr %0, %1\n"
+ "mfspr %0, %1\n"
+ "mfspr %0, %1\n"
+ "mfspr %0, %1\n"
+ "mfspr %0, %1\n"
+ "mfspr %0, %1\n"
+ "isync" : "=&r" (hid0) : "i" (SPRN_HID0), "0" (hid0):
+ "memory");
+}
+#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/kernel/ppc32.h
new file mode 100644
index 00000000..dc16aefe
--- /dev/null
+++ b/arch/powerpc/kernel/ppc32.h
@@ -0,0 +1,139 @@
+#ifndef _PPC64_PPC32_H
+#define _PPC64_PPC32_H
+
+#include <linux/compat.h>
+#include <asm/siginfo.h>
+#include <asm/signal.h>
+
+/*
+ * Data types and macros for providing 32b PowerPC support.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/* These are here to support 32-bit syscalls on a 64-bit kernel. */
+
+typedef struct compat_siginfo {
+ int si_signo;
+ int si_errno;
+ int si_code;
+
+ union {
+ int _pad[SI_PAD_SIZE32];
+
+ /* kill() */
+ struct {
+ compat_pid_t _pid; /* sender's pid */
+ compat_uid_t _uid; /* sender's uid */
+ } _kill;
+
+ /* POSIX.1b timers */
+ struct {
+ compat_timer_t _tid; /* timer id */
+ int _overrun; /* overrun count */
+ compat_sigval_t _sigval; /* same as below */
+ int _sys_private; /* not to be passed to user */
+ } _timer;
+
+ /* POSIX.1b signals */
+ struct {
+ compat_pid_t _pid; /* sender's pid */
+ compat_uid_t _uid; /* sender's uid */
+ compat_sigval_t _sigval;
+ } _rt;
+
+ /* SIGCHLD */
+ struct {
+ compat_pid_t _pid; /* which child */
+ compat_uid_t _uid; /* sender's uid */
+ int _status; /* exit code */
+ compat_clock_t _utime;
+ compat_clock_t _stime;
+ } _sigchld;
+
+ /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */
+ struct {
+ unsigned int _addr; /* faulting insn/memory ref. */
+ } _sigfault;
+
+ /* SIGPOLL */
+ struct {
+ int _band; /* POLL_IN, POLL_OUT, POLL_MSG */
+ int _fd;
+ } _sigpoll;
+ } _sifields;
+} compat_siginfo_t;
+
+#define __old_sigaction32 old_sigaction32
+
+struct __old_sigaction32 {
+ compat_uptr_t sa_handler;
+ compat_old_sigset_t sa_mask;
+ unsigned int sa_flags;
+ compat_uptr_t sa_restorer; /* not used by Linux/SPARC yet */
+};
+
+
+
+struct sigaction32 {
+ compat_uptr_t sa_handler; /* Really a pointer, but need to deal with 32 bits */
+ unsigned int sa_flags;
+ compat_uptr_t sa_restorer; /* Another 32 bit pointer */
+ compat_sigset_t sa_mask; /* A 32 bit mask */
+};
+
+typedef struct sigaltstack_32 {
+ unsigned int ss_sp;
+ int ss_flags;
+ compat_size_t ss_size;
+} stack_32_t;
+
+struct pt_regs32 {
+ unsigned int gpr[32];
+ unsigned int nip;
+ unsigned int msr;
+ unsigned int orig_gpr3; /* Used for restarting system calls */
+ unsigned int ctr;
+ unsigned int link;
+ unsigned int xer;
+ unsigned int ccr;
+ unsigned int mq; /* 601 only (not used at present) */
+ unsigned int trap; /* Reason for being here */
+ unsigned int dar; /* Fault registers */
+ unsigned int dsisr;
+ unsigned int result; /* Result of a system call */
+};
+
+struct sigcontext32 {
+ unsigned int _unused[4];
+ int signal;
+ compat_uptr_t handler;
+ unsigned int oldmask;
+ compat_uptr_t regs; /* 4 byte pointer to the pt_regs32 structure. */
+};
+
+struct mcontext32 {
+ elf_gregset_t32 mc_gregs;
+ elf_fpregset_t mc_fregs;
+ unsigned int mc_pad[2];
+ elf_vrregset_t32 mc_vregs __attribute__((__aligned__(16)));
+ elf_vsrreghalf_t32 mc_vsregs __attribute__((__aligned__(16)));
+};
+
+struct ucontext32 {
+ unsigned int uc_flags;
+ unsigned int uc_link;
+ stack_32_t uc_stack;
+ int uc_pad[7];
+ compat_uptr_t uc_regs; /* points to uc_mcontext field */
+ compat_sigset_t uc_sigmask; /* mask last for extensibility */
+ /* glibc has 1024-bit signal masks, ours are 64-bit */
+ int uc_maskext[30];
+ int uc_pad2[3];
+ struct mcontext32 uc_mcontext;
+};
+
+#endif /* _PPC64_PPC32_H */
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
new file mode 100644
index 00000000..786a2700
--- /dev/null
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -0,0 +1,192 @@
+#include <linux/export.h>
+#include <linux/threads.h>
+#include <linux/smp.h>
+#include <linux/sched.h>
+#include <linux/elfcore.h>
+#include <linux/string.h>
+#include <linux/interrupt.h>
+#include <linux/screen_info.h>
+#include <linux/vt_kern.h>
+#include <linux/nvram.h>
+#include <linux/irq.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/bitops.h>
+
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/cacheflush.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <linux/atomic.h>
+#include <asm/checksum.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <linux/adb.h>
+#include <linux/cuda.h>
+#include <linux/pmu.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/irq.h>
+#include <asm/pmac_feature.h>
+#include <asm/dma.h>
+#include <asm/machdep.h>
+#include <asm/hw_irq.h>
+#include <asm/nvram.h>
+#include <asm/mmu_context.h>
+#include <asm/backlight.h>
+#include <asm/time.h>
+#include <asm/cputable.h>
+#include <asm/btext.h>
+#include <asm/div64.h>
+#include <asm/signal.h>
+#include <asm/dcr.h>
+#include <asm/ftrace.h>
+#include <asm/switch_to.h>
+
+#ifdef CONFIG_PPC32
+extern void transfer_to_handler(void);
+extern void do_IRQ(struct pt_regs *regs);
+extern void machine_check_exception(struct pt_regs *regs);
+extern void alignment_exception(struct pt_regs *regs);
+extern void program_check_exception(struct pt_regs *regs);
+extern void single_step_exception(struct pt_regs *regs);
+extern int sys_sigreturn(struct pt_regs *regs);
+
+EXPORT_SYMBOL(clear_pages);
+EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
+EXPORT_SYMBOL(DMA_MODE_READ);
+EXPORT_SYMBOL(DMA_MODE_WRITE);
+
+EXPORT_SYMBOL(transfer_to_handler);
+EXPORT_SYMBOL(do_IRQ);
+EXPORT_SYMBOL(machine_check_exception);
+EXPORT_SYMBOL(alignment_exception);
+EXPORT_SYMBOL(program_check_exception);
+EXPORT_SYMBOL(single_step_exception);
+EXPORT_SYMBOL(sys_sigreturn);
+#endif
+
+#ifdef CONFIG_FUNCTION_TRACER
+EXPORT_SYMBOL(_mcount);
+#endif
+
+EXPORT_SYMBOL(strcpy);
+EXPORT_SYMBOL(strncpy);
+EXPORT_SYMBOL(strcat);
+EXPORT_SYMBOL(strlen);
+EXPORT_SYMBOL(strcmp);
+EXPORT_SYMBOL(strncmp);
+
+EXPORT_SYMBOL(csum_partial);
+EXPORT_SYMBOL(csum_partial_copy_generic);
+EXPORT_SYMBOL(ip_fast_csum);
+EXPORT_SYMBOL(csum_tcpudp_magic);
+
+EXPORT_SYMBOL(__copy_tofrom_user);
+EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(__strncpy_from_user);
+EXPORT_SYMBOL(__strnlen_user);
+EXPORT_SYMBOL(copy_page);
+
+#if defined(CONFIG_PCI) && defined(CONFIG_PPC32)
+EXPORT_SYMBOL(isa_io_base);
+EXPORT_SYMBOL(isa_mem_base);
+EXPORT_SYMBOL(pci_dram_offset);
+#endif /* CONFIG_PCI */
+
+EXPORT_SYMBOL(start_thread);
+EXPORT_SYMBOL(kernel_thread);
+
+EXPORT_SYMBOL(giveup_fpu);
+#ifdef CONFIG_ALTIVEC
+EXPORT_SYMBOL(giveup_altivec);
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+EXPORT_SYMBOL(giveup_vsx);
+EXPORT_SYMBOL_GPL(__giveup_vsx);
+#endif /* CONFIG_VSX */
+#ifdef CONFIG_SPE
+EXPORT_SYMBOL(giveup_spe);
+#endif /* CONFIG_SPE */
+
+#ifndef CONFIG_PPC64
+EXPORT_SYMBOL(flush_instruction_cache);
+#endif
+EXPORT_SYMBOL(__flush_icache_range);
+EXPORT_SYMBOL(flush_dcache_range);
+
+#ifdef CONFIG_SMP
+#ifdef CONFIG_PPC32
+EXPORT_SYMBOL(smp_hw_index);
+#endif
+#endif
+
+#ifdef CONFIG_ADB
+EXPORT_SYMBOL(adb_request);
+EXPORT_SYMBOL(adb_register);
+EXPORT_SYMBOL(adb_unregister);
+EXPORT_SYMBOL(adb_poll);
+EXPORT_SYMBOL(adb_try_handler_change);
+#endif /* CONFIG_ADB */
+#ifdef CONFIG_ADB_CUDA
+EXPORT_SYMBOL(cuda_request);
+EXPORT_SYMBOL(cuda_poll);
+#endif /* CONFIG_ADB_CUDA */
+EXPORT_SYMBOL(to_tm);
+
+#ifdef CONFIG_PPC32
+long long __ashrdi3(long long, int);
+long long __ashldi3(long long, int);
+long long __lshrdi3(long long, int);
+EXPORT_SYMBOL(__ashrdi3);
+EXPORT_SYMBOL(__ashldi3);
+EXPORT_SYMBOL(__lshrdi3);
+int __ucmpdi2(unsigned long long, unsigned long long);
+EXPORT_SYMBOL(__ucmpdi2);
+#endif
+
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memmove);
+EXPORT_SYMBOL(memcmp);
+EXPORT_SYMBOL(memchr);
+
+#if defined(CONFIG_FB_VGA16_MODULE)
+EXPORT_SYMBOL(screen_info);
+#endif
+
+#ifdef CONFIG_PPC32
+EXPORT_SYMBOL(timer_interrupt);
+EXPORT_SYMBOL(tb_ticks_per_jiffy);
+EXPORT_SYMBOL(cacheable_memcpy);
+EXPORT_SYMBOL(cacheable_memzero);
+#endif
+
+#ifdef CONFIG_PPC32
+EXPORT_SYMBOL(switch_mmu_context);
+#endif
+
+#ifdef CONFIG_PPC_STD_MMU_32
+extern long mol_trampoline;
+EXPORT_SYMBOL(mol_trampoline); /* For MOL */
+EXPORT_SYMBOL(flush_hash_pages); /* For MOL */
+#ifdef CONFIG_SMP
+extern int mmu_hash_lock;
+EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */
+#endif /* CONFIG_SMP */
+extern long *intercept_table;
+EXPORT_SYMBOL(intercept_table);
+#endif /* CONFIG_PPC_STD_MMU_32 */
+#ifdef CONFIG_PPC_DCR_NATIVE
+EXPORT_SYMBOL(__mtdcr);
+EXPORT_SYMBOL(__mfdcr);
+#endif
+EXPORT_SYMBOL(empty_zero_page);
+
+#ifdef CONFIG_PPC64
+EXPORT_SYMBOL(__arch_hweight8);
+EXPORT_SYMBOL(__arch_hweight16);
+EXPORT_SYMBOL(__arch_hweight32);
+EXPORT_SYMBOL(__arch_hweight64);
+#endif
diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S
new file mode 100644
index 00000000..1b1787d5
--- /dev/null
+++ b/arch/powerpc/kernel/ppc_save_regs.S
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * NOTE: assert(sizeof(buf) > 23 * sizeof(long))
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+
+/*
+ * Grab the register values as they are now.
+ * This won't do a particularly good job because we really
+ * want our caller's caller's registers, and our caller has
+ * already executed its prologue.
+ * ToDo: We could reach back into the caller's save area to do
+ * a better job of representing the caller's state (note that
+ * that will be different for 32-bit and 64-bit, because of the
+ * different ABIs, though).
+ */
+_GLOBAL(ppc_save_regs)
+ PPC_STL r0,0*SZL(r3)
+ PPC_STL r2,2*SZL(r3)
+ PPC_STL r3,3*SZL(r3)
+ PPC_STL r4,4*SZL(r3)
+ PPC_STL r5,5*SZL(r3)
+ PPC_STL r6,6*SZL(r3)
+ PPC_STL r7,7*SZL(r3)
+ PPC_STL r8,8*SZL(r3)
+ PPC_STL r9,9*SZL(r3)
+ PPC_STL r10,10*SZL(r3)
+ PPC_STL r11,11*SZL(r3)
+ PPC_STL r12,12*SZL(r3)
+ PPC_STL r13,13*SZL(r3)
+ PPC_STL r14,14*SZL(r3)
+ PPC_STL r15,15*SZL(r3)
+ PPC_STL r16,16*SZL(r3)
+ PPC_STL r17,17*SZL(r3)
+ PPC_STL r18,18*SZL(r3)
+ PPC_STL r19,19*SZL(r3)
+ PPC_STL r20,20*SZL(r3)
+ PPC_STL r21,21*SZL(r3)
+ PPC_STL r22,22*SZL(r3)
+ PPC_STL r23,23*SZL(r3)
+ PPC_STL r24,24*SZL(r3)
+ PPC_STL r25,25*SZL(r3)
+ PPC_STL r26,26*SZL(r3)
+ PPC_STL r27,27*SZL(r3)
+ PPC_STL r28,28*SZL(r3)
+ PPC_STL r29,29*SZL(r3)
+ PPC_STL r30,30*SZL(r3)
+ PPC_STL r31,31*SZL(r3)
+ /* go up one stack frame for SP */
+ PPC_LL r4,0(r1)
+ PPC_STL r4,1*SZL(r3)
+ /* get caller's LR */
+ PPC_LL r0,LRSAVE(r4)
+ PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_LINK-STACK_FRAME_OVERHEAD(r3)
+ mfmsr r0
+ PPC_STL r0,_MSR-STACK_FRAME_OVERHEAD(r3)
+ mfctr r0
+ PPC_STL r0,_CTR-STACK_FRAME_OVERHEAD(r3)
+ mfxer r0
+ PPC_STL r0,_XER-STACK_FRAME_OVERHEAD(r3)
+ mfcr r0
+ PPC_STL r0,_CCR-STACK_FRAME_OVERHEAD(r3)
+ li r0,0
+ PPC_STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3)
+ blr
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
new file mode 100644
index 00000000..c8ae3714
--- /dev/null
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/kernel.h>
+
+#include <asm/machdep.h>
+#include <asm/vdso_datapage.h>
+#include <asm/rtas.h>
+#include <asm/uaccess.h>
+#include <asm/prom.h>
+
+#ifdef CONFIG_PPC64
+
+static loff_t page_map_seek( struct file *file, loff_t off, int whence)
+{
+ loff_t new;
+ struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+
+ switch(whence) {
+ case 0:
+ new = off;
+ break;
+ case 1:
+ new = file->f_pos + off;
+ break;
+ case 2:
+ new = dp->size + off;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if ( new < 0 || new > dp->size )
+ return -EINVAL;
+ return (file->f_pos = new);
+}
+
+static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
+ loff_t *ppos)
+{
+ struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+ return simple_read_from_buffer(buf, nbytes, ppos, dp->data, dp->size);
+}
+
+static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
+{
+ struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+
+ if ((vma->vm_end - vma->vm_start) > dp->size)
+ return -EINVAL;
+
+ remap_pfn_range(vma, vma->vm_start, __pa(dp->data) >> PAGE_SHIFT,
+ dp->size, vma->vm_page_prot);
+ return 0;
+}
+
+static const struct file_operations page_map_fops = {
+ .llseek = page_map_seek,
+ .read = page_map_read,
+ .mmap = page_map_mmap
+};
+
+
+static int __init proc_ppc64_init(void)
+{
+ struct proc_dir_entry *pde;
+
+ pde = proc_create_data("powerpc/systemcfg", S_IFREG|S_IRUGO, NULL,
+ &page_map_fops, vdso_data);
+ if (!pde)
+ return 1;
+ pde->size = PAGE_SIZE;
+
+ return 0;
+}
+__initcall(proc_ppc64_init);
+
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Create the ppc64 and ppc64/rtas directories early. This allows us to
+ * assume that they have been previously created in drivers.
+ */
+static int __init proc_ppc64_create(void)
+{
+ struct proc_dir_entry *root;
+
+ root = proc_mkdir("powerpc", NULL);
+ if (!root)
+ return 1;
+
+#ifdef CONFIG_PPC64
+ if (!proc_symlink("ppc64", NULL, "powerpc"))
+ pr_err("Failed to create link /proc/ppc64 -> /proc/powerpc\n");
+#endif
+
+ if (!of_find_node_by_path("/rtas"))
+ return 0;
+
+ if (!proc_mkdir("rtas", root))
+ return 1;
+
+ if (!proc_symlink("rtas", NULL, "powerpc/rtas"))
+ return 1;
+
+ return 0;
+}
+core_initcall(proc_ppc64_create);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
new file mode 100644
index 00000000..4937c969
--- /dev/null
+++ b/arch/powerpc/kernel/process.c
@@ -0,0 +1,1339 @@
+/*
+ * Derived from "arch/i386/kernel/process.c"
+ * Copyright (C) 1995 Linus Torvalds
+ *
+ * Updated and modified by Cort Dougan (cort@cs.nmt.edu) and
+ * Paul Mackerras (paulus@cs.anu.edu.au)
+ *
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/elf.h>
+#include <linux/init.h>
+#include <linux/prctl.h>
+#include <linux/init_task.h>
+#include <linux/export.h>
+#include <linux/kallsyms.h>
+#include <linux/mqueue.h>
+#include <linux/hardirq.h>
+#include <linux/utsname.h>
+#include <linux/ftrace.h>
+#include <linux/kernel_stat.h>
+#include <linux/personality.h>
+#include <linux/random.h>
+#include <linux/hw_breakpoint.h>
+
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/runlatch.h>
+#include <asm/syscalls.h>
+#include <asm/switch_to.h>
+#include <asm/debug.h>
+#ifdef CONFIG_PPC64
+#include <asm/firmware.h>
+#endif
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+
+extern unsigned long _get_SP(void);
+
+#ifndef CONFIG_SMP
+struct task_struct *last_task_used_math = NULL;
+struct task_struct *last_task_used_altivec = NULL;
+struct task_struct *last_task_used_vsx = NULL;
+struct task_struct *last_task_used_spe = NULL;
+#endif
+
+/*
+ * Make sure the floating-point register state in the
+ * the thread_struct is up to date for task tsk.
+ */
+void flush_fp_to_thread(struct task_struct *tsk)
+{
+ if (tsk->thread.regs) {
+ /*
+ * We need to disable preemption here because if we didn't,
+ * another process could get scheduled after the regs->msr
+ * test but before we have finished saving the FP registers
+ * to the thread_struct. That process could take over the
+ * FPU, and then when we get scheduled again we would store
+ * bogus values for the remaining FP registers.
+ */
+ preempt_disable();
+ if (tsk->thread.regs->msr & MSR_FP) {
+#ifdef CONFIG_SMP
+ /*
+ * This should only ever be called for current or
+ * for a stopped child process. Since we save away
+ * the FP register state on context switch on SMP,
+ * there is something wrong if a stopped child appears
+ * to still have its FP state in the CPU registers.
+ */
+ BUG_ON(tsk != current);
+#endif
+ giveup_fpu(tsk);
+ }
+ preempt_enable();
+ }
+}
+EXPORT_SYMBOL_GPL(flush_fp_to_thread);
+
+void enable_kernel_fp(void)
+{
+ WARN_ON(preemptible());
+
+#ifdef CONFIG_SMP
+ if (current->thread.regs && (current->thread.regs->msr & MSR_FP))
+ giveup_fpu(current);
+ else
+ giveup_fpu(NULL); /* just enables FP for kernel */
+#else
+ giveup_fpu(last_task_used_math);
+#endif /* CONFIG_SMP */
+}
+EXPORT_SYMBOL(enable_kernel_fp);
+
+#ifdef CONFIG_ALTIVEC
+void enable_kernel_altivec(void)
+{
+ WARN_ON(preemptible());
+
+#ifdef CONFIG_SMP
+ if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
+ giveup_altivec(current);
+ else
+ giveup_altivec(NULL); /* just enable AltiVec for kernel - force */
+#else
+ giveup_altivec(last_task_used_altivec);
+#endif /* CONFIG_SMP */
+}
+EXPORT_SYMBOL(enable_kernel_altivec);
+
+/*
+ * Make sure the VMX/Altivec register state in the
+ * the thread_struct is up to date for task tsk.
+ */
+void flush_altivec_to_thread(struct task_struct *tsk)
+{
+ if (tsk->thread.regs) {
+ preempt_disable();
+ if (tsk->thread.regs->msr & MSR_VEC) {
+#ifdef CONFIG_SMP
+ BUG_ON(tsk != current);
+#endif
+ giveup_altivec(tsk);
+ }
+ preempt_enable();
+ }
+}
+EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+#if 0
+/* not currently used, but some crazy RAID module might want to later */
+void enable_kernel_vsx(void)
+{
+ WARN_ON(preemptible());
+
+#ifdef CONFIG_SMP
+ if (current->thread.regs && (current->thread.regs->msr & MSR_VSX))
+ giveup_vsx(current);
+ else
+ giveup_vsx(NULL); /* just enable vsx for kernel - force */
+#else
+ giveup_vsx(last_task_used_vsx);
+#endif /* CONFIG_SMP */
+}
+EXPORT_SYMBOL(enable_kernel_vsx);
+#endif
+
+void giveup_vsx(struct task_struct *tsk)
+{
+ giveup_fpu(tsk);
+ giveup_altivec(tsk);
+ __giveup_vsx(tsk);
+}
+
+void flush_vsx_to_thread(struct task_struct *tsk)
+{
+ if (tsk->thread.regs) {
+ preempt_disable();
+ if (tsk->thread.regs->msr & MSR_VSX) {
+#ifdef CONFIG_SMP
+ BUG_ON(tsk != current);
+#endif
+ giveup_vsx(tsk);
+ }
+ preempt_enable();
+ }
+}
+EXPORT_SYMBOL_GPL(flush_vsx_to_thread);
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_SPE
+
+void enable_kernel_spe(void)
+{
+ WARN_ON(preemptible());
+
+#ifdef CONFIG_SMP
+ if (current->thread.regs && (current->thread.regs->msr & MSR_SPE))
+ giveup_spe(current);
+ else
+ giveup_spe(NULL); /* just enable SPE for kernel - force */
+#else
+ giveup_spe(last_task_used_spe);
+#endif /* __SMP __ */
+}
+EXPORT_SYMBOL(enable_kernel_spe);
+
+void flush_spe_to_thread(struct task_struct *tsk)
+{
+ if (tsk->thread.regs) {
+ preempt_disable();
+ if (tsk->thread.regs->msr & MSR_SPE) {
+#ifdef CONFIG_SMP
+ BUG_ON(tsk != current);
+#endif
+ tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
+ giveup_spe(tsk);
+ }
+ preempt_enable();
+ }
+}
+#endif /* CONFIG_SPE */
+
+#ifndef CONFIG_SMP
+/*
+ * If we are doing lazy switching of CPU state (FP, altivec or SPE),
+ * and the current task has some state, discard it.
+ */
+void discard_lazy_cpu_state(void)
+{
+ preempt_disable();
+ if (last_task_used_math == current)
+ last_task_used_math = NULL;
+#ifdef CONFIG_ALTIVEC
+ if (last_task_used_altivec == current)
+ last_task_used_altivec = NULL;
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+ if (last_task_used_vsx == current)
+ last_task_used_vsx = NULL;
+#endif /* CONFIG_VSX */
+#ifdef CONFIG_SPE
+ if (last_task_used_spe == current)
+ last_task_used_spe = NULL;
+#endif
+ preempt_enable();
+}
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+void do_send_trap(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code, int signal_code, int breakpt)
+{
+ siginfo_t info;
+
+ if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
+ 11, SIGSEGV) == NOTIFY_STOP)
+ return;
+
+ /* Deliver the signal to userspace */
+ info.si_signo = SIGTRAP;
+ info.si_errno = breakpt; /* breakpoint or watchpoint id */
+ info.si_code = signal_code;
+ info.si_addr = (void __user *)address;
+ force_sig_info(SIGTRAP, &info, current);
+}
+#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
+void do_dabr(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code)
+{
+ siginfo_t info;
+
+ if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
+ 11, SIGSEGV) == NOTIFY_STOP)
+ return;
+
+ if (debugger_dabr_match(regs))
+ return;
+
+ /* Clear the DABR */
+ set_dabr(0);
+
+ /* Deliver the signal to userspace */
+ info.si_signo = SIGTRAP;
+ info.si_errno = 0;
+ info.si_code = TRAP_HWBKPT;
+ info.si_addr = (void __user *)address;
+ force_sig_info(SIGTRAP, &info, current);
+}
+#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
+
+static DEFINE_PER_CPU(unsigned long, current_dabr);
+
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+/*
+ * Set the debug registers back to their default "safe" values.
+ */
+static void set_debug_reg_defaults(struct thread_struct *thread)
+{
+ thread->iac1 = thread->iac2 = 0;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ thread->iac3 = thread->iac4 = 0;
+#endif
+ thread->dac1 = thread->dac2 = 0;
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ thread->dvc1 = thread->dvc2 = 0;
+#endif
+ thread->dbcr0 = 0;
+#ifdef CONFIG_BOOKE
+ /*
+ * Force User/Supervisor bits to b11 (user-only MSR[PR]=1)
+ */
+ thread->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | \
+ DBCR1_IAC3US | DBCR1_IAC4US;
+ /*
+ * Force Data Address Compare User/Supervisor bits to be User-only
+ * (0b11 MSR[PR]=1) and set all other bits in DBCR2 register to be 0.
+ */
+ thread->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
+#else
+ thread->dbcr1 = 0;
+#endif
+}
+
+static void prime_debug_regs(struct thread_struct *thread)
+{
+ mtspr(SPRN_IAC1, thread->iac1);
+ mtspr(SPRN_IAC2, thread->iac2);
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ mtspr(SPRN_IAC3, thread->iac3);
+ mtspr(SPRN_IAC4, thread->iac4);
+#endif
+ mtspr(SPRN_DAC1, thread->dac1);
+ mtspr(SPRN_DAC2, thread->dac2);
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ mtspr(SPRN_DVC1, thread->dvc1);
+ mtspr(SPRN_DVC2, thread->dvc2);
+#endif
+ mtspr(SPRN_DBCR0, thread->dbcr0);
+ mtspr(SPRN_DBCR1, thread->dbcr1);
+#ifdef CONFIG_BOOKE
+ mtspr(SPRN_DBCR2, thread->dbcr2);
+#endif
+}
+/*
+ * Unless neither the old or new thread are making use of the
+ * debug registers, set the debug registers from the values
+ * stored in the new thread.
+ */
+static void switch_booke_debug_regs(struct thread_struct *new_thread)
+{
+ if ((current->thread.dbcr0 & DBCR0_IDM)
+ || (new_thread->dbcr0 & DBCR0_IDM))
+ prime_debug_regs(new_thread);
+}
+#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
+#ifndef CONFIG_HAVE_HW_BREAKPOINT
+static void set_debug_reg_defaults(struct thread_struct *thread)
+{
+ if (thread->dabr) {
+ thread->dabr = 0;
+ set_dabr(0);
+ }
+}
+#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
+#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
+
+int set_dabr(unsigned long dabr)
+{
+ __get_cpu_var(current_dabr) = dabr;
+
+ if (ppc_md.set_dabr)
+ return ppc_md.set_dabr(dabr);
+
+ /* XXX should we have a CPU_FTR_HAS_DABR ? */
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ mtspr(SPRN_DAC1, dabr);
+#ifdef CONFIG_PPC_47x
+ isync();
+#endif
+#elif defined(CONFIG_PPC_BOOK3S)
+ mtspr(SPRN_DABR, dabr);
+#endif
+
+
+ return 0;
+}
+
+#ifdef CONFIG_PPC64
+DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
+#endif
+
+struct task_struct *__switch_to(struct task_struct *prev,
+ struct task_struct *new)
+{
+ struct thread_struct *new_thread, *old_thread;
+ unsigned long flags;
+ struct task_struct *last;
+#ifdef CONFIG_PPC_BOOK3S_64
+ struct ppc64_tlb_batch *batch;
+#endif
+
+#ifdef CONFIG_SMP
+ /* avoid complexity of lazy save/restore of fpu
+ * by just saving it every time we switch out if
+ * this task used the fpu during the last quantum.
+ *
+ * If it tries to use the fpu again, it'll trap and
+ * reload its fp regs. So we don't have to do a restore
+ * every switch, just a save.
+ * -- Cort
+ */
+ if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP))
+ giveup_fpu(prev);
+#ifdef CONFIG_ALTIVEC
+ /*
+ * If the previous thread used altivec in the last quantum
+ * (thus changing altivec regs) then save them.
+ * We used to check the VRSAVE register but not all apps
+ * set it, so we don't rely on it now (and in fact we need
+ * to save & restore VSCR even if VRSAVE == 0). -- paulus
+ *
+ * On SMP we always save/restore altivec regs just to avoid the
+ * complexity of changing processors.
+ * -- Cort
+ */
+ if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC))
+ giveup_altivec(prev);
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+ if (prev->thread.regs && (prev->thread.regs->msr & MSR_VSX))
+ /* VMX and FPU registers are already save here */
+ __giveup_vsx(prev);
+#endif /* CONFIG_VSX */
+#ifdef CONFIG_SPE
+ /*
+ * If the previous thread used spe in the last quantum
+ * (thus changing spe regs) then save them.
+ *
+ * On SMP we always save/restore spe regs just to avoid the
+ * complexity of changing processors.
+ */
+ if ((prev->thread.regs && (prev->thread.regs->msr & MSR_SPE)))
+ giveup_spe(prev);
+#endif /* CONFIG_SPE */
+
+#else /* CONFIG_SMP */
+#ifdef CONFIG_ALTIVEC
+ /* Avoid the trap. On smp this this never happens since
+ * we don't set last_task_used_altivec -- Cort
+ */
+ if (new->thread.regs && last_task_used_altivec == new)
+ new->thread.regs->msr |= MSR_VEC;
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+ if (new->thread.regs && last_task_used_vsx == new)
+ new->thread.regs->msr |= MSR_VSX;
+#endif /* CONFIG_VSX */
+#ifdef CONFIG_SPE
+ /* Avoid the trap. On smp this this never happens since
+ * we don't set last_task_used_spe
+ */
+ if (new->thread.regs && last_task_used_spe == new)
+ new->thread.regs->msr |= MSR_SPE;
+#endif /* CONFIG_SPE */
+
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ switch_booke_debug_regs(&new->thread);
+#else
+/*
+ * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would
+ * schedule DABR
+ */
+#ifndef CONFIG_HAVE_HW_BREAKPOINT
+ if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr))
+ set_dabr(new->thread.dabr);
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+#endif
+
+
+ new_thread = &new->thread;
+ old_thread = &current->thread;
+
+#ifdef CONFIG_PPC64
+ /*
+ * Collect processor utilization data per process
+ */
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
+ long unsigned start_tb, current_tb;
+ start_tb = old_thread->start_tb;
+ cu->current_tb = current_tb = mfspr(SPRN_PURR);
+ old_thread->accum_tb += (current_tb - start_tb);
+ new_thread->start_tb = current_tb;
+ }
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ batch = &__get_cpu_var(ppc64_tlb_batch);
+ if (batch->active) {
+ current_thread_info()->local_flags |= _TLF_LAZY_MMU;
+ if (batch->index)
+ __flush_tlb_pending(batch);
+ batch->active = 0;
+ }
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+ local_irq_save(flags);
+
+ account_system_vtime(current);
+ account_process_vtime(current);
+
+ /*
+ * We can't take a PMU exception inside _switch() since there is a
+ * window where the kernel stack SLB and the kernel stack are out
+ * of sync. Hard disable here.
+ */
+ hard_irq_disable();
+ last = _switch(old_thread, new_thread);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
+ current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
+ batch = &__get_cpu_var(ppc64_tlb_batch);
+ batch->active = 1;
+ }
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+ local_irq_restore(flags);
+
+ return last;
+}
+
+static int instructions_to_print = 16;
+
+static void show_instructions(struct pt_regs *regs)
+{
+ int i;
+ unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 *
+ sizeof(int));
+
+ printk("Instruction dump:");
+
+ for (i = 0; i < instructions_to_print; i++) {
+ int instr;
+
+ if (!(i % 8))
+ printk("\n");
+
+#if !defined(CONFIG_BOOKE)
+ /* If executing with the IMMU off, adjust pc rather
+ * than print XXXXXXXX.
+ */
+ if (!(regs->msr & MSR_IR))
+ pc = (unsigned long)phys_to_virt(pc);
+#endif
+
+ /* We use __get_user here *only* to avoid an OOPS on a
+ * bad address because the pc *should* only be a
+ * kernel address.
+ */
+ if (!__kernel_text_address(pc) ||
+ __get_user(instr, (unsigned int __user *)pc)) {
+ printk(KERN_CONT "XXXXXXXX ");
+ } else {
+ if (regs->nip == pc)
+ printk(KERN_CONT "<%08x> ", instr);
+ else
+ printk(KERN_CONT "%08x ", instr);
+ }
+
+ pc += sizeof(int);
+ }
+
+ printk("\n");
+}
+
+static struct regbit {
+ unsigned long bit;
+ const char *name;
+} msr_bits[] = {
+#if defined(CONFIG_PPC64) && !defined(CONFIG_BOOKE)
+ {MSR_SF, "SF"},
+ {MSR_HV, "HV"},
+#endif
+ {MSR_VEC, "VEC"},
+ {MSR_VSX, "VSX"},
+#ifdef CONFIG_BOOKE
+ {MSR_CE, "CE"},
+#endif
+ {MSR_EE, "EE"},
+ {MSR_PR, "PR"},
+ {MSR_FP, "FP"},
+ {MSR_ME, "ME"},
+#ifdef CONFIG_BOOKE
+ {MSR_DE, "DE"},
+#else
+ {MSR_SE, "SE"},
+ {MSR_BE, "BE"},
+#endif
+ {MSR_IR, "IR"},
+ {MSR_DR, "DR"},
+ {MSR_PMM, "PMM"},
+#ifndef CONFIG_BOOKE
+ {MSR_RI, "RI"},
+ {MSR_LE, "LE"},
+#endif
+ {0, NULL}
+};
+
+static void printbits(unsigned long val, struct regbit *bits)
+{
+ const char *sep = "";
+
+ printk("<");
+ for (; bits->bit; ++bits)
+ if (val & bits->bit) {
+ printk("%s%s", sep, bits->name);
+ sep = ",";
+ }
+ printk(">");
+}
+
+#ifdef CONFIG_PPC64
+#define REG "%016lx"
+#define REGS_PER_LINE 4
+#define LAST_VOLATILE 13
+#else
+#define REG "%08lx"
+#define REGS_PER_LINE 8
+#define LAST_VOLATILE 12
+#endif
+
+void show_regs(struct pt_regs * regs)
+{
+ int i, trap;
+
+ printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
+ regs->nip, regs->link, regs->ctr);
+ printk("REGS: %p TRAP: %04lx %s (%s)\n",
+ regs, regs->trap, print_tainted(), init_utsname()->release);
+ printk("MSR: "REG" ", regs->msr);
+ printbits(regs->msr, msr_bits);
+ printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
+#ifdef CONFIG_PPC64
+ printk("SOFTE: %ld\n", regs->softe);
+#endif
+ trap = TRAP(regs);
+ if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
+ printk("CFAR: "REG"\n", regs->orig_gpr3);
+ if (trap == 0x300 || trap == 0x600)
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+ printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr);
+#else
+ printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr);
+#endif
+ printk("TASK = %p[%d] '%s' THREAD: %p",
+ current, task_pid_nr(current), current->comm, task_thread_info(current));
+
+#ifdef CONFIG_SMP
+ printk(" CPU: %d", raw_smp_processor_id());
+#endif /* CONFIG_SMP */
+
+ for (i = 0; i < 32; i++) {
+ if ((i % REGS_PER_LINE) == 0)
+ printk("\nGPR%02d: ", i);
+ printk(REG " ", regs->gpr[i]);
+ if (i == LAST_VOLATILE && !FULL_REGS(regs))
+ break;
+ }
+ printk("\n");
+#ifdef CONFIG_KALLSYMS
+ /*
+ * Lookup NIP late so we have the best change of getting the
+ * above info out without failing
+ */
+ printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
+ printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
+#endif
+ show_stack(current, (unsigned long *) regs->gpr[1]);
+ if (!user_mode(regs))
+ show_instructions(regs);
+}
+
+void exit_thread(void)
+{
+ discard_lazy_cpu_state();
+}
+
+void flush_thread(void)
+{
+ discard_lazy_cpu_state();
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ flush_ptrace_hw_breakpoint(current);
+#else /* CONFIG_HAVE_HW_BREAKPOINT */
+ set_debug_reg_defaults(&current->thread);
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+}
+
+void
+release_thread(struct task_struct *t)
+{
+}
+
+/*
+ * This gets called before we allocate a new thread and copy
+ * the current task into it.
+ */
+void prepare_to_copy(struct task_struct *tsk)
+{
+ flush_fp_to_thread(current);
+ flush_altivec_to_thread(current);
+ flush_vsx_to_thread(current);
+ flush_spe_to_thread(current);
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ flush_ptrace_hw_breakpoint(tsk);
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+}
+
+/*
+ * Copy a thread..
+ */
+extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */
+
+int copy_thread(unsigned long clone_flags, unsigned long usp,
+ unsigned long unused, struct task_struct *p,
+ struct pt_regs *regs)
+{
+ struct pt_regs *childregs, *kregs;
+ extern void ret_from_fork(void);
+ unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
+
+ CHECK_FULL_REGS(regs);
+ /* Copy registers */
+ sp -= sizeof(struct pt_regs);
+ childregs = (struct pt_regs *) sp;
+ *childregs = *regs;
+ if ((childregs->msr & MSR_PR) == 0) {
+ /* for kernel thread, set `current' and stackptr in new task */
+ childregs->gpr[1] = sp + sizeof(struct pt_regs);
+#ifdef CONFIG_PPC32
+ childregs->gpr[2] = (unsigned long) p;
+#else
+ clear_tsk_thread_flag(p, TIF_32BIT);
+#endif
+ p->thread.regs = NULL; /* no user register state */
+ } else {
+ childregs->gpr[1] = usp;
+ p->thread.regs = childregs;
+ if (clone_flags & CLONE_SETTLS) {
+#ifdef CONFIG_PPC64
+ if (!is_32bit_task())
+ childregs->gpr[13] = childregs->gpr[6];
+ else
+#endif
+ childregs->gpr[2] = childregs->gpr[6];
+ }
+ }
+ childregs->gpr[3] = 0; /* Result from fork() */
+ sp -= STACK_FRAME_OVERHEAD;
+
+ /*
+ * The way this works is that at some point in the future
+ * some task will call _switch to switch to the new task.
+ * That will pop off the stack frame created below and start
+ * the new task running at ret_from_fork. The new task will
+ * do some house keeping and then return from the fork or clone
+ * system call, using the stack frame created above.
+ */
+ sp -= sizeof(struct pt_regs);
+ kregs = (struct pt_regs *) sp;
+ sp -= STACK_FRAME_OVERHEAD;
+ p->thread.ksp = sp;
+ p->thread.ksp_limit = (unsigned long)task_stack_page(p) +
+ _ALIGN_UP(sizeof(struct thread_info), 16);
+
+#ifdef CONFIG_PPC_STD_MMU_64
+ if (mmu_has_feature(MMU_FTR_SLB)) {
+ unsigned long sp_vsid;
+ unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
+
+ if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+ sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T)
+ << SLB_VSID_SHIFT_1T;
+ else
+ sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_256M)
+ << SLB_VSID_SHIFT;
+ sp_vsid |= SLB_VSID_KERNEL | llp;
+ p->thread.ksp_vsid = sp_vsid;
+ }
+#endif /* CONFIG_PPC_STD_MMU_64 */
+#ifdef CONFIG_PPC64
+ if (cpu_has_feature(CPU_FTR_DSCR)) {
+ if (current->thread.dscr_inherit) {
+ p->thread.dscr_inherit = 1;
+ p->thread.dscr = current->thread.dscr;
+ } else if (0 != dscr_default) {
+ p->thread.dscr_inherit = 1;
+ p->thread.dscr = dscr_default;
+ } else {
+ p->thread.dscr_inherit = 0;
+ p->thread.dscr = 0;
+ }
+ }
+#endif
+
+ /*
+ * The PPC64 ABI makes use of a TOC to contain function
+ * pointers. The function (ret_from_except) is actually a pointer
+ * to the TOC entry. The first entry is a pointer to the actual
+ * function.
+ */
+#ifdef CONFIG_PPC64
+ kregs->nip = *((unsigned long *)ret_from_fork);
+#else
+ kregs->nip = (unsigned long)ret_from_fork;
+#endif
+
+ return 0;
+}
+
+/*
+ * Set up a thread for executing a new program
+ */
+void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
+{
+#ifdef CONFIG_PPC64
+ unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */
+#endif
+
+ /*
+ * If we exec out of a kernel thread then thread.regs will not be
+ * set. Do it now.
+ */
+ if (!current->thread.regs) {
+ struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE;
+ current->thread.regs = regs - 1;
+ }
+
+ memset(regs->gpr, 0, sizeof(regs->gpr));
+ regs->ctr = 0;
+ regs->link = 0;
+ regs->xer = 0;
+ regs->ccr = 0;
+ regs->gpr[1] = sp;
+
+ /*
+ * We have just cleared all the nonvolatile GPRs, so make
+ * FULL_REGS(regs) return true. This is necessary to allow
+ * ptrace to examine the thread immediately after exec.
+ */
+ regs->trap &= ~1UL;
+
+#ifdef CONFIG_PPC32
+ regs->mq = 0;
+ regs->nip = start;
+ regs->msr = MSR_USER;
+#else
+ if (!is_32bit_task()) {
+ unsigned long entry, toc;
+
+ /* start is a relocated pointer to the function descriptor for
+ * the elf _start routine. The first entry in the function
+ * descriptor is the entry address of _start and the second
+ * entry is the TOC value we need to use.
+ */
+ __get_user(entry, (unsigned long __user *)start);
+ __get_user(toc, (unsigned long __user *)start+1);
+
+ /* Check whether the e_entry function descriptor entries
+ * need to be relocated before we can use them.
+ */
+ if (load_addr != 0) {
+ entry += load_addr;
+ toc += load_addr;
+ }
+ regs->nip = entry;
+ regs->gpr[2] = toc;
+ regs->msr = MSR_USER64;
+ } else {
+ regs->nip = start;
+ regs->gpr[2] = 0;
+ regs->msr = MSR_USER32;
+ }
+#endif
+
+ discard_lazy_cpu_state();
+#ifdef CONFIG_VSX
+ current->thread.used_vsr = 0;
+#endif
+ memset(current->thread.fpr, 0, sizeof(current->thread.fpr));
+ current->thread.fpscr.val = 0;
+#ifdef CONFIG_ALTIVEC
+ memset(current->thread.vr, 0, sizeof(current->thread.vr));
+ memset(&current->thread.vscr, 0, sizeof(current->thread.vscr));
+ current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */
+ current->thread.vrsave = 0;
+ current->thread.used_vr = 0;
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_SPE
+ memset(current->thread.evr, 0, sizeof(current->thread.evr));
+ current->thread.acc = 0;
+ current->thread.spefscr = 0;
+ current->thread.used_spe = 0;
+#endif /* CONFIG_SPE */
+}
+
+#define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \
+ | PR_FP_EXC_RES | PR_FP_EXC_INV)
+
+int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
+{
+ struct pt_regs *regs = tsk->thread.regs;
+
+ /* This is a bit hairy. If we are an SPE enabled processor
+ * (have embedded fp) we store the IEEE exception enable flags in
+ * fpexc_mode. fpexc_mode is also used for setting FP exception
+ * mode (asyn, precise, disabled) for 'Classic' FP. */
+ if (val & PR_FP_EXC_SW_ENABLE) {
+#ifdef CONFIG_SPE
+ if (cpu_has_feature(CPU_FTR_SPE)) {
+ tsk->thread.fpexc_mode = val &
+ (PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT);
+ return 0;
+ } else {
+ return -EINVAL;
+ }
+#else
+ return -EINVAL;
+#endif
+ }
+
+ /* on a CONFIG_SPE this does not hurt us. The bits that
+ * __pack_fe01 use do not overlap with bits used for
+ * PR_FP_EXC_SW_ENABLE. Additionally, the MSR[FE0,FE1] bits
+ * on CONFIG_SPE implementations are reserved so writing to
+ * them does not change anything */
+ if (val > PR_FP_EXC_PRECISE)
+ return -EINVAL;
+ tsk->thread.fpexc_mode = __pack_fe01(val);
+ if (regs != NULL && (regs->msr & MSR_FP) != 0)
+ regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1))
+ | tsk->thread.fpexc_mode;
+ return 0;
+}
+
+int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
+{
+ unsigned int val;
+
+ if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
+#ifdef CONFIG_SPE
+ if (cpu_has_feature(CPU_FTR_SPE))
+ val = tsk->thread.fpexc_mode;
+ else
+ return -EINVAL;
+#else
+ return -EINVAL;
+#endif
+ else
+ val = __unpack_fe01(tsk->thread.fpexc_mode);
+ return put_user(val, (unsigned int __user *) adr);
+}
+
+int set_endian(struct task_struct *tsk, unsigned int val)
+{
+ struct pt_regs *regs = tsk->thread.regs;
+
+ if ((val == PR_ENDIAN_LITTLE && !cpu_has_feature(CPU_FTR_REAL_LE)) ||
+ (val == PR_ENDIAN_PPC_LITTLE && !cpu_has_feature(CPU_FTR_PPC_LE)))
+ return -EINVAL;
+
+ if (regs == NULL)
+ return -EINVAL;
+
+ if (val == PR_ENDIAN_BIG)
+ regs->msr &= ~MSR_LE;
+ else if (val == PR_ENDIAN_LITTLE || val == PR_ENDIAN_PPC_LITTLE)
+ regs->msr |= MSR_LE;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+int get_endian(struct task_struct *tsk, unsigned long adr)
+{
+ struct pt_regs *regs = tsk->thread.regs;
+ unsigned int val;
+
+ if (!cpu_has_feature(CPU_FTR_PPC_LE) &&
+ !cpu_has_feature(CPU_FTR_REAL_LE))
+ return -EINVAL;
+
+ if (regs == NULL)
+ return -EINVAL;
+
+ if (regs->msr & MSR_LE) {
+ if (cpu_has_feature(CPU_FTR_REAL_LE))
+ val = PR_ENDIAN_LITTLE;
+ else
+ val = PR_ENDIAN_PPC_LITTLE;
+ } else
+ val = PR_ENDIAN_BIG;
+
+ return put_user(val, (unsigned int __user *)adr);
+}
+
+int set_unalign_ctl(struct task_struct *tsk, unsigned int val)
+{
+ tsk->thread.align_ctl = val;
+ return 0;
+}
+
+int get_unalign_ctl(struct task_struct *tsk, unsigned long adr)
+{
+ return put_user(tsk->thread.align_ctl, (unsigned int __user *)adr);
+}
+
+#define TRUNC_PTR(x) ((typeof(x))(((unsigned long)(x)) & 0xffffffff))
+
+int sys_clone(unsigned long clone_flags, unsigned long usp,
+ int __user *parent_tidp, void __user *child_threadptr,
+ int __user *child_tidp, int p6,
+ struct pt_regs *regs)
+{
+ CHECK_FULL_REGS(regs);
+ if (usp == 0)
+ usp = regs->gpr[1]; /* stack pointer for child */
+#ifdef CONFIG_PPC64
+ if (is_32bit_task()) {
+ parent_tidp = TRUNC_PTR(parent_tidp);
+ child_tidp = TRUNC_PTR(child_tidp);
+ }
+#endif
+ return do_fork(clone_flags, usp, regs, 0, parent_tidp, child_tidp);
+}
+
+int sys_fork(unsigned long p1, unsigned long p2, unsigned long p3,
+ unsigned long p4, unsigned long p5, unsigned long p6,
+ struct pt_regs *regs)
+{
+ CHECK_FULL_REGS(regs);
+ return do_fork(SIGCHLD, regs->gpr[1], regs, 0, NULL, NULL);
+}
+
+int sys_vfork(unsigned long p1, unsigned long p2, unsigned long p3,
+ unsigned long p4, unsigned long p5, unsigned long p6,
+ struct pt_regs *regs)
+{
+ CHECK_FULL_REGS(regs);
+ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->gpr[1],
+ regs, 0, NULL, NULL);
+}
+
+int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2,
+ unsigned long a3, unsigned long a4, unsigned long a5,
+ struct pt_regs *regs)
+{
+ int error;
+ char *filename;
+
+ filename = getname((const char __user *) a0);
+ error = PTR_ERR(filename);
+ if (IS_ERR(filename))
+ goto out;
+ flush_fp_to_thread(current);
+ flush_altivec_to_thread(current);
+ flush_spe_to_thread(current);
+ error = do_execve(filename,
+ (const char __user *const __user *) a1,
+ (const char __user *const __user *) a2, regs);
+ putname(filename);
+out:
+ return error;
+}
+
+static inline int valid_irq_stack(unsigned long sp, struct task_struct *p,
+ unsigned long nbytes)
+{
+ unsigned long stack_page;
+ unsigned long cpu = task_cpu(p);
+
+ /*
+ * Avoid crashing if the stack has overflowed and corrupted
+ * task_cpu(p), which is in the thread_info struct.
+ */
+ if (cpu < NR_CPUS && cpu_possible(cpu)) {
+ stack_page = (unsigned long) hardirq_ctx[cpu];
+ if (sp >= stack_page + sizeof(struct thread_struct)
+ && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
+ stack_page = (unsigned long) softirq_ctx[cpu];
+ if (sp >= stack_page + sizeof(struct thread_struct)
+ && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+ }
+ return 0;
+}
+
+int validate_sp(unsigned long sp, struct task_struct *p,
+ unsigned long nbytes)
+{
+ unsigned long stack_page = (unsigned long)task_stack_page(p);
+
+ if (sp >= stack_page + sizeof(struct thread_struct)
+ && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
+ return valid_irq_stack(sp, p, nbytes);
+}
+
+EXPORT_SYMBOL(validate_sp);
+
+unsigned long get_wchan(struct task_struct *p)
+{
+ unsigned long ip, sp;
+ int count = 0;
+
+ if (!p || p == current || p->state == TASK_RUNNING)
+ return 0;
+
+ sp = p->thread.ksp;
+ if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
+ return 0;
+
+ do {
+ sp = *(unsigned long *)sp;
+ if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
+ return 0;
+ if (count > 0) {
+ ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE];
+ if (!in_sched_functions(ip))
+ return ip;
+ }
+ } while (count++ < 16);
+ return 0;
+}
+
+static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH;
+
+void show_stack(struct task_struct *tsk, unsigned long *stack)
+{
+ unsigned long sp, ip, lr, newsp;
+ int count = 0;
+ int firstframe = 1;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ int curr_frame = current->curr_ret_stack;
+ extern void return_to_handler(void);
+ unsigned long rth = (unsigned long)return_to_handler;
+ unsigned long mrth = -1;
+#ifdef CONFIG_PPC64
+ extern void mod_return_to_handler(void);
+ rth = *(unsigned long *)rth;
+ mrth = (unsigned long)mod_return_to_handler;
+ mrth = *(unsigned long *)mrth;
+#endif
+#endif
+
+ sp = (unsigned long) stack;
+ if (tsk == NULL)
+ tsk = current;
+ if (sp == 0) {
+ if (tsk == current)
+ asm("mr %0,1" : "=r" (sp));
+ else
+ sp = tsk->thread.ksp;
+ }
+
+ lr = 0;
+ printk("Call Trace:\n");
+ do {
+ if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
+ return;
+
+ stack = (unsigned long *) sp;
+ newsp = stack[0];
+ ip = stack[STACK_FRAME_LR_SAVE];
+ if (!firstframe || ip != lr) {
+ printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ if ((ip == rth || ip == mrth) && curr_frame >= 0) {
+ printk(" (%pS)",
+ (void *)current->ret_stack[curr_frame].ret);
+ curr_frame--;
+ }
+#endif
+ if (firstframe)
+ printk(" (unreliable)");
+ printk("\n");
+ }
+ firstframe = 0;
+
+ /*
+ * See if this is an exception frame.
+ * We look for the "regshere" marker in the current frame.
+ */
+ if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE)
+ && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+ struct pt_regs *regs = (struct pt_regs *)
+ (sp + STACK_FRAME_OVERHEAD);
+ lr = regs->link;
+ printk("--- Exception: %lx at %pS\n LR = %pS\n",
+ regs->trap, (void *)regs->nip, (void *)lr);
+ firstframe = 1;
+ }
+
+ sp = newsp;
+ } while (count++ < kstack_depth_to_print);
+}
+
+void dump_stack(void)
+{
+ show_stack(current, NULL);
+}
+EXPORT_SYMBOL(dump_stack);
+
+#ifdef CONFIG_PPC64
+/* Called with hard IRQs off */
+void __ppc64_runlatch_on(void)
+{
+ struct thread_info *ti = current_thread_info();
+ unsigned long ctrl;
+
+ ctrl = mfspr(SPRN_CTRLF);
+ ctrl |= CTRL_RUNLATCH;
+ mtspr(SPRN_CTRLT, ctrl);
+
+ ti->local_flags |= _TLF_RUNLATCH;
+}
+
+/* Called with hard IRQs off */
+void __ppc64_runlatch_off(void)
+{
+ struct thread_info *ti = current_thread_info();
+ unsigned long ctrl;
+
+ ti->local_flags &= ~_TLF_RUNLATCH;
+
+ ctrl = mfspr(SPRN_CTRLF);
+ ctrl &= ~CTRL_RUNLATCH;
+ mtspr(SPRN_CTRLT, ctrl);
+}
+#endif /* CONFIG_PPC64 */
+
+#if THREAD_SHIFT < PAGE_SHIFT
+
+static struct kmem_cache *thread_info_cache;
+
+struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node)
+{
+ struct thread_info *ti;
+
+ ti = kmem_cache_alloc_node(thread_info_cache, GFP_KERNEL, node);
+ if (unlikely(ti == NULL))
+ return NULL;
+#ifdef CONFIG_DEBUG_STACK_USAGE
+ memset(ti, 0, THREAD_SIZE);
+#endif
+ return ti;
+}
+
+void free_thread_info(struct thread_info *ti)
+{
+ kmem_cache_free(thread_info_cache, ti);
+}
+
+void thread_info_cache_init(void)
+{
+ thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
+ THREAD_SIZE, 0, NULL);
+ BUG_ON(thread_info_cache == NULL);
+}
+
+#endif /* THREAD_SHIFT < PAGE_SHIFT */
+
+unsigned long arch_align_stack(unsigned long sp)
+{
+ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+ sp -= get_random_int() & ~PAGE_MASK;
+ return sp & ~0xf;
+}
+
+static inline unsigned long brk_rnd(void)
+{
+ unsigned long rnd = 0;
+
+ /* 8MB for 32bit, 1GB for 64bit */
+ if (is_32bit_task())
+ rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT)));
+ else
+ rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT)));
+
+ return rnd << PAGE_SHIFT;
+}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+ unsigned long base = mm->brk;
+ unsigned long ret;
+
+#ifdef CONFIG_PPC_STD_MMU_64
+ /*
+ * If we are using 1TB segments and we are allowed to randomise
+ * the heap, we can put it above 1TB so it is backed by a 1TB
+ * segment. Otherwise the heap will be in the bottom 1TB
+ * which always uses 256MB segments and this may result in a
+ * performance penalty.
+ */
+ if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
+ base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T);
+#endif
+
+ ret = PAGE_ALIGN(base + brk_rnd());
+
+ if (ret < mm->brk)
+ return mm->brk;
+
+ return ret;
+}
+
+unsigned long randomize_et_dyn(unsigned long base)
+{
+ unsigned long ret = PAGE_ALIGN(base + brk_rnd());
+
+ if (ret < base)
+ return base;
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
new file mode 100644
index 00000000..f191bf02
--- /dev/null
+++ b/arch/powerpc/kernel/prom.c
@@ -0,0 +1,891 @@
+/*
+ * Procedures for creating, accessing and interpreting the device tree.
+ *
+ * Paul Mackerras August 1996.
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner.
+ * {engebret|bergner}@us.ibm.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <stdarg.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/threads.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/stringify.h>
+#include <linux/delay.h>
+#include <linux/initrd.h>
+#include <linux/bitops.h>
+#include <linux/export.h>
+#include <linux/kexec.h>
+#include <linux/debugfs.h>
+#include <linux/irq.h>
+#include <linux/memblock.h>
+
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/kdump.h>
+#include <asm/smp.h>
+#include <asm/mmu.h>
+#include <asm/paca.h>
+#include <asm/pgtable.h>
+#include <asm/pci.h>
+#include <asm/iommu.h>
+#include <asm/btext.h>
+#include <asm/sections.h>
+#include <asm/machdep.h>
+#include <asm/pSeries_reconfig.h>
+#include <asm/pci-bridge.h>
+#include <asm/kexec.h>
+#include <asm/opal.h>
+#include <asm/fadump.h>
+
+#include <mm/mmu_decl.h>
+
+#ifdef DEBUG
+#define DBG(fmt...) printk(KERN_ERR fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+#ifdef CONFIG_PPC64
+int __initdata iommu_is_off;
+int __initdata iommu_force_on;
+unsigned long tce_alloc_start, tce_alloc_end;
+u64 ppc64_rma_size;
+#endif
+static phys_addr_t first_memblock_size;
+static int __initdata boot_cpu_count;
+
+static int __init early_parse_mem(char *p)
+{
+ if (!p)
+ return 1;
+
+ memory_limit = PAGE_ALIGN(memparse(p, &p));
+ DBG("memory limit = 0x%llx\n", (unsigned long long)memory_limit);
+
+ return 0;
+}
+early_param("mem", early_parse_mem);
+
+/*
+ * overlaps_initrd - check for overlap with page aligned extension of
+ * initrd.
+ */
+static inline int overlaps_initrd(unsigned long start, unsigned long size)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ if (!initrd_start)
+ return 0;
+
+ return (start + size) > _ALIGN_DOWN(initrd_start, PAGE_SIZE) &&
+ start <= _ALIGN_UP(initrd_end, PAGE_SIZE);
+#else
+ return 0;
+#endif
+}
+
+/**
+ * move_device_tree - move tree to an unused area, if needed.
+ *
+ * The device tree may be allocated beyond our memory limit, or inside the
+ * crash kernel region for kdump, or within the page aligned range of initrd.
+ * If so, move it out of the way.
+ */
+static void __init move_device_tree(void)
+{
+ unsigned long start, size;
+ void *p;
+
+ DBG("-> move_device_tree\n");
+
+ start = __pa(initial_boot_params);
+ size = be32_to_cpu(initial_boot_params->totalsize);
+
+ if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) ||
+ overlaps_crashkernel(start, size) ||
+ overlaps_initrd(start, size)) {
+ p = __va(memblock_alloc(size, PAGE_SIZE));
+ memcpy(p, initial_boot_params, size);
+ initial_boot_params = (struct boot_param_header *)p;
+ DBG("Moved device tree to 0x%p\n", p);
+ }
+
+ DBG("<- move_device_tree\n");
+}
+
+/*
+ * ibm,pa-features is a per-cpu property that contains a string of
+ * attribute descriptors, each of which has a 2 byte header plus up
+ * to 254 bytes worth of processor attribute bits. First header
+ * byte specifies the number of bytes following the header.
+ * Second header byte is an "attribute-specifier" type, of which
+ * zero is the only currently-defined value.
+ * Implementation: Pass in the byte and bit offset for the feature
+ * that we are interested in. The function will return -1 if the
+ * pa-features property is missing, or a 1/0 to indicate if the feature
+ * is supported/not supported. Note that the bit numbers are
+ * big-endian to match the definition in PAPR.
+ */
+static struct ibm_pa_feature {
+ unsigned long cpu_features; /* CPU_FTR_xxx bit */
+ unsigned long mmu_features; /* MMU_FTR_xxx bit */
+ unsigned int cpu_user_ftrs; /* PPC_FEATURE_xxx bit */
+ unsigned char pabyte; /* byte number in ibm,pa-features */
+ unsigned char pabit; /* bit number (big-endian) */
+ unsigned char invert; /* if 1, pa bit set => clear feature */
+} ibm_pa_features[] __initdata = {
+ {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0},
+ {0, 0, PPC_FEATURE_HAS_FPU, 0, 1, 0},
+ {0, MMU_FTR_SLB, 0, 0, 2, 0},
+ {CPU_FTR_CTRL, 0, 0, 0, 3, 0},
+ {CPU_FTR_NOEXECUTE, 0, 0, 0, 6, 0},
+ {CPU_FTR_NODSISRALIGN, 0, 0, 1, 1, 1},
+ {0, MMU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0},
+ {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0},
+};
+
+static void __init scan_features(unsigned long node, unsigned char *ftrs,
+ unsigned long tablelen,
+ struct ibm_pa_feature *fp,
+ unsigned long ft_size)
+{
+ unsigned long i, len, bit;
+
+ /* find descriptor with type == 0 */
+ for (;;) {
+ if (tablelen < 3)
+ return;
+ len = 2 + ftrs[0];
+ if (tablelen < len)
+ return; /* descriptor 0 not found */
+ if (ftrs[1] == 0)
+ break;
+ tablelen -= len;
+ ftrs += len;
+ }
+
+ /* loop over bits we know about */
+ for (i = 0; i < ft_size; ++i, ++fp) {
+ if (fp->pabyte >= ftrs[0])
+ continue;
+ bit = (ftrs[2 + fp->pabyte] >> (7 - fp->pabit)) & 1;
+ if (bit ^ fp->invert) {
+ cur_cpu_spec->cpu_features |= fp->cpu_features;
+ cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs;
+ cur_cpu_spec->mmu_features |= fp->mmu_features;
+ } else {
+ cur_cpu_spec->cpu_features &= ~fp->cpu_features;
+ cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs;
+ cur_cpu_spec->mmu_features &= ~fp->mmu_features;
+ }
+ }
+}
+
+static void __init check_cpu_pa_features(unsigned long node)
+{
+ unsigned char *pa_ftrs;
+ unsigned long tablelen;
+
+ pa_ftrs = of_get_flat_dt_prop(node, "ibm,pa-features", &tablelen);
+ if (pa_ftrs == NULL)
+ return;
+
+ scan_features(node, pa_ftrs, tablelen,
+ ibm_pa_features, ARRAY_SIZE(ibm_pa_features));
+}
+
+#ifdef CONFIG_PPC_STD_MMU_64
+static void __init check_cpu_slb_size(unsigned long node)
+{
+ u32 *slb_size_ptr;
+
+ slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL);
+ if (slb_size_ptr != NULL) {
+ mmu_slb_size = *slb_size_ptr;
+ return;
+ }
+ slb_size_ptr = of_get_flat_dt_prop(node, "ibm,slb-size", NULL);
+ if (slb_size_ptr != NULL) {
+ mmu_slb_size = *slb_size_ptr;
+ }
+}
+#else
+#define check_cpu_slb_size(node) do { } while(0)
+#endif
+
+static struct feature_property {
+ const char *name;
+ u32 min_value;
+ unsigned long cpu_feature;
+ unsigned long cpu_user_ftr;
+} feature_properties[] __initdata = {
+#ifdef CONFIG_ALTIVEC
+ {"altivec", 0, CPU_FTR_ALTIVEC, PPC_FEATURE_HAS_ALTIVEC},
+ {"ibm,vmx", 1, CPU_FTR_ALTIVEC, PPC_FEATURE_HAS_ALTIVEC},
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+ /* Yes, this _really_ is ibm,vmx == 2 to enable VSX */
+ {"ibm,vmx", 2, CPU_FTR_VSX, PPC_FEATURE_HAS_VSX},
+#endif /* CONFIG_VSX */
+#ifdef CONFIG_PPC64
+ {"ibm,dfp", 1, 0, PPC_FEATURE_HAS_DFP},
+ {"ibm,purr", 1, CPU_FTR_PURR, 0},
+ {"ibm,spurr", 1, CPU_FTR_SPURR, 0},
+#endif /* CONFIG_PPC64 */
+};
+
+#if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU)
+static inline void identical_pvr_fixup(unsigned long node)
+{
+ unsigned int pvr;
+ char *model = of_get_flat_dt_prop(node, "model", NULL);
+
+ /*
+ * Since 440GR(x)/440EP(x) processors have the same pvr,
+ * we check the node path and set bit 28 in the cur_cpu_spec
+ * pvr for EP(x) processor version. This bit is always 0 in
+ * the "real" pvr. Then we call identify_cpu again with
+ * the new logical pvr to enable FPU support.
+ */
+ if (model && strstr(model, "440EP")) {
+ pvr = cur_cpu_spec->pvr_value | 0x8;
+ identify_cpu(0, pvr);
+ DBG("Using logical pvr %x for %s\n", pvr, model);
+ }
+}
+#else
+#define identical_pvr_fixup(node) do { } while(0)
+#endif
+
+static void __init check_cpu_feature_properties(unsigned long node)
+{
+ unsigned long i;
+ struct feature_property *fp = feature_properties;
+ const u32 *prop;
+
+ for (i = 0; i < ARRAY_SIZE(feature_properties); ++i, ++fp) {
+ prop = of_get_flat_dt_prop(node, fp->name, NULL);
+ if (prop && *prop >= fp->min_value) {
+ cur_cpu_spec->cpu_features |= fp->cpu_feature;
+ cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftr;
+ }
+ }
+}
+
+static int __init early_init_dt_scan_cpus(unsigned long node,
+ const char *uname, int depth,
+ void *data)
+{
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ const u32 *prop;
+ const u32 *intserv;
+ int i, nthreads;
+ unsigned long len;
+ int found = -1;
+ int found_thread = 0;
+
+ /* We are scanning "cpu" nodes only */
+ if (type == NULL || strcmp(type, "cpu") != 0)
+ return 0;
+
+ /* Get physical cpuid */
+ intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len);
+ if (intserv) {
+ nthreads = len / sizeof(int);
+ } else {
+ intserv = of_get_flat_dt_prop(node, "reg", NULL);
+ nthreads = 1;
+ }
+
+ /*
+ * Now see if any of these threads match our boot cpu.
+ * NOTE: This must match the parsing done in smp_setup_cpu_maps.
+ */
+ for (i = 0; i < nthreads; i++) {
+ /*
+ * version 2 of the kexec param format adds the phys cpuid of
+ * booted proc.
+ */
+ if (initial_boot_params->version >= 2) {
+ if (intserv[i] == initial_boot_params->boot_cpuid_phys) {
+ found = boot_cpu_count;
+ found_thread = i;
+ }
+ } else {
+ /*
+ * Check if it's the boot-cpu, set it's hw index now,
+ * unfortunately this format did not support booting
+ * off secondary threads.
+ */
+ if (of_get_flat_dt_prop(node,
+ "linux,boot-cpu", NULL) != NULL)
+ found = boot_cpu_count;
+ }
+#ifdef CONFIG_SMP
+ /* logical cpu id is always 0 on UP kernels */
+ boot_cpu_count++;
+#endif
+ }
+
+ if (found >= 0) {
+ DBG("boot cpu: logical %d physical %d\n", found,
+ intserv[found_thread]);
+ boot_cpuid = found;
+ set_hard_smp_processor_id(found, intserv[found_thread]);
+
+ /*
+ * PAPR defines "logical" PVR values for cpus that
+ * meet various levels of the architecture:
+ * 0x0f000001 Architecture version 2.04
+ * 0x0f000002 Architecture version 2.05
+ * If the cpu-version property in the cpu node contains
+ * such a value, we call identify_cpu again with the
+ * logical PVR value in order to use the cpu feature
+ * bits appropriate for the architecture level.
+ *
+ * A POWER6 partition in "POWER6 architected" mode
+ * uses the 0x0f000002 PVR value; in POWER5+ mode
+ * it uses 0x0f000001.
+ */
+ prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
+ if (prop && (*prop & 0xff000000) == 0x0f000000)
+ identify_cpu(0, *prop);
+
+ identical_pvr_fixup(node);
+ }
+
+ check_cpu_feature_properties(node);
+ check_cpu_pa_features(node);
+ check_cpu_slb_size(node);
+
+#ifdef CONFIG_PPC_PSERIES
+ if (nthreads > 1)
+ cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
+ else
+ cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
+#endif
+
+ return 0;
+}
+
+int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ unsigned long *lprop;
+
+ /* Use common scan routine to determine if this is the chosen node */
+ if (early_init_dt_scan_chosen(node, uname, depth, data) == 0)
+ return 0;
+
+#ifdef CONFIG_PPC64
+ /* check if iommu is forced on or off */
+ if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
+ iommu_is_off = 1;
+ if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
+ iommu_force_on = 1;
+#endif
+
+ /* mem=x on the command line is the preferred mechanism */
+ lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL);
+ if (lprop)
+ memory_limit = *lprop;
+
+#ifdef CONFIG_PPC64
+ lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
+ if (lprop)
+ tce_alloc_start = *lprop;
+ lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
+ if (lprop)
+ tce_alloc_end = *lprop;
+#endif
+
+#ifdef CONFIG_KEXEC
+ lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL);
+ if (lprop)
+ crashk_res.start = *lprop;
+
+ lprop = of_get_flat_dt_prop(node, "linux,crashkernel-size", NULL);
+ if (lprop)
+ crashk_res.end = crashk_res.start + *lprop - 1;
+#endif
+
+ /* break now */
+ return 1;
+}
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Interpret the ibm,dynamic-memory property in the
+ * /ibm,dynamic-reconfiguration-memory node.
+ * This contains a list of memory blocks along with NUMA affinity
+ * information.
+ */
+static int __init early_init_dt_scan_drconf_memory(unsigned long node)
+{
+ __be32 *dm, *ls, *usm;
+ unsigned long l, n, flags;
+ u64 base, size, memblock_size;
+ unsigned int is_kexec_kdump = 0, rngs;
+
+ ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
+ if (ls == NULL || l < dt_root_size_cells * sizeof(__be32))
+ return 0;
+ memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls);
+
+ dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l);
+ if (dm == NULL || l < sizeof(__be32))
+ return 0;
+
+ n = *dm++; /* number of entries */
+ if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(__be32))
+ return 0;
+
+ /* check if this is a kexec/kdump kernel. */
+ usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory",
+ &l);
+ if (usm != NULL)
+ is_kexec_kdump = 1;
+
+ for (; n != 0; --n) {
+ base = dt_mem_next_cell(dt_root_addr_cells, &dm);
+ flags = dm[3];
+ /* skip DRC index, pad, assoc. list index, flags */
+ dm += 4;
+ /* skip this block if the reserved bit is set in flags (0x80)
+ or if the block is not assigned to this partition (0x8) */
+ if ((flags & 0x80) || !(flags & 0x8))
+ continue;
+ size = memblock_size;
+ rngs = 1;
+ if (is_kexec_kdump) {
+ /*
+ * For each memblock in ibm,dynamic-memory, a corresponding
+ * entry in linux,drconf-usable-memory property contains
+ * a counter 'p' followed by 'p' (base, size) duple.
+ * Now read the counter from
+ * linux,drconf-usable-memory property
+ */
+ rngs = dt_mem_next_cell(dt_root_size_cells, &usm);
+ if (!rngs) /* there are no (base, size) duple */
+ continue;
+ }
+ do {
+ if (is_kexec_kdump) {
+ base = dt_mem_next_cell(dt_root_addr_cells,
+ &usm);
+ size = dt_mem_next_cell(dt_root_size_cells,
+ &usm);
+ }
+ if (iommu_is_off) {
+ if (base >= 0x80000000ul)
+ continue;
+ if ((base + size) > 0x80000000ul)
+ size = 0x80000000ul - base;
+ }
+ memblock_add(base, size);
+ } while (--rngs);
+ }
+ memblock_dump_all();
+ return 0;
+}
+#else
+#define early_init_dt_scan_drconf_memory(node) 0
+#endif /* CONFIG_PPC_PSERIES */
+
+static int __init early_init_dt_scan_memory_ppc(unsigned long node,
+ const char *uname,
+ int depth, void *data)
+{
+ if (depth == 1 &&
+ strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0)
+ return early_init_dt_scan_drconf_memory(node);
+
+ return early_init_dt_scan_memory(node, uname, depth, data);
+}
+
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+#ifdef CONFIG_PPC64
+ if (iommu_is_off) {
+ if (base >= 0x80000000ul)
+ return;
+ if ((base + size) > 0x80000000ul)
+ size = 0x80000000ul - base;
+ }
+#endif
+ /* Keep track of the beginning of memory -and- the size of
+ * the very first block in the device-tree as it represents
+ * the RMA on ppc64 server
+ */
+ if (base < memstart_addr) {
+ memstart_addr = base;
+ first_memblock_size = size;
+ }
+
+ /* Add the chunk to the MEMBLOCK list */
+ memblock_add(base, size);
+}
+
+void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+ return __va(memblock_alloc(size, align));
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void __init early_init_dt_setup_initrd_arch(unsigned long start,
+ unsigned long end)
+{
+ initrd_start = (unsigned long)__va(start);
+ initrd_end = (unsigned long)__va(end);
+ initrd_below_start_ok = 1;
+}
+#endif
+
+static void __init early_reserve_mem(void)
+{
+ u64 base, size;
+ u64 *reserve_map;
+ unsigned long self_base;
+ unsigned long self_size;
+
+ reserve_map = (u64 *)(((unsigned long)initial_boot_params) +
+ initial_boot_params->off_mem_rsvmap);
+
+ /* before we do anything, lets reserve the dt blob */
+ self_base = __pa((unsigned long)initial_boot_params);
+ self_size = initial_boot_params->totalsize;
+ memblock_reserve(self_base, self_size);
+
+#ifdef CONFIG_BLK_DEV_INITRD
+ /* then reserve the initrd, if any */
+ if (initrd_start && (initrd_end > initrd_start))
+ memblock_reserve(_ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE),
+ _ALIGN_UP(initrd_end, PAGE_SIZE) -
+ _ALIGN_DOWN(initrd_start, PAGE_SIZE));
+#endif /* CONFIG_BLK_DEV_INITRD */
+
+#ifdef CONFIG_PPC32
+ /*
+ * Handle the case where we might be booting from an old kexec
+ * image that setup the mem_rsvmap as pairs of 32-bit values
+ */
+ if (*reserve_map > 0xffffffffull) {
+ u32 base_32, size_32;
+ u32 *reserve_map_32 = (u32 *)reserve_map;
+
+ while (1) {
+ base_32 = *(reserve_map_32++);
+ size_32 = *(reserve_map_32++);
+ if (size_32 == 0)
+ break;
+ /* skip if the reservation is for the blob */
+ if (base_32 == self_base && size_32 == self_size)
+ continue;
+ DBG("reserving: %x -> %x\n", base_32, size_32);
+ memblock_reserve(base_32, size_32);
+ }
+ return;
+ }
+#endif
+ while (1) {
+ base = *(reserve_map++);
+ size = *(reserve_map++);
+ if (size == 0)
+ break;
+ DBG("reserving: %llx -> %llx\n", base, size);
+ memblock_reserve(base, size);
+ }
+}
+
+void __init early_init_devtree(void *params)
+{
+ phys_addr_t limit;
+
+ DBG(" -> early_init_devtree(%p)\n", params);
+
+ /* Setup flat device-tree pointer */
+ initial_boot_params = params;
+
+#ifdef CONFIG_PPC_RTAS
+ /* Some machines might need RTAS info for debugging, grab it now. */
+ of_scan_flat_dt(early_init_dt_scan_rtas, NULL);
+#endif
+
+#ifdef CONFIG_PPC_POWERNV
+ /* Some machines might need OPAL info for debugging, grab it now. */
+ of_scan_flat_dt(early_init_dt_scan_opal, NULL);
+#endif
+
+#ifdef CONFIG_FA_DUMP
+ /* scan tree to see if dump is active during last boot */
+ of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
+#endif
+
+ /* Pre-initialize the cmd_line with the content of boot_commmand_line,
+ * which will be empty except when the content of the variable has
+ * been overriden by a bootloading mechanism. This happens typically
+ * with HAL takeover
+ */
+ strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE);
+
+ /* Retrieve various informations from the /chosen node of the
+ * device-tree, including the platform type, initrd location and
+ * size, TCE reserve, and more ...
+ */
+ of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line);
+
+ /* Scan memory nodes and rebuild MEMBLOCKs */
+ of_scan_flat_dt(early_init_dt_scan_root, NULL);
+ of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
+
+ /* Save command line for /proc/cmdline and then parse parameters */
+ strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
+ parse_early_param();
+
+ /* make sure we've parsed cmdline for mem= before this */
+ if (memory_limit)
+ first_memblock_size = min(first_memblock_size, memory_limit);
+ setup_initial_memory_limit(memstart_addr, first_memblock_size);
+ /* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */
+ memblock_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START);
+ /* If relocatable, reserve first 32k for interrupt vectors etc. */
+ if (PHYSICAL_START > MEMORY_START)
+ memblock_reserve(MEMORY_START, 0x8000);
+ reserve_kdump_trampoline();
+#ifdef CONFIG_FA_DUMP
+ /*
+ * If we fail to reserve memory for firmware-assisted dump then
+ * fallback to kexec based kdump.
+ */
+ if (fadump_reserve_mem() == 0)
+#endif
+ reserve_crashkernel();
+ early_reserve_mem();
+
+ /*
+ * Ensure that total memory size is page-aligned, because otherwise
+ * mark_bootmem() gets upset.
+ */
+ limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE);
+ memblock_enforce_memory_limit(limit);
+
+ memblock_allow_resize();
+ memblock_dump_all();
+
+ DBG("Phys. mem: %llx\n", memblock_phys_mem_size());
+
+ /* We may need to relocate the flat tree, do it now.
+ * FIXME .. and the initrd too? */
+ move_device_tree();
+
+ allocate_pacas();
+
+ DBG("Scanning CPUs ...\n");
+
+ /* Retrieve CPU related informations from the flat tree
+ * (altivec support, boot CPU ID, ...)
+ */
+ of_scan_flat_dt(early_init_dt_scan_cpus, NULL);
+
+#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
+ /* We'll later wait for secondaries to check in; there are
+ * NCPUS-1 non-boot CPUs :-)
+ */
+ spinning_secondaries = boot_cpu_count - 1;
+#endif
+
+ DBG(" <- early_init_devtree()\n");
+}
+
+/*******
+ *
+ * New implementation of the OF "find" APIs, return a refcounted
+ * object, call of_node_put() when done. The device tree and list
+ * are protected by a rw_lock.
+ *
+ * Note that property management will need some locking as well,
+ * this isn't dealt with yet.
+ *
+ *******/
+
+/**
+ * of_find_next_cache_node - Find a node's subsidiary cache
+ * @np: node of type "cpu" or "cache"
+ *
+ * Returns a node pointer with refcount incremented, use
+ * of_node_put() on it when done. Caller should hold a reference
+ * to np.
+ */
+struct device_node *of_find_next_cache_node(struct device_node *np)
+{
+ struct device_node *child;
+ const phandle *handle;
+
+ handle = of_get_property(np, "l2-cache", NULL);
+ if (!handle)
+ handle = of_get_property(np, "next-level-cache", NULL);
+
+ if (handle)
+ return of_find_node_by_phandle(*handle);
+
+ /* OF on pmac has nodes instead of properties named "l2-cache"
+ * beneath CPU nodes.
+ */
+ if (!strcmp(np->type, "cpu"))
+ for_each_child_of_node(np, child)
+ if (!strcmp(child->type, "cache"))
+ return child;
+
+ return NULL;
+}
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Fix up the uninitialized fields in a new device node:
+ * name, type and pci-specific fields
+ */
+
+static int of_finish_dynamic_node(struct device_node *node)
+{
+ struct device_node *parent = of_get_parent(node);
+ int err = 0;
+ const phandle *ibm_phandle;
+
+ node->name = of_get_property(node, "name", NULL);
+ node->type = of_get_property(node, "device_type", NULL);
+
+ if (!node->name)
+ node->name = "<NULL>";
+ if (!node->type)
+ node->type = "<NULL>";
+
+ if (!parent) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* We don't support that function on PowerMac, at least
+ * not yet
+ */
+ if (machine_is(powermac))
+ return -ENODEV;
+
+ /* fix up new node's phandle field */
+ if ((ibm_phandle = of_get_property(node, "ibm,phandle", NULL)))
+ node->phandle = *ibm_phandle;
+
+out:
+ of_node_put(parent);
+ return err;
+}
+
+static int prom_reconfig_notifier(struct notifier_block *nb,
+ unsigned long action, void *node)
+{
+ int err;
+
+ switch (action) {
+ case PSERIES_RECONFIG_ADD:
+ err = of_finish_dynamic_node(node);
+ if (err < 0)
+ printk(KERN_ERR "finish_node returned %d\n", err);
+ break;
+ default:
+ err = 0;
+ break;
+ }
+ return notifier_from_errno(err);
+}
+
+static struct notifier_block prom_reconfig_nb = {
+ .notifier_call = prom_reconfig_notifier,
+ .priority = 10, /* This one needs to run first */
+};
+
+static int __init prom_reconfig_setup(void)
+{
+ return pSeries_reconfig_notifier_register(&prom_reconfig_nb);
+}
+__initcall(prom_reconfig_setup);
+#endif
+
+/* Find the device node for a given logical cpu number, also returns the cpu
+ * local thread number (index in ibm,interrupt-server#s) if relevant and
+ * asked for (non NULL)
+ */
+struct device_node *of_get_cpu_node(int cpu, unsigned int *thread)
+{
+ int hardid;
+ struct device_node *np;
+
+ hardid = get_hard_smp_processor_id(cpu);
+
+ for_each_node_by_type(np, "cpu") {
+ const u32 *intserv;
+ unsigned int plen, t;
+
+ /* Check for ibm,ppc-interrupt-server#s. If it doesn't exist
+ * fallback to "reg" property and assume no threads
+ */
+ intserv = of_get_property(np, "ibm,ppc-interrupt-server#s",
+ &plen);
+ if (intserv == NULL) {
+ const u32 *reg = of_get_property(np, "reg", NULL);
+ if (reg == NULL)
+ continue;
+ if (*reg == hardid) {
+ if (thread)
+ *thread = 0;
+ return np;
+ }
+ } else {
+ plen /= sizeof(u32);
+ for (t = 0; t < plen; t++) {
+ if (hardid == intserv[t]) {
+ if (thread)
+ *thread = t;
+ return np;
+ }
+ }
+ }
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(of_get_cpu_node);
+
+#if defined(CONFIG_DEBUG_FS) && defined(DEBUG)
+static struct debugfs_blob_wrapper flat_dt_blob;
+
+static int __init export_flat_device_tree(void)
+{
+ struct dentry *d;
+
+ flat_dt_blob.data = initial_boot_params;
+ flat_dt_blob.size = initial_boot_params->totalsize;
+
+ d = debugfs_create_blob("flat-device-tree", S_IFREG | S_IRUSR,
+ powerpc_debugfs_root, &flat_dt_blob);
+ if (!d)
+ return 1;
+
+ return 0;
+}
+__initcall(export_flat_device_tree);
+#endif
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
new file mode 100644
index 00000000..99860273
--- /dev/null
+++ b/arch/powerpc/kernel/prom_init.c
@@ -0,0 +1,2997 @@
+/*
+ * Procedures for interfacing to Open Firmware.
+ *
+ * Paul Mackerras August 1996.
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner.
+ * {engebret|bergner}@us.ibm.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG_PROM
+
+#include <stdarg.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/threads.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/stringify.h>
+#include <linux/delay.h>
+#include <linux/initrd.h>
+#include <linux/bitops.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/pci.h>
+#include <asm/iommu.h>
+#include <asm/btext.h>
+#include <asm/sections.h>
+#include <asm/machdep.h>
+#include <asm/opal.h>
+
+#include <linux/linux_logo.h>
+
+/*
+ * Eventually bump that one up
+ */
+#define DEVTREE_CHUNK_SIZE 0x100000
+
+/*
+ * This is the size of the local memory reserve map that gets copied
+ * into the boot params passed to the kernel. That size is totally
+ * flexible as the kernel just reads the list until it encounters an
+ * entry with size 0, so it can be changed without breaking binary
+ * compatibility
+ */
+#define MEM_RESERVE_MAP_SIZE 8
+
+/*
+ * prom_init() is called very early on, before the kernel text
+ * and data have been mapped to KERNELBASE. At this point the code
+ * is running at whatever address it has been loaded at.
+ * On ppc32 we compile with -mrelocatable, which means that references
+ * to extern and static variables get relocated automatically.
+ * On ppc64 we have to relocate the references explicitly with
+ * RELOC. (Note that strings count as static variables.)
+ *
+ * Because OF may have mapped I/O devices into the area starting at
+ * KERNELBASE, particularly on CHRP machines, we can't safely call
+ * OF once the kernel has been mapped to KERNELBASE. Therefore all
+ * OF calls must be done within prom_init().
+ *
+ * ADDR is used in calls to call_prom. The 4th and following
+ * arguments to call_prom should be 32-bit values.
+ * On ppc64, 64 bit values are truncated to 32 bits (and
+ * fortunately don't get interpreted as two arguments).
+ */
+#ifdef CONFIG_PPC64
+#define RELOC(x) (*PTRRELOC(&(x)))
+#define ADDR(x) (u32) add_reloc_offset((unsigned long)(x))
+#define OF_WORKAROUNDS 0
+#else
+#define RELOC(x) (x)
+#define ADDR(x) (u32) (x)
+#define OF_WORKAROUNDS of_workarounds
+int of_workarounds;
+#endif
+
+#define OF_WA_CLAIM 1 /* do phys/virt claim separately, then map */
+#define OF_WA_LONGTRAIL 2 /* work around longtrail bugs */
+
+#define PROM_BUG() do { \
+ prom_printf("kernel BUG at %s line 0x%x!\n", \
+ RELOC(__FILE__), __LINE__); \
+ __asm__ __volatile__(".long " BUG_ILLEGAL_INSTR); \
+} while (0)
+
+#ifdef DEBUG_PROM
+#define prom_debug(x...) prom_printf(x)
+#else
+#define prom_debug(x...)
+#endif
+
+
+typedef u32 prom_arg_t;
+
+struct prom_args {
+ u32 service;
+ u32 nargs;
+ u32 nret;
+ prom_arg_t args[10];
+};
+
+struct prom_t {
+ ihandle root;
+ phandle chosen;
+ int cpu;
+ ihandle stdout;
+ ihandle mmumap;
+ ihandle memory;
+};
+
+struct mem_map_entry {
+ u64 base;
+ u64 size;
+};
+
+typedef u32 cell_t;
+
+extern void __start(unsigned long r3, unsigned long r4, unsigned long r5,
+ unsigned long r6, unsigned long r7, unsigned long r8,
+ unsigned long r9);
+
+#ifdef CONFIG_PPC64
+extern int enter_prom(struct prom_args *args, unsigned long entry);
+#else
+static inline int enter_prom(struct prom_args *args, unsigned long entry)
+{
+ return ((int (*)(struct prom_args *))entry)(args);
+}
+#endif
+
+extern void copy_and_flush(unsigned long dest, unsigned long src,
+ unsigned long size, unsigned long offset);
+
+/* prom structure */
+static struct prom_t __initdata prom;
+
+static unsigned long prom_entry __initdata;
+
+#define PROM_SCRATCH_SIZE 256
+
+static char __initdata of_stdout_device[256];
+static char __initdata prom_scratch[PROM_SCRATCH_SIZE];
+
+static unsigned long __initdata dt_header_start;
+static unsigned long __initdata dt_struct_start, dt_struct_end;
+static unsigned long __initdata dt_string_start, dt_string_end;
+
+static unsigned long __initdata prom_initrd_start, prom_initrd_end;
+
+#ifdef CONFIG_PPC64
+static int __initdata prom_iommu_force_on;
+static int __initdata prom_iommu_off;
+static unsigned long __initdata prom_tce_alloc_start;
+static unsigned long __initdata prom_tce_alloc_end;
+#endif
+
+/* Platforms codes are now obsolete in the kernel. Now only used within this
+ * file and ultimately gone too. Feel free to change them if you need, they
+ * are not shared with anything outside of this file anymore
+ */
+#define PLATFORM_PSERIES 0x0100
+#define PLATFORM_PSERIES_LPAR 0x0101
+#define PLATFORM_LPAR 0x0001
+#define PLATFORM_POWERMAC 0x0400
+#define PLATFORM_GENERIC 0x0500
+#define PLATFORM_OPAL 0x0600
+
+static int __initdata of_platform;
+
+static char __initdata prom_cmd_line[COMMAND_LINE_SIZE];
+
+static unsigned long __initdata prom_memory_limit;
+
+static unsigned long __initdata alloc_top;
+static unsigned long __initdata alloc_top_high;
+static unsigned long __initdata alloc_bottom;
+static unsigned long __initdata rmo_top;
+static unsigned long __initdata ram_top;
+
+static struct mem_map_entry __initdata mem_reserve_map[MEM_RESERVE_MAP_SIZE];
+static int __initdata mem_reserve_cnt;
+
+static cell_t __initdata regbuf[1024];
+
+
+/*
+ * Error results ... some OF calls will return "-1" on error, some
+ * will return 0, some will return either. To simplify, here are
+ * macros to use with any ihandle or phandle return value to check if
+ * it is valid
+ */
+
+#define PROM_ERROR (-1u)
+#define PHANDLE_VALID(p) ((p) != 0 && (p) != PROM_ERROR)
+#define IHANDLE_VALID(i) ((i) != 0 && (i) != PROM_ERROR)
+
+
+/* This is the one and *ONLY* place where we actually call open
+ * firmware.
+ */
+
+static int __init call_prom(const char *service, int nargs, int nret, ...)
+{
+ int i;
+ struct prom_args args;
+ va_list list;
+
+ args.service = ADDR(service);
+ args.nargs = nargs;
+ args.nret = nret;
+
+ va_start(list, nret);
+ for (i = 0; i < nargs; i++)
+ args.args[i] = va_arg(list, prom_arg_t);
+ va_end(list);
+
+ for (i = 0; i < nret; i++)
+ args.args[nargs+i] = 0;
+
+ if (enter_prom(&args, RELOC(prom_entry)) < 0)
+ return PROM_ERROR;
+
+ return (nret > 0) ? args.args[nargs] : 0;
+}
+
+static int __init call_prom_ret(const char *service, int nargs, int nret,
+ prom_arg_t *rets, ...)
+{
+ int i;
+ struct prom_args args;
+ va_list list;
+
+ args.service = ADDR(service);
+ args.nargs = nargs;
+ args.nret = nret;
+
+ va_start(list, rets);
+ for (i = 0; i < nargs; i++)
+ args.args[i] = va_arg(list, prom_arg_t);
+ va_end(list);
+
+ for (i = 0; i < nret; i++)
+ args.args[nargs+i] = 0;
+
+ if (enter_prom(&args, RELOC(prom_entry)) < 0)
+ return PROM_ERROR;
+
+ if (rets != NULL)
+ for (i = 1; i < nret; ++i)
+ rets[i-1] = args.args[nargs+i];
+
+ return (nret > 0) ? args.args[nargs] : 0;
+}
+
+
+static void __init prom_print(const char *msg)
+{
+ const char *p, *q;
+ struct prom_t *_prom = &RELOC(prom);
+
+ if (_prom->stdout == 0)
+ return;
+
+ for (p = msg; *p != 0; p = q) {
+ for (q = p; *q != 0 && *q != '\n'; ++q)
+ ;
+ if (q > p)
+ call_prom("write", 3, 1, _prom->stdout, p, q - p);
+ if (*q == 0)
+ break;
+ ++q;
+ call_prom("write", 3, 1, _prom->stdout, ADDR("\r\n"), 2);
+ }
+}
+
+
+static void __init prom_print_hex(unsigned long val)
+{
+ int i, nibbles = sizeof(val)*2;
+ char buf[sizeof(val)*2+1];
+ struct prom_t *_prom = &RELOC(prom);
+
+ for (i = nibbles-1; i >= 0; i--) {
+ buf[i] = (val & 0xf) + '0';
+ if (buf[i] > '9')
+ buf[i] += ('a'-'0'-10);
+ val >>= 4;
+ }
+ buf[nibbles] = '\0';
+ call_prom("write", 3, 1, _prom->stdout, buf, nibbles);
+}
+
+/* max number of decimal digits in an unsigned long */
+#define UL_DIGITS 21
+static void __init prom_print_dec(unsigned long val)
+{
+ int i, size;
+ char buf[UL_DIGITS+1];
+ struct prom_t *_prom = &RELOC(prom);
+
+ for (i = UL_DIGITS-1; i >= 0; i--) {
+ buf[i] = (val % 10) + '0';
+ val = val/10;
+ if (val == 0)
+ break;
+ }
+ /* shift stuff down */
+ size = UL_DIGITS - i;
+ call_prom("write", 3, 1, _prom->stdout, buf+i, size);
+}
+
+static void __init prom_printf(const char *format, ...)
+{
+ const char *p, *q, *s;
+ va_list args;
+ unsigned long v;
+ long vs;
+ struct prom_t *_prom = &RELOC(prom);
+
+ va_start(args, format);
+#ifdef CONFIG_PPC64
+ format = PTRRELOC(format);
+#endif
+ for (p = format; *p != 0; p = q) {
+ for (q = p; *q != 0 && *q != '\n' && *q != '%'; ++q)
+ ;
+ if (q > p)
+ call_prom("write", 3, 1, _prom->stdout, p, q - p);
+ if (*q == 0)
+ break;
+ if (*q == '\n') {
+ ++q;
+ call_prom("write", 3, 1, _prom->stdout,
+ ADDR("\r\n"), 2);
+ continue;
+ }
+ ++q;
+ if (*q == 0)
+ break;
+ switch (*q) {
+ case 's':
+ ++q;
+ s = va_arg(args, const char *);
+ prom_print(s);
+ break;
+ case 'x':
+ ++q;
+ v = va_arg(args, unsigned long);
+ prom_print_hex(v);
+ break;
+ case 'd':
+ ++q;
+ vs = va_arg(args, int);
+ if (vs < 0) {
+ prom_print(RELOC("-"));
+ vs = -vs;
+ }
+ prom_print_dec(vs);
+ break;
+ case 'l':
+ ++q;
+ if (*q == 0)
+ break;
+ else if (*q == 'x') {
+ ++q;
+ v = va_arg(args, unsigned long);
+ prom_print_hex(v);
+ } else if (*q == 'u') { /* '%lu' */
+ ++q;
+ v = va_arg(args, unsigned long);
+ prom_print_dec(v);
+ } else if (*q == 'd') { /* %ld */
+ ++q;
+ vs = va_arg(args, long);
+ if (vs < 0) {
+ prom_print(RELOC("-"));
+ vs = -vs;
+ }
+ prom_print_dec(vs);
+ }
+ break;
+ }
+ }
+}
+
+
+static unsigned int __init prom_claim(unsigned long virt, unsigned long size,
+ unsigned long align)
+{
+ struct prom_t *_prom = &RELOC(prom);
+
+ if (align == 0 && (OF_WORKAROUNDS & OF_WA_CLAIM)) {
+ /*
+ * Old OF requires we claim physical and virtual separately
+ * and then map explicitly (assuming virtual mode)
+ */
+ int ret;
+ prom_arg_t result;
+
+ ret = call_prom_ret("call-method", 5, 2, &result,
+ ADDR("claim"), _prom->memory,
+ align, size, virt);
+ if (ret != 0 || result == -1)
+ return -1;
+ ret = call_prom_ret("call-method", 5, 2, &result,
+ ADDR("claim"), _prom->mmumap,
+ align, size, virt);
+ if (ret != 0) {
+ call_prom("call-method", 4, 1, ADDR("release"),
+ _prom->memory, size, virt);
+ return -1;
+ }
+ /* the 0x12 is M (coherence) + PP == read/write */
+ call_prom("call-method", 6, 1,
+ ADDR("map"), _prom->mmumap, 0x12, size, virt, virt);
+ return virt;
+ }
+ return call_prom("claim", 3, 1, (prom_arg_t)virt, (prom_arg_t)size,
+ (prom_arg_t)align);
+}
+
+static void __init __attribute__((noreturn)) prom_panic(const char *reason)
+{
+#ifdef CONFIG_PPC64
+ reason = PTRRELOC(reason);
+#endif
+ prom_print(reason);
+ /* Do not call exit because it clears the screen on pmac
+ * it also causes some sort of double-fault on early pmacs */
+ if (RELOC(of_platform) == PLATFORM_POWERMAC)
+ asm("trap\n");
+
+ /* ToDo: should put up an SRC here on pSeries */
+ call_prom("exit", 0, 0);
+
+ for (;;) /* should never get here */
+ ;
+}
+
+
+static int __init prom_next_node(phandle *nodep)
+{
+ phandle node;
+
+ if ((node = *nodep) != 0
+ && (*nodep = call_prom("child", 1, 1, node)) != 0)
+ return 1;
+ if ((*nodep = call_prom("peer", 1, 1, node)) != 0)
+ return 1;
+ for (;;) {
+ if ((node = call_prom("parent", 1, 1, node)) == 0)
+ return 0;
+ if ((*nodep = call_prom("peer", 1, 1, node)) != 0)
+ return 1;
+ }
+}
+
+static int inline prom_getprop(phandle node, const char *pname,
+ void *value, size_t valuelen)
+{
+ return call_prom("getprop", 4, 1, node, ADDR(pname),
+ (u32)(unsigned long) value, (u32) valuelen);
+}
+
+static int inline prom_getproplen(phandle node, const char *pname)
+{
+ return call_prom("getproplen", 2, 1, node, ADDR(pname));
+}
+
+static void add_string(char **str, const char *q)
+{
+ char *p = *str;
+
+ while (*q)
+ *p++ = *q++;
+ *p++ = ' ';
+ *str = p;
+}
+
+static char *tohex(unsigned int x)
+{
+ static char digits[] = "0123456789abcdef";
+ static char result[9];
+ int i;
+
+ result[8] = 0;
+ i = 8;
+ do {
+ --i;
+ result[i] = digits[x & 0xf];
+ x >>= 4;
+ } while (x != 0 && i > 0);
+ return &result[i];
+}
+
+static int __init prom_setprop(phandle node, const char *nodename,
+ const char *pname, void *value, size_t valuelen)
+{
+ char cmd[256], *p;
+
+ if (!(OF_WORKAROUNDS & OF_WA_LONGTRAIL))
+ return call_prom("setprop", 4, 1, node, ADDR(pname),
+ (u32)(unsigned long) value, (u32) valuelen);
+
+ /* gah... setprop doesn't work on longtrail, have to use interpret */
+ p = cmd;
+ add_string(&p, "dev");
+ add_string(&p, nodename);
+ add_string(&p, tohex((u32)(unsigned long) value));
+ add_string(&p, tohex(valuelen));
+ add_string(&p, tohex(ADDR(pname)));
+ add_string(&p, tohex(strlen(RELOC(pname))));
+ add_string(&p, "property");
+ *p = 0;
+ return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd);
+}
+
+/* We can't use the standard versions because of RELOC headaches. */
+#define isxdigit(c) (('0' <= (c) && (c) <= '9') \
+ || ('a' <= (c) && (c) <= 'f') \
+ || ('A' <= (c) && (c) <= 'F'))
+
+#define isdigit(c) ('0' <= (c) && (c) <= '9')
+#define islower(c) ('a' <= (c) && (c) <= 'z')
+#define toupper(c) (islower(c) ? ((c) - 'a' + 'A') : (c))
+
+unsigned long prom_strtoul(const char *cp, const char **endp)
+{
+ unsigned long result = 0, base = 10, value;
+
+ if (*cp == '0') {
+ base = 8;
+ cp++;
+ if (toupper(*cp) == 'X') {
+ cp++;
+ base = 16;
+ }
+ }
+
+ while (isxdigit(*cp) &&
+ (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < base) {
+ result = result * base + value;
+ cp++;
+ }
+
+ if (endp)
+ *endp = cp;
+
+ return result;
+}
+
+unsigned long prom_memparse(const char *ptr, const char **retptr)
+{
+ unsigned long ret = prom_strtoul(ptr, retptr);
+ int shift = 0;
+
+ /*
+ * We can't use a switch here because GCC *may* generate a
+ * jump table which won't work, because we're not running at
+ * the address we're linked at.
+ */
+ if ('G' == **retptr || 'g' == **retptr)
+ shift = 30;
+
+ if ('M' == **retptr || 'm' == **retptr)
+ shift = 20;
+
+ if ('K' == **retptr || 'k' == **retptr)
+ shift = 10;
+
+ if (shift) {
+ ret <<= shift;
+ (*retptr)++;
+ }
+
+ return ret;
+}
+
+/*
+ * Early parsing of the command line passed to the kernel, used for
+ * "mem=x" and the options that affect the iommu
+ */
+static void __init early_cmdline_parse(void)
+{
+ struct prom_t *_prom = &RELOC(prom);
+ const char *opt;
+
+ char *p;
+ int l = 0;
+
+ RELOC(prom_cmd_line[0]) = 0;
+ p = RELOC(prom_cmd_line);
+ if ((long)_prom->chosen > 0)
+ l = prom_getprop(_prom->chosen, "bootargs", p, COMMAND_LINE_SIZE-1);
+#ifdef CONFIG_CMDLINE
+ if (l <= 0 || p[0] == '\0') /* dbl check */
+ strlcpy(RELOC(prom_cmd_line),
+ RELOC(CONFIG_CMDLINE), sizeof(prom_cmd_line));
+#endif /* CONFIG_CMDLINE */
+ prom_printf("command line: %s\n", RELOC(prom_cmd_line));
+
+#ifdef CONFIG_PPC64
+ opt = strstr(RELOC(prom_cmd_line), RELOC("iommu="));
+ if (opt) {
+ prom_printf("iommu opt is: %s\n", opt);
+ opt += 6;
+ while (*opt && *opt == ' ')
+ opt++;
+ if (!strncmp(opt, RELOC("off"), 3))
+ RELOC(prom_iommu_off) = 1;
+ else if (!strncmp(opt, RELOC("force"), 5))
+ RELOC(prom_iommu_force_on) = 1;
+ }
+#endif
+ opt = strstr(RELOC(prom_cmd_line), RELOC("mem="));
+ if (opt) {
+ opt += 4;
+ RELOC(prom_memory_limit) = prom_memparse(opt, (const char **)&opt);
+#ifdef CONFIG_PPC64
+ /* Align to 16 MB == size of ppc64 large page */
+ RELOC(prom_memory_limit) = ALIGN(RELOC(prom_memory_limit), 0x1000000);
+#endif
+ }
+}
+
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+/*
+ * There are two methods for telling firmware what our capabilities are.
+ * Newer machines have an "ibm,client-architecture-support" method on the
+ * root node. For older machines, we have to call the "process-elf-header"
+ * method in the /packages/elf-loader node, passing it a fake 32-bit
+ * ELF header containing a couple of PT_NOTE sections that contain
+ * structures that contain various information.
+ */
+
+/*
+ * New method - extensible architecture description vector.
+ *
+ * Because the description vector contains a mix of byte and word
+ * values, we declare it as an unsigned char array, and use this
+ * macro to put word values in.
+ */
+#define W(x) ((x) >> 24) & 0xff, ((x) >> 16) & 0xff, \
+ ((x) >> 8) & 0xff, (x) & 0xff
+
+/* Option vector bits - generic bits in byte 1 */
+#define OV_IGNORE 0x80 /* ignore this vector */
+#define OV_CESSATION_POLICY 0x40 /* halt if unsupported option present*/
+
+/* Option vector 1: processor architectures supported */
+#define OV1_PPC_2_00 0x80 /* set if we support PowerPC 2.00 */
+#define OV1_PPC_2_01 0x40 /* set if we support PowerPC 2.01 */
+#define OV1_PPC_2_02 0x20 /* set if we support PowerPC 2.02 */
+#define OV1_PPC_2_03 0x10 /* set if we support PowerPC 2.03 */
+#define OV1_PPC_2_04 0x08 /* set if we support PowerPC 2.04 */
+#define OV1_PPC_2_05 0x04 /* set if we support PowerPC 2.05 */
+#define OV1_PPC_2_06 0x02 /* set if we support PowerPC 2.06 */
+
+/* Option vector 2: Open Firmware options supported */
+#define OV2_REAL_MODE 0x20 /* set if we want OF in real mode */
+
+/* Option vector 3: processor options supported */
+#define OV3_FP 0x80 /* floating point */
+#define OV3_VMX 0x40 /* VMX/Altivec */
+#define OV3_DFP 0x20 /* decimal FP */
+
+/* Option vector 5: PAPR/OF options supported */
+#define OV5_LPAR 0x80 /* logical partitioning supported */
+#define OV5_SPLPAR 0x40 /* shared-processor LPAR supported */
+/* ibm,dynamic-reconfiguration-memory property supported */
+#define OV5_DRCONF_MEMORY 0x20
+#define OV5_LARGE_PAGES 0x10 /* large pages supported */
+#define OV5_DONATE_DEDICATE_CPU 0x02 /* donate dedicated CPU support */
+/* PCIe/MSI support. Without MSI full PCIe is not supported */
+#ifdef CONFIG_PCI_MSI
+#define OV5_MSI 0x01 /* PCIe/MSI support */
+#else
+#define OV5_MSI 0x00
+#endif /* CONFIG_PCI_MSI */
+#ifdef CONFIG_PPC_SMLPAR
+#define OV5_CMO 0x80 /* Cooperative Memory Overcommitment */
+#define OV5_XCMO 0x40 /* Page Coalescing */
+#else
+#define OV5_CMO 0x00
+#define OV5_XCMO 0x00
+#endif
+#define OV5_TYPE1_AFFINITY 0x80 /* Type 1 NUMA affinity */
+
+/* Option Vector 6: IBM PAPR hints */
+#define OV6_LINUX 0x02 /* Linux is our OS */
+
+/*
+ * The architecture vector has an array of PVR mask/value pairs,
+ * followed by # option vectors - 1, followed by the option vectors.
+ */
+static unsigned char ibm_architecture_vec[] = {
+ W(0xfffe0000), W(0x003a0000), /* POWER5/POWER5+ */
+ W(0xffff0000), W(0x003e0000), /* POWER6 */
+ W(0xffff0000), W(0x003f0000), /* POWER7 */
+ W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */
+ W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */
+ W(0xfffffffe), W(0x0f000001), /* all 2.04-compliant and earlier */
+ 6 - 1, /* 6 option vectors */
+
+ /* option vector 1: processor architectures supported */
+ 3 - 2, /* length */
+ 0, /* don't ignore, don't halt */
+ OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 |
+ OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06,
+
+ /* option vector 2: Open Firmware options supported */
+ 34 - 2, /* length */
+ OV2_REAL_MODE,
+ 0, 0,
+ W(0xffffffff), /* real_base */
+ W(0xffffffff), /* real_size */
+ W(0xffffffff), /* virt_base */
+ W(0xffffffff), /* virt_size */
+ W(0xffffffff), /* load_base */
+ W(256), /* 256MB min RMA */
+ W(0xffffffff), /* full client load */
+ 0, /* min RMA percentage of total RAM */
+ 48, /* max log_2(hash table size) */
+
+ /* option vector 3: processor options supported */
+ 3 - 2, /* length */
+ 0, /* don't ignore, don't halt */
+ OV3_FP | OV3_VMX | OV3_DFP,
+
+ /* option vector 4: IBM PAPR implementation */
+ 2 - 2, /* length */
+ 0, /* don't halt */
+
+ /* option vector 5: PAPR/OF options */
+ 13 - 2, /* length */
+ 0, /* don't ignore, don't halt */
+ OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
+ OV5_DONATE_DEDICATE_CPU | OV5_MSI,
+ 0,
+ OV5_CMO | OV5_XCMO,
+ OV5_TYPE1_AFFINITY,
+ 0,
+ 0,
+ 0,
+ /* WARNING: The offset of the "number of cores" field below
+ * must match by the macro below. Update the definition if
+ * the structure layout changes.
+ */
+#define IBM_ARCH_VEC_NRCORES_OFFSET 100
+ W(NR_CPUS), /* number of cores supported */
+
+ /* option vector 6: IBM PAPR hints */
+ 4 - 2, /* length */
+ 0,
+ 0,
+ OV6_LINUX,
+
+};
+
+/* Old method - ELF header with PT_NOTE sections */
+static struct fake_elf {
+ Elf32_Ehdr elfhdr;
+ Elf32_Phdr phdr[2];
+ struct chrpnote {
+ u32 namesz;
+ u32 descsz;
+ u32 type;
+ char name[8]; /* "PowerPC" */
+ struct chrpdesc {
+ u32 real_mode;
+ u32 real_base;
+ u32 real_size;
+ u32 virt_base;
+ u32 virt_size;
+ u32 load_base;
+ } chrpdesc;
+ } chrpnote;
+ struct rpanote {
+ u32 namesz;
+ u32 descsz;
+ u32 type;
+ char name[24]; /* "IBM,RPA-Client-Config" */
+ struct rpadesc {
+ u32 lpar_affinity;
+ u32 min_rmo_size;
+ u32 min_rmo_percent;
+ u32 max_pft_size;
+ u32 splpar;
+ u32 min_load;
+ u32 new_mem_def;
+ u32 ignore_me;
+ } rpadesc;
+ } rpanote;
+} fake_elf = {
+ .elfhdr = {
+ .e_ident = { 0x7f, 'E', 'L', 'F',
+ ELFCLASS32, ELFDATA2MSB, EV_CURRENT },
+ .e_type = ET_EXEC, /* yeah right */
+ .e_machine = EM_PPC,
+ .e_version = EV_CURRENT,
+ .e_phoff = offsetof(struct fake_elf, phdr),
+ .e_phentsize = sizeof(Elf32_Phdr),
+ .e_phnum = 2
+ },
+ .phdr = {
+ [0] = {
+ .p_type = PT_NOTE,
+ .p_offset = offsetof(struct fake_elf, chrpnote),
+ .p_filesz = sizeof(struct chrpnote)
+ }, [1] = {
+ .p_type = PT_NOTE,
+ .p_offset = offsetof(struct fake_elf, rpanote),
+ .p_filesz = sizeof(struct rpanote)
+ }
+ },
+ .chrpnote = {
+ .namesz = sizeof("PowerPC"),
+ .descsz = sizeof(struct chrpdesc),
+ .type = 0x1275,
+ .name = "PowerPC",
+ .chrpdesc = {
+ .real_mode = ~0U, /* ~0 means "don't care" */
+ .real_base = ~0U,
+ .real_size = ~0U,
+ .virt_base = ~0U,
+ .virt_size = ~0U,
+ .load_base = ~0U
+ },
+ },
+ .rpanote = {
+ .namesz = sizeof("IBM,RPA-Client-Config"),
+ .descsz = sizeof(struct rpadesc),
+ .type = 0x12759999,
+ .name = "IBM,RPA-Client-Config",
+ .rpadesc = {
+ .lpar_affinity = 0,
+ .min_rmo_size = 64, /* in megabytes */
+ .min_rmo_percent = 0,
+ .max_pft_size = 48, /* 2^48 bytes max PFT size */
+ .splpar = 1,
+ .min_load = ~0U,
+ .new_mem_def = 0
+ }
+ }
+};
+
+static int __init prom_count_smt_threads(void)
+{
+ phandle node;
+ char type[64];
+ unsigned int plen;
+
+ /* Pick up th first CPU node we can find */
+ for (node = 0; prom_next_node(&node); ) {
+ type[0] = 0;
+ prom_getprop(node, "device_type", type, sizeof(type));
+
+ if (strcmp(type, RELOC("cpu")))
+ continue;
+ /*
+ * There is an entry for each smt thread, each entry being
+ * 4 bytes long. All cpus should have the same number of
+ * smt threads, so return after finding the first.
+ */
+ plen = prom_getproplen(node, "ibm,ppc-interrupt-server#s");
+ if (plen == PROM_ERROR)
+ break;
+ plen >>= 2;
+ prom_debug("Found %lu smt threads per core\n", (unsigned long)plen);
+
+ /* Sanity check */
+ if (plen < 1 || plen > 64) {
+ prom_printf("Threads per core %lu out of bounds, assuming 1\n",
+ (unsigned long)plen);
+ return 1;
+ }
+ return plen;
+ }
+ prom_debug("No threads found, assuming 1 per core\n");
+
+ return 1;
+
+}
+
+
+static void __init prom_send_capabilities(void)
+{
+ ihandle elfloader, root;
+ prom_arg_t ret;
+ u32 *cores;
+
+ root = call_prom("open", 1, 1, ADDR("/"));
+ if (root != 0) {
+ /* We need to tell the FW about the number of cores we support.
+ *
+ * To do that, we count the number of threads on the first core
+ * (we assume this is the same for all cores) and use it to
+ * divide NR_CPUS.
+ */
+ cores = (u32 *)PTRRELOC(&ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]);
+ if (*cores != NR_CPUS) {
+ prom_printf("WARNING ! "
+ "ibm_architecture_vec structure inconsistent: %lu!\n",
+ *cores);
+ } else {
+ *cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads());
+ prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n",
+ *cores, NR_CPUS);
+ }
+
+ /* try calling the ibm,client-architecture-support method */
+ prom_printf("Calling ibm,client-architecture-support...");
+ if (call_prom_ret("call-method", 3, 2, &ret,
+ ADDR("ibm,client-architecture-support"),
+ root,
+ ADDR(ibm_architecture_vec)) == 0) {
+ /* the call exists... */
+ if (ret)
+ prom_printf("\nWARNING: ibm,client-architecture"
+ "-support call FAILED!\n");
+ call_prom("close", 1, 0, root);
+ prom_printf(" done\n");
+ return;
+ }
+ call_prom("close", 1, 0, root);
+ prom_printf(" not implemented\n");
+ }
+
+ /* no ibm,client-architecture-support call, try the old way */
+ elfloader = call_prom("open", 1, 1, ADDR("/packages/elf-loader"));
+ if (elfloader == 0) {
+ prom_printf("couldn't open /packages/elf-loader\n");
+ return;
+ }
+ call_prom("call-method", 3, 1, ADDR("process-elf-header"),
+ elfloader, ADDR(&fake_elf));
+ call_prom("close", 1, 0, elfloader);
+}
+#endif
+
+/*
+ * Memory allocation strategy... our layout is normally:
+ *
+ * at 14Mb or more we have vmlinux, then a gap and initrd. In some
+ * rare cases, initrd might end up being before the kernel though.
+ * We assume this won't override the final kernel at 0, we have no
+ * provision to handle that in this version, but it should hopefully
+ * never happen.
+ *
+ * alloc_top is set to the top of RMO, eventually shrink down if the
+ * TCEs overlap
+ *
+ * alloc_bottom is set to the top of kernel/initrd
+ *
+ * from there, allocations are done this way : rtas is allocated
+ * topmost, and the device-tree is allocated from the bottom. We try
+ * to grow the device-tree allocation as we progress. If we can't,
+ * then we fail, we don't currently have a facility to restart
+ * elsewhere, but that shouldn't be necessary.
+ *
+ * Note that calls to reserve_mem have to be done explicitly, memory
+ * allocated with either alloc_up or alloc_down isn't automatically
+ * reserved.
+ */
+
+
+/*
+ * Allocates memory in the RMO upward from the kernel/initrd
+ *
+ * When align is 0, this is a special case, it means to allocate in place
+ * at the current location of alloc_bottom or fail (that is basically
+ * extending the previous allocation). Used for the device-tree flattening
+ */
+static unsigned long __init alloc_up(unsigned long size, unsigned long align)
+{
+ unsigned long base = RELOC(alloc_bottom);
+ unsigned long addr = 0;
+
+ if (align)
+ base = _ALIGN_UP(base, align);
+ prom_debug("alloc_up(%x, %x)\n", size, align);
+ if (RELOC(ram_top) == 0)
+ prom_panic("alloc_up() called with mem not initialized\n");
+
+ if (align)
+ base = _ALIGN_UP(RELOC(alloc_bottom), align);
+ else
+ base = RELOC(alloc_bottom);
+
+ for(; (base + size) <= RELOC(alloc_top);
+ base = _ALIGN_UP(base + 0x100000, align)) {
+ prom_debug(" trying: 0x%x\n\r", base);
+ addr = (unsigned long)prom_claim(base, size, 0);
+ if (addr != PROM_ERROR && addr != 0)
+ break;
+ addr = 0;
+ if (align == 0)
+ break;
+ }
+ if (addr == 0)
+ return 0;
+ RELOC(alloc_bottom) = addr + size;
+
+ prom_debug(" -> %x\n", addr);
+ prom_debug(" alloc_bottom : %x\n", RELOC(alloc_bottom));
+ prom_debug(" alloc_top : %x\n", RELOC(alloc_top));
+ prom_debug(" alloc_top_hi : %x\n", RELOC(alloc_top_high));
+ prom_debug(" rmo_top : %x\n", RELOC(rmo_top));
+ prom_debug(" ram_top : %x\n", RELOC(ram_top));
+
+ return addr;
+}
+
+/*
+ * Allocates memory downward, either from top of RMO, or if highmem
+ * is set, from the top of RAM. Note that this one doesn't handle
+ * failures. It does claim memory if highmem is not set.
+ */
+static unsigned long __init alloc_down(unsigned long size, unsigned long align,
+ int highmem)
+{
+ unsigned long base, addr = 0;
+
+ prom_debug("alloc_down(%x, %x, %s)\n", size, align,
+ highmem ? RELOC("(high)") : RELOC("(low)"));
+ if (RELOC(ram_top) == 0)
+ prom_panic("alloc_down() called with mem not initialized\n");
+
+ if (highmem) {
+ /* Carve out storage for the TCE table. */
+ addr = _ALIGN_DOWN(RELOC(alloc_top_high) - size, align);
+ if (addr <= RELOC(alloc_bottom))
+ return 0;
+ /* Will we bump into the RMO ? If yes, check out that we
+ * didn't overlap existing allocations there, if we did,
+ * we are dead, we must be the first in town !
+ */
+ if (addr < RELOC(rmo_top)) {
+ /* Good, we are first */
+ if (RELOC(alloc_top) == RELOC(rmo_top))
+ RELOC(alloc_top) = RELOC(rmo_top) = addr;
+ else
+ return 0;
+ }
+ RELOC(alloc_top_high) = addr;
+ goto bail;
+ }
+
+ base = _ALIGN_DOWN(RELOC(alloc_top) - size, align);
+ for (; base > RELOC(alloc_bottom);
+ base = _ALIGN_DOWN(base - 0x100000, align)) {
+ prom_debug(" trying: 0x%x\n\r", base);
+ addr = (unsigned long)prom_claim(base, size, 0);
+ if (addr != PROM_ERROR && addr != 0)
+ break;
+ addr = 0;
+ }
+ if (addr == 0)
+ return 0;
+ RELOC(alloc_top) = addr;
+
+ bail:
+ prom_debug(" -> %x\n", addr);
+ prom_debug(" alloc_bottom : %x\n", RELOC(alloc_bottom));
+ prom_debug(" alloc_top : %x\n", RELOC(alloc_top));
+ prom_debug(" alloc_top_hi : %x\n", RELOC(alloc_top_high));
+ prom_debug(" rmo_top : %x\n", RELOC(rmo_top));
+ prom_debug(" ram_top : %x\n", RELOC(ram_top));
+
+ return addr;
+}
+
+/*
+ * Parse a "reg" cell
+ */
+static unsigned long __init prom_next_cell(int s, cell_t **cellp)
+{
+ cell_t *p = *cellp;
+ unsigned long r = 0;
+
+ /* Ignore more than 2 cells */
+ while (s > sizeof(unsigned long) / 4) {
+ p++;
+ s--;
+ }
+ r = *p++;
+#ifdef CONFIG_PPC64
+ if (s > 1) {
+ r <<= 32;
+ r |= *(p++);
+ }
+#endif
+ *cellp = p;
+ return r;
+}
+
+/*
+ * Very dumb function for adding to the memory reserve list, but
+ * we don't need anything smarter at this point
+ *
+ * XXX Eventually check for collisions. They should NEVER happen.
+ * If problems seem to show up, it would be a good start to track
+ * them down.
+ */
+static void __init reserve_mem(u64 base, u64 size)
+{
+ u64 top = base + size;
+ unsigned long cnt = RELOC(mem_reserve_cnt);
+
+ if (size == 0)
+ return;
+
+ /* We need to always keep one empty entry so that we
+ * have our terminator with "size" set to 0 since we are
+ * dumb and just copy this entire array to the boot params
+ */
+ base = _ALIGN_DOWN(base, PAGE_SIZE);
+ top = _ALIGN_UP(top, PAGE_SIZE);
+ size = top - base;
+
+ if (cnt >= (MEM_RESERVE_MAP_SIZE - 1))
+ prom_panic("Memory reserve map exhausted !\n");
+ RELOC(mem_reserve_map)[cnt].base = base;
+ RELOC(mem_reserve_map)[cnt].size = size;
+ RELOC(mem_reserve_cnt) = cnt + 1;
+}
+
+/*
+ * Initialize memory allocation mechanism, parse "memory" nodes and
+ * obtain that way the top of memory and RMO to setup out local allocator
+ */
+static void __init prom_init_mem(void)
+{
+ phandle node;
+ char *path, type[64];
+ unsigned int plen;
+ cell_t *p, *endp;
+ struct prom_t *_prom = &RELOC(prom);
+ u32 rac, rsc;
+
+ /*
+ * We iterate the memory nodes to find
+ * 1) top of RMO (first node)
+ * 2) top of memory
+ */
+ rac = 2;
+ prom_getprop(_prom->root, "#address-cells", &rac, sizeof(rac));
+ rsc = 1;
+ prom_getprop(_prom->root, "#size-cells", &rsc, sizeof(rsc));
+ prom_debug("root_addr_cells: %x\n", (unsigned long) rac);
+ prom_debug("root_size_cells: %x\n", (unsigned long) rsc);
+
+ prom_debug("scanning memory:\n");
+ path = RELOC(prom_scratch);
+
+ for (node = 0; prom_next_node(&node); ) {
+ type[0] = 0;
+ prom_getprop(node, "device_type", type, sizeof(type));
+
+ if (type[0] == 0) {
+ /*
+ * CHRP Longtrail machines have no device_type
+ * on the memory node, so check the name instead...
+ */
+ prom_getprop(node, "name", type, sizeof(type));
+ }
+ if (strcmp(type, RELOC("memory")))
+ continue;
+
+ plen = prom_getprop(node, "reg", RELOC(regbuf), sizeof(regbuf));
+ if (plen > sizeof(regbuf)) {
+ prom_printf("memory node too large for buffer !\n");
+ plen = sizeof(regbuf);
+ }
+ p = RELOC(regbuf);
+ endp = p + (plen / sizeof(cell_t));
+
+#ifdef DEBUG_PROM
+ memset(path, 0, PROM_SCRATCH_SIZE);
+ call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
+ prom_debug(" node %s :\n", path);
+#endif /* DEBUG_PROM */
+
+ while ((endp - p) >= (rac + rsc)) {
+ unsigned long base, size;
+
+ base = prom_next_cell(rac, &p);
+ size = prom_next_cell(rsc, &p);
+
+ if (size == 0)
+ continue;
+ prom_debug(" %x %x\n", base, size);
+ if (base == 0 && (RELOC(of_platform) & PLATFORM_LPAR))
+ RELOC(rmo_top) = size;
+ if ((base + size) > RELOC(ram_top))
+ RELOC(ram_top) = base + size;
+ }
+ }
+
+ RELOC(alloc_bottom) = PAGE_ALIGN((unsigned long)&RELOC(_end) + 0x4000);
+
+ /*
+ * If prom_memory_limit is set we reduce the upper limits *except* for
+ * alloc_top_high. This must be the real top of RAM so we can put
+ * TCE's up there.
+ */
+
+ RELOC(alloc_top_high) = RELOC(ram_top);
+
+ if (RELOC(prom_memory_limit)) {
+ if (RELOC(prom_memory_limit) <= RELOC(alloc_bottom)) {
+ prom_printf("Ignoring mem=%x <= alloc_bottom.\n",
+ RELOC(prom_memory_limit));
+ RELOC(prom_memory_limit) = 0;
+ } else if (RELOC(prom_memory_limit) >= RELOC(ram_top)) {
+ prom_printf("Ignoring mem=%x >= ram_top.\n",
+ RELOC(prom_memory_limit));
+ RELOC(prom_memory_limit) = 0;
+ } else {
+ RELOC(ram_top) = RELOC(prom_memory_limit);
+ RELOC(rmo_top) = min(RELOC(rmo_top), RELOC(prom_memory_limit));
+ }
+ }
+
+ /*
+ * Setup our top alloc point, that is top of RMO or top of
+ * segment 0 when running non-LPAR.
+ * Some RS64 machines have buggy firmware where claims up at
+ * 1GB fail. Cap at 768MB as a workaround.
+ * Since 768MB is plenty of room, and we need to cap to something
+ * reasonable on 32-bit, cap at 768MB on all machines.
+ */
+ if (!RELOC(rmo_top))
+ RELOC(rmo_top) = RELOC(ram_top);
+ RELOC(rmo_top) = min(0x30000000ul, RELOC(rmo_top));
+ RELOC(alloc_top) = RELOC(rmo_top);
+ RELOC(alloc_top_high) = RELOC(ram_top);
+
+ /*
+ * Check if we have an initrd after the kernel but still inside
+ * the RMO. If we do move our bottom point to after it.
+ */
+ if (RELOC(prom_initrd_start) &&
+ RELOC(prom_initrd_start) < RELOC(rmo_top) &&
+ RELOC(prom_initrd_end) > RELOC(alloc_bottom))
+ RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end));
+
+ prom_printf("memory layout at init:\n");
+ prom_printf(" memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit));
+ prom_printf(" alloc_bottom : %x\n", RELOC(alloc_bottom));
+ prom_printf(" alloc_top : %x\n", RELOC(alloc_top));
+ prom_printf(" alloc_top_hi : %x\n", RELOC(alloc_top_high));
+ prom_printf(" rmo_top : %x\n", RELOC(rmo_top));
+ prom_printf(" ram_top : %x\n", RELOC(ram_top));
+}
+
+static void __init prom_close_stdin(void)
+{
+ struct prom_t *_prom = &RELOC(prom);
+ ihandle val;
+
+ if (prom_getprop(_prom->chosen, "stdin", &val, sizeof(val)) > 0)
+ call_prom("close", 1, 0, val);
+}
+
+#ifdef CONFIG_PPC_POWERNV
+
+static u64 __initdata prom_opal_size;
+static u64 __initdata prom_opal_align;
+static int __initdata prom_rtas_start_cpu;
+static u64 __initdata prom_rtas_data;
+static u64 __initdata prom_rtas_entry;
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
+static u64 __initdata prom_opal_base;
+static u64 __initdata prom_opal_entry;
+#endif
+
+/* XXX Don't change this structure without updating opal-takeover.S */
+static struct opal_secondary_data {
+ s64 ack; /* 0 */
+ u64 go; /* 8 */
+ struct opal_takeover_args args; /* 16 */
+} opal_secondary_data;
+
+extern char opal_secondary_entry;
+
+static void prom_query_opal(void)
+{
+ long rc;
+
+ /* We must not query for OPAL presence on a machine that
+ * supports TNK takeover (970 blades), as this uses the same
+ * h-call with different arguments and will crash
+ */
+ if (PHANDLE_VALID(call_prom("finddevice", 1, 1,
+ ADDR("/tnk-memory-map")))) {
+ prom_printf("TNK takeover detected, skipping OPAL check\n");
+ return;
+ }
+
+ prom_printf("Querying for OPAL presence... ");
+ rc = opal_query_takeover(&RELOC(prom_opal_size),
+ &RELOC(prom_opal_align));
+ prom_debug("(rc = %ld) ", rc);
+ if (rc != 0) {
+ prom_printf("not there.\n");
+ return;
+ }
+ RELOC(of_platform) = PLATFORM_OPAL;
+ prom_printf(" there !\n");
+ prom_debug(" opal_size = 0x%lx\n", RELOC(prom_opal_size));
+ prom_debug(" opal_align = 0x%lx\n", RELOC(prom_opal_align));
+ if (RELOC(prom_opal_align) < 0x10000)
+ RELOC(prom_opal_align) = 0x10000;
+}
+
+static int prom_rtas_call(int token, int nargs, int nret, int *outputs, ...)
+{
+ struct rtas_args rtas_args;
+ va_list list;
+ int i;
+
+ rtas_args.token = token;
+ rtas_args.nargs = nargs;
+ rtas_args.nret = nret;
+ rtas_args.rets = (rtas_arg_t *)&(rtas_args.args[nargs]);
+ va_start(list, outputs);
+ for (i = 0; i < nargs; ++i)
+ rtas_args.args[i] = va_arg(list, rtas_arg_t);
+ va_end(list);
+
+ for (i = 0; i < nret; ++i)
+ rtas_args.rets[i] = 0;
+
+ opal_enter_rtas(&rtas_args, RELOC(prom_rtas_data),
+ RELOC(prom_rtas_entry));
+
+ if (nret > 1 && outputs != NULL)
+ for (i = 0; i < nret-1; ++i)
+ outputs[i] = rtas_args.rets[i+1];
+ return (nret > 0)? rtas_args.rets[0]: 0;
+}
+
+static void __init prom_opal_hold_cpus(void)
+{
+ int i, cnt, cpu, rc;
+ long j;
+ phandle node;
+ char type[64];
+ u32 servers[8];
+ struct prom_t *_prom = &RELOC(prom);
+ void *entry = (unsigned long *)&RELOC(opal_secondary_entry);
+ struct opal_secondary_data *data = &RELOC(opal_secondary_data);
+
+ prom_debug("prom_opal_hold_cpus: start...\n");
+ prom_debug(" - entry = 0x%x\n", entry);
+ prom_debug(" - data = 0x%x\n", data);
+
+ data->ack = -1;
+ data->go = 0;
+
+ /* look for cpus */
+ for (node = 0; prom_next_node(&node); ) {
+ type[0] = 0;
+ prom_getprop(node, "device_type", type, sizeof(type));
+ if (strcmp(type, RELOC("cpu")) != 0)
+ continue;
+
+ /* Skip non-configured cpus. */
+ if (prom_getprop(node, "status", type, sizeof(type)) > 0)
+ if (strcmp(type, RELOC("okay")) != 0)
+ continue;
+
+ cnt = prom_getprop(node, "ibm,ppc-interrupt-server#s", servers,
+ sizeof(servers));
+ if (cnt == PROM_ERROR)
+ break;
+ cnt >>= 2;
+ for (i = 0; i < cnt; i++) {
+ cpu = servers[i];
+ prom_debug("CPU %d ... ", cpu);
+ if (cpu == _prom->cpu) {
+ prom_debug("booted !\n");
+ continue;
+ }
+ prom_debug("starting ... ");
+
+ /* Init the acknowledge var which will be reset by
+ * the secondary cpu when it awakens from its OF
+ * spinloop.
+ */
+ data->ack = -1;
+ rc = prom_rtas_call(RELOC(prom_rtas_start_cpu), 3, 1,
+ NULL, cpu, entry, data);
+ prom_debug("rtas rc=%d ...", rc);
+
+ for (j = 0; j < 100000000 && data->ack == -1; j++) {
+ HMT_low();
+ mb();
+ }
+ HMT_medium();
+ if (data->ack != -1)
+ prom_debug("done, PIR=0x%x\n", data->ack);
+ else
+ prom_debug("timeout !\n");
+ }
+ }
+ prom_debug("prom_opal_hold_cpus: end...\n");
+}
+
+static void prom_opal_takeover(void)
+{
+ struct opal_secondary_data *data = &RELOC(opal_secondary_data);
+ struct opal_takeover_args *args = &data->args;
+ u64 align = RELOC(prom_opal_align);
+ u64 top_addr, opal_addr;
+
+ args->k_image = (u64)RELOC(_stext);
+ args->k_size = _end - _stext;
+ args->k_entry = 0;
+ args->k_entry2 = 0x60;
+
+ top_addr = _ALIGN_UP(args->k_size, align);
+
+ if (RELOC(prom_initrd_start) != 0) {
+ args->rd_image = RELOC(prom_initrd_start);
+ args->rd_size = RELOC(prom_initrd_end) - args->rd_image;
+ args->rd_loc = top_addr;
+ top_addr = _ALIGN_UP(args->rd_loc + args->rd_size, align);
+ }
+
+ /* Pickup an address for the HAL. We want to go really high
+ * up to avoid problem with future kexecs. On the other hand
+ * we don't want to be all over the TCEs on P5IOC2 machines
+ * which are going to be up there too. We assume the machine
+ * has plenty of memory, and we ask for the HAL for now to
+ * be just below the 1G point, or above the initrd
+ */
+ opal_addr = _ALIGN_DOWN(0x40000000 - RELOC(prom_opal_size), align);
+ if (opal_addr < top_addr)
+ opal_addr = top_addr;
+ args->hal_addr = opal_addr;
+
+ /* Copy the command line to the kernel image */
+ strlcpy(RELOC(boot_command_line), RELOC(prom_cmd_line),
+ COMMAND_LINE_SIZE);
+
+ prom_debug(" k_image = 0x%lx\n", args->k_image);
+ prom_debug(" k_size = 0x%lx\n", args->k_size);
+ prom_debug(" k_entry = 0x%lx\n", args->k_entry);
+ prom_debug(" k_entry2 = 0x%lx\n", args->k_entry2);
+ prom_debug(" hal_addr = 0x%lx\n", args->hal_addr);
+ prom_debug(" rd_image = 0x%lx\n", args->rd_image);
+ prom_debug(" rd_size = 0x%lx\n", args->rd_size);
+ prom_debug(" rd_loc = 0x%lx\n", args->rd_loc);
+ prom_printf("Performing OPAL takeover,this can take a few minutes..\n");
+ prom_close_stdin();
+ mb();
+ data->go = 1;
+ for (;;)
+ opal_do_takeover(args);
+}
+
+/*
+ * Allocate room for and instantiate OPAL
+ */
+static void __init prom_instantiate_opal(void)
+{
+ phandle opal_node;
+ ihandle opal_inst;
+ u64 base, entry;
+ u64 size = 0, align = 0x10000;
+ u32 rets[2];
+
+ prom_debug("prom_instantiate_opal: start...\n");
+
+ opal_node = call_prom("finddevice", 1, 1, ADDR("/ibm,opal"));
+ prom_debug("opal_node: %x\n", opal_node);
+ if (!PHANDLE_VALID(opal_node))
+ return;
+
+ prom_getprop(opal_node, "opal-runtime-size", &size, sizeof(size));
+ if (size == 0)
+ return;
+ prom_getprop(opal_node, "opal-runtime-alignment", &align,
+ sizeof(align));
+
+ base = alloc_down(size, align, 0);
+ if (base == 0) {
+ prom_printf("OPAL allocation failed !\n");
+ return;
+ }
+
+ opal_inst = call_prom("open", 1, 1, ADDR("/ibm,opal"));
+ if (!IHANDLE_VALID(opal_inst)) {
+ prom_printf("opening opal package failed (%x)\n", opal_inst);
+ return;
+ }
+
+ prom_printf("instantiating opal at 0x%x...", base);
+
+ if (call_prom_ret("call-method", 4, 3, rets,
+ ADDR("load-opal-runtime"),
+ opal_inst,
+ base >> 32, base & 0xffffffff) != 0
+ || (rets[0] == 0 && rets[1] == 0)) {
+ prom_printf(" failed\n");
+ return;
+ }
+ entry = (((u64)rets[0]) << 32) | rets[1];
+
+ prom_printf(" done\n");
+
+ reserve_mem(base, size);
+
+ prom_debug("opal base = 0x%x\n", base);
+ prom_debug("opal align = 0x%x\n", align);
+ prom_debug("opal entry = 0x%x\n", entry);
+ prom_debug("opal size = 0x%x\n", (long)size);
+
+ prom_setprop(opal_node, "/ibm,opal", "opal-base-address",
+ &base, sizeof(base));
+ prom_setprop(opal_node, "/ibm,opal", "opal-entry-address",
+ &entry, sizeof(entry));
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
+ RELOC(prom_opal_base) = base;
+ RELOC(prom_opal_entry) = entry;
+#endif
+ prom_debug("prom_instantiate_opal: end...\n");
+}
+
+#endif /* CONFIG_PPC_POWERNV */
+
+/*
+ * Allocate room for and instantiate RTAS
+ */
+static void __init prom_instantiate_rtas(void)
+{
+ phandle rtas_node;
+ ihandle rtas_inst;
+ u32 base, entry = 0;
+ u32 size = 0;
+
+ prom_debug("prom_instantiate_rtas: start...\n");
+
+ rtas_node = call_prom("finddevice", 1, 1, ADDR("/rtas"));
+ prom_debug("rtas_node: %x\n", rtas_node);
+ if (!PHANDLE_VALID(rtas_node))
+ return;
+
+ prom_getprop(rtas_node, "rtas-size", &size, sizeof(size));
+ if (size == 0)
+ return;
+
+ base = alloc_down(size, PAGE_SIZE, 0);
+ if (base == 0)
+ prom_panic("Could not allocate memory for RTAS\n");
+
+ rtas_inst = call_prom("open", 1, 1, ADDR("/rtas"));
+ if (!IHANDLE_VALID(rtas_inst)) {
+ prom_printf("opening rtas package failed (%x)\n", rtas_inst);
+ return;
+ }
+
+ prom_printf("instantiating rtas at 0x%x...", base);
+
+ if (call_prom_ret("call-method", 3, 2, &entry,
+ ADDR("instantiate-rtas"),
+ rtas_inst, base) != 0
+ || entry == 0) {
+ prom_printf(" failed\n");
+ return;
+ }
+ prom_printf(" done\n");
+
+ reserve_mem(base, size);
+
+ prom_setprop(rtas_node, "/rtas", "linux,rtas-base",
+ &base, sizeof(base));
+ prom_setprop(rtas_node, "/rtas", "linux,rtas-entry",
+ &entry, sizeof(entry));
+
+#ifdef CONFIG_PPC_POWERNV
+ /* PowerVN takeover hack */
+ RELOC(prom_rtas_data) = base;
+ RELOC(prom_rtas_entry) = entry;
+ prom_getprop(rtas_node, "start-cpu", &RELOC(prom_rtas_start_cpu), 4);
+#endif
+ prom_debug("rtas base = 0x%x\n", base);
+ prom_debug("rtas entry = 0x%x\n", entry);
+ prom_debug("rtas size = 0x%x\n", (long)size);
+
+ prom_debug("prom_instantiate_rtas: end...\n");
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * Allocate room for and initialize TCE tables
+ */
+static void __init prom_initialize_tce_table(void)
+{
+ phandle node;
+ ihandle phb_node;
+ char compatible[64], type[64], model[64];
+ char *path = RELOC(prom_scratch);
+ u64 base, align;
+ u32 minalign, minsize;
+ u64 tce_entry, *tce_entryp;
+ u64 local_alloc_top, local_alloc_bottom;
+ u64 i;
+
+ if (RELOC(prom_iommu_off))
+ return;
+
+ prom_debug("starting prom_initialize_tce_table\n");
+
+ /* Cache current top of allocs so we reserve a single block */
+ local_alloc_top = RELOC(alloc_top_high);
+ local_alloc_bottom = local_alloc_top;
+
+ /* Search all nodes looking for PHBs. */
+ for (node = 0; prom_next_node(&node); ) {
+ compatible[0] = 0;
+ type[0] = 0;
+ model[0] = 0;
+ prom_getprop(node, "compatible",
+ compatible, sizeof(compatible));
+ prom_getprop(node, "device_type", type, sizeof(type));
+ prom_getprop(node, "model", model, sizeof(model));
+
+ if ((type[0] == 0) || (strstr(type, RELOC("pci")) == NULL))
+ continue;
+
+ /* Keep the old logic intact to avoid regression. */
+ if (compatible[0] != 0) {
+ if ((strstr(compatible, RELOC("python")) == NULL) &&
+ (strstr(compatible, RELOC("Speedwagon")) == NULL) &&
+ (strstr(compatible, RELOC("Winnipeg")) == NULL))
+ continue;
+ } else if (model[0] != 0) {
+ if ((strstr(model, RELOC("ython")) == NULL) &&
+ (strstr(model, RELOC("peedwagon")) == NULL) &&
+ (strstr(model, RELOC("innipeg")) == NULL))
+ continue;
+ }
+
+ if (prom_getprop(node, "tce-table-minalign", &minalign,
+ sizeof(minalign)) == PROM_ERROR)
+ minalign = 0;
+ if (prom_getprop(node, "tce-table-minsize", &minsize,
+ sizeof(minsize)) == PROM_ERROR)
+ minsize = 4UL << 20;
+
+ /*
+ * Even though we read what OF wants, we just set the table
+ * size to 4 MB. This is enough to map 2GB of PCI DMA space.
+ * By doing this, we avoid the pitfalls of trying to DMA to
+ * MMIO space and the DMA alias hole.
+ *
+ * On POWER4, firmware sets the TCE region by assuming
+ * each TCE table is 8MB. Using this memory for anything
+ * else will impact performance, so we always allocate 8MB.
+ * Anton
+ */
+ if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p))
+ minsize = 8UL << 20;
+ else
+ minsize = 4UL << 20;
+
+ /* Align to the greater of the align or size */
+ align = max(minalign, minsize);
+ base = alloc_down(minsize, align, 1);
+ if (base == 0)
+ prom_panic("ERROR, cannot find space for TCE table.\n");
+ if (base < local_alloc_bottom)
+ local_alloc_bottom = base;
+
+ /* It seems OF doesn't null-terminate the path :-( */
+ memset(path, 0, PROM_SCRATCH_SIZE);
+ /* Call OF to setup the TCE hardware */
+ if (call_prom("package-to-path", 3, 1, node,
+ path, PROM_SCRATCH_SIZE-1) == PROM_ERROR) {
+ prom_printf("package-to-path failed\n");
+ }
+
+ /* Save away the TCE table attributes for later use. */
+ prom_setprop(node, path, "linux,tce-base", &base, sizeof(base));
+ prom_setprop(node, path, "linux,tce-size", &minsize, sizeof(minsize));
+
+ prom_debug("TCE table: %s\n", path);
+ prom_debug("\tnode = 0x%x\n", node);
+ prom_debug("\tbase = 0x%x\n", base);
+ prom_debug("\tsize = 0x%x\n", minsize);
+
+ /* Initialize the table to have a one-to-one mapping
+ * over the allocated size.
+ */
+ tce_entryp = (u64 *)base;
+ for (i = 0; i < (minsize >> 3) ;tce_entryp++, i++) {
+ tce_entry = (i << PAGE_SHIFT);
+ tce_entry |= 0x3;
+ *tce_entryp = tce_entry;
+ }
+
+ prom_printf("opening PHB %s", path);
+ phb_node = call_prom("open", 1, 1, path);
+ if (phb_node == 0)
+ prom_printf("... failed\n");
+ else
+ prom_printf("... done\n");
+
+ call_prom("call-method", 6, 0, ADDR("set-64-bit-addressing"),
+ phb_node, -1, minsize,
+ (u32) base, (u32) (base >> 32));
+ call_prom("close", 1, 0, phb_node);
+ }
+
+ reserve_mem(local_alloc_bottom, local_alloc_top - local_alloc_bottom);
+
+ /* These are only really needed if there is a memory limit in
+ * effect, but we don't know so export them always. */
+ RELOC(prom_tce_alloc_start) = local_alloc_bottom;
+ RELOC(prom_tce_alloc_end) = local_alloc_top;
+
+ /* Flag the first invalid entry */
+ prom_debug("ending prom_initialize_tce_table\n");
+}
+#endif
+
+/*
+ * With CHRP SMP we need to use the OF to start the other processors.
+ * We can't wait until smp_boot_cpus (the OF is trashed by then)
+ * so we have to put the processors into a holding pattern controlled
+ * by the kernel (not OF) before we destroy the OF.
+ *
+ * This uses a chunk of low memory, puts some holding pattern
+ * code there and sends the other processors off to there until
+ * smp_boot_cpus tells them to do something. The holding pattern
+ * checks that address until its cpu # is there, when it is that
+ * cpu jumps to __secondary_start(). smp_boot_cpus() takes care
+ * of setting those values.
+ *
+ * We also use physical address 0x4 here to tell when a cpu
+ * is in its holding pattern code.
+ *
+ * -- Cort
+ */
+/*
+ * We want to reference the copy of __secondary_hold_* in the
+ * 0 - 0x100 address range
+ */
+#define LOW_ADDR(x) (((unsigned long) &(x)) & 0xff)
+
+static void __init prom_hold_cpus(void)
+{
+ unsigned long i;
+ unsigned int reg;
+ phandle node;
+ char type[64];
+ struct prom_t *_prom = &RELOC(prom);
+ unsigned long *spinloop
+ = (void *) LOW_ADDR(__secondary_hold_spinloop);
+ unsigned long *acknowledge
+ = (void *) LOW_ADDR(__secondary_hold_acknowledge);
+ unsigned long secondary_hold = LOW_ADDR(__secondary_hold);
+
+ prom_debug("prom_hold_cpus: start...\n");
+ prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop);
+ prom_debug(" 1) *spinloop = 0x%x\n", *spinloop);
+ prom_debug(" 1) acknowledge = 0x%x\n",
+ (unsigned long)acknowledge);
+ prom_debug(" 1) *acknowledge = 0x%x\n", *acknowledge);
+ prom_debug(" 1) secondary_hold = 0x%x\n", secondary_hold);
+
+ /* Set the common spinloop variable, so all of the secondary cpus
+ * will block when they are awakened from their OF spinloop.
+ * This must occur for both SMP and non SMP kernels, since OF will
+ * be trashed when we move the kernel.
+ */
+ *spinloop = 0;
+
+ /* look for cpus */
+ for (node = 0; prom_next_node(&node); ) {
+ type[0] = 0;
+ prom_getprop(node, "device_type", type, sizeof(type));
+ if (strcmp(type, RELOC("cpu")) != 0)
+ continue;
+
+ /* Skip non-configured cpus. */
+ if (prom_getprop(node, "status", type, sizeof(type)) > 0)
+ if (strcmp(type, RELOC("okay")) != 0)
+ continue;
+
+ reg = -1;
+ prom_getprop(node, "reg", &reg, sizeof(reg));
+
+ prom_debug("cpu hw idx = %lu\n", reg);
+
+ /* Init the acknowledge var which will be reset by
+ * the secondary cpu when it awakens from its OF
+ * spinloop.
+ */
+ *acknowledge = (unsigned long)-1;
+
+ if (reg != _prom->cpu) {
+ /* Primary Thread of non-boot cpu or any thread */
+ prom_printf("starting cpu hw idx %lu... ", reg);
+ call_prom("start-cpu", 3, 0, node,
+ secondary_hold, reg);
+
+ for (i = 0; (i < 100000000) &&
+ (*acknowledge == ((unsigned long)-1)); i++ )
+ mb();
+
+ if (*acknowledge == reg)
+ prom_printf("done\n");
+ else
+ prom_printf("failed: %x\n", *acknowledge);
+ }
+#ifdef CONFIG_SMP
+ else
+ prom_printf("boot cpu hw idx %lu\n", reg);
+#endif /* CONFIG_SMP */
+ }
+
+ prom_debug("prom_hold_cpus: end...\n");
+}
+
+
+static void __init prom_init_client_services(unsigned long pp)
+{
+ struct prom_t *_prom = &RELOC(prom);
+
+ /* Get a handle to the prom entry point before anything else */
+ RELOC(prom_entry) = pp;
+
+ /* get a handle for the stdout device */
+ _prom->chosen = call_prom("finddevice", 1, 1, ADDR("/chosen"));
+ if (!PHANDLE_VALID(_prom->chosen))
+ prom_panic("cannot find chosen"); /* msg won't be printed :( */
+
+ /* get device tree root */
+ _prom->root = call_prom("finddevice", 1, 1, ADDR("/"));
+ if (!PHANDLE_VALID(_prom->root))
+ prom_panic("cannot find device tree root"); /* msg won't be printed :( */
+
+ _prom->mmumap = 0;
+}
+
+#ifdef CONFIG_PPC32
+/*
+ * For really old powermacs, we need to map things we claim.
+ * For that, we need the ihandle of the mmu.
+ * Also, on the longtrail, we need to work around other bugs.
+ */
+static void __init prom_find_mmu(void)
+{
+ struct prom_t *_prom = &RELOC(prom);
+ phandle oprom;
+ char version[64];
+
+ oprom = call_prom("finddevice", 1, 1, ADDR("/openprom"));
+ if (!PHANDLE_VALID(oprom))
+ return;
+ if (prom_getprop(oprom, "model", version, sizeof(version)) <= 0)
+ return;
+ version[sizeof(version) - 1] = 0;
+ /* XXX might need to add other versions here */
+ if (strcmp(version, "Open Firmware, 1.0.5") == 0)
+ of_workarounds = OF_WA_CLAIM;
+ else if (strncmp(version, "FirmWorks,3.", 12) == 0) {
+ of_workarounds = OF_WA_CLAIM | OF_WA_LONGTRAIL;
+ call_prom("interpret", 1, 1, "dev /memory 0 to allow-reclaim");
+ } else
+ return;
+ _prom->memory = call_prom("open", 1, 1, ADDR("/memory"));
+ prom_getprop(_prom->chosen, "mmu", &_prom->mmumap,
+ sizeof(_prom->mmumap));
+ if (!IHANDLE_VALID(_prom->memory) || !IHANDLE_VALID(_prom->mmumap))
+ of_workarounds &= ~OF_WA_CLAIM; /* hmmm */
+}
+#else
+#define prom_find_mmu()
+#endif
+
+static void __init prom_init_stdout(void)
+{
+ struct prom_t *_prom = &RELOC(prom);
+ char *path = RELOC(of_stdout_device);
+ char type[16];
+ u32 val;
+
+ if (prom_getprop(_prom->chosen, "stdout", &val, sizeof(val)) <= 0)
+ prom_panic("cannot find stdout");
+
+ _prom->stdout = val;
+
+ /* Get the full OF pathname of the stdout device */
+ memset(path, 0, 256);
+ call_prom("instance-to-path", 3, 1, _prom->stdout, path, 255);
+ val = call_prom("instance-to-package", 1, 1, _prom->stdout);
+ prom_setprop(_prom->chosen, "/chosen", "linux,stdout-package",
+ &val, sizeof(val));
+ prom_printf("OF stdout device is: %s\n", RELOC(of_stdout_device));
+ prom_setprop(_prom->chosen, "/chosen", "linux,stdout-path",
+ path, strlen(path) + 1);
+
+ /* If it's a display, note it */
+ memset(type, 0, sizeof(type));
+ prom_getprop(val, "device_type", type, sizeof(type));
+ if (strcmp(type, RELOC("display")) == 0)
+ prom_setprop(val, path, "linux,boot-display", NULL, 0);
+}
+
+static int __init prom_find_machine_type(void)
+{
+ struct prom_t *_prom = &RELOC(prom);
+ char compat[256];
+ int len, i = 0;
+#ifdef CONFIG_PPC64
+ phandle rtas;
+ int x;
+#endif
+
+ /* Look for a PowerMac or a Cell */
+ len = prom_getprop(_prom->root, "compatible",
+ compat, sizeof(compat)-1);
+ if (len > 0) {
+ compat[len] = 0;
+ while (i < len) {
+ char *p = &compat[i];
+ int sl = strlen(p);
+ if (sl == 0)
+ break;
+ if (strstr(p, RELOC("Power Macintosh")) ||
+ strstr(p, RELOC("MacRISC")))
+ return PLATFORM_POWERMAC;
+#ifdef CONFIG_PPC64
+ /* We must make sure we don't detect the IBM Cell
+ * blades as pSeries due to some firmware issues,
+ * so we do it here.
+ */
+ if (strstr(p, RELOC("IBM,CBEA")) ||
+ strstr(p, RELOC("IBM,CPBW-1.0")))
+ return PLATFORM_GENERIC;
+#endif /* CONFIG_PPC64 */
+ i += sl + 1;
+ }
+ }
+#ifdef CONFIG_PPC64
+ /* Try to detect OPAL */
+ if (PHANDLE_VALID(call_prom("finddevice", 1, 1, ADDR("/ibm,opal"))))
+ return PLATFORM_OPAL;
+
+ /* Try to figure out if it's an IBM pSeries or any other
+ * PAPR compliant platform. We assume it is if :
+ * - /device_type is "chrp" (please, do NOT use that for future
+ * non-IBM designs !
+ * - it has /rtas
+ */
+ len = prom_getprop(_prom->root, "device_type",
+ compat, sizeof(compat)-1);
+ if (len <= 0)
+ return PLATFORM_GENERIC;
+ if (strcmp(compat, RELOC("chrp")))
+ return PLATFORM_GENERIC;
+
+ /* Default to pSeries. We need to know if we are running LPAR */
+ rtas = call_prom("finddevice", 1, 1, ADDR("/rtas"));
+ if (!PHANDLE_VALID(rtas))
+ return PLATFORM_GENERIC;
+ x = prom_getproplen(rtas, "ibm,hypertas-functions");
+ if (x != PROM_ERROR) {
+ prom_debug("Hypertas detected, assuming LPAR !\n");
+ return PLATFORM_PSERIES_LPAR;
+ }
+ return PLATFORM_PSERIES;
+#else
+ return PLATFORM_GENERIC;
+#endif
+}
+
+static int __init prom_set_color(ihandle ih, int i, int r, int g, int b)
+{
+ return call_prom("call-method", 6, 1, ADDR("color!"), ih, i, b, g, r);
+}
+
+/*
+ * If we have a display that we don't know how to drive,
+ * we will want to try to execute OF's open method for it
+ * later. However, OF will probably fall over if we do that
+ * we've taken over the MMU.
+ * So we check whether we will need to open the display,
+ * and if so, open it now.
+ */
+static void __init prom_check_displays(void)
+{
+ char type[16], *path;
+ phandle node;
+ ihandle ih;
+ int i;
+
+ static unsigned char default_colors[] = {
+ 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xaa,
+ 0x00, 0xaa, 0x00,
+ 0x00, 0xaa, 0xaa,
+ 0xaa, 0x00, 0x00,
+ 0xaa, 0x00, 0xaa,
+ 0xaa, 0xaa, 0x00,
+ 0xaa, 0xaa, 0xaa,
+ 0x55, 0x55, 0x55,
+ 0x55, 0x55, 0xff,
+ 0x55, 0xff, 0x55,
+ 0x55, 0xff, 0xff,
+ 0xff, 0x55, 0x55,
+ 0xff, 0x55, 0xff,
+ 0xff, 0xff, 0x55,
+ 0xff, 0xff, 0xff
+ };
+ const unsigned char *clut;
+
+ prom_debug("Looking for displays\n");
+ for (node = 0; prom_next_node(&node); ) {
+ memset(type, 0, sizeof(type));
+ prom_getprop(node, "device_type", type, sizeof(type));
+ if (strcmp(type, RELOC("display")) != 0)
+ continue;
+
+ /* It seems OF doesn't null-terminate the path :-( */
+ path = RELOC(prom_scratch);
+ memset(path, 0, PROM_SCRATCH_SIZE);
+
+ /*
+ * leave some room at the end of the path for appending extra
+ * arguments
+ */
+ if (call_prom("package-to-path", 3, 1, node, path,
+ PROM_SCRATCH_SIZE-10) == PROM_ERROR)
+ continue;
+ prom_printf("found display : %s, opening... ", path);
+
+ ih = call_prom("open", 1, 1, path);
+ if (ih == 0) {
+ prom_printf("failed\n");
+ continue;
+ }
+
+ /* Success */
+ prom_printf("done\n");
+ prom_setprop(node, path, "linux,opened", NULL, 0);
+
+ /* Setup a usable color table when the appropriate
+ * method is available. Should update this to set-colors */
+ clut = RELOC(default_colors);
+ for (i = 0; i < 16; i++, clut += 3)
+ if (prom_set_color(ih, i, clut[0], clut[1],
+ clut[2]) != 0)
+ break;
+
+#ifdef CONFIG_LOGO_LINUX_CLUT224
+ clut = PTRRELOC(RELOC(logo_linux_clut224.clut));
+ for (i = 0; i < RELOC(logo_linux_clut224.clutsize); i++, clut += 3)
+ if (prom_set_color(ih, i + 32, clut[0], clut[1],
+ clut[2]) != 0)
+ break;
+#endif /* CONFIG_LOGO_LINUX_CLUT224 */
+ }
+}
+
+
+/* Return (relocated) pointer to this much memory: moves initrd if reqd. */
+static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end,
+ unsigned long needed, unsigned long align)
+{
+ void *ret;
+
+ *mem_start = _ALIGN(*mem_start, align);
+ while ((*mem_start + needed) > *mem_end) {
+ unsigned long room, chunk;
+
+ prom_debug("Chunk exhausted, claiming more at %x...\n",
+ RELOC(alloc_bottom));
+ room = RELOC(alloc_top) - RELOC(alloc_bottom);
+ if (room > DEVTREE_CHUNK_SIZE)
+ room = DEVTREE_CHUNK_SIZE;
+ if (room < PAGE_SIZE)
+ prom_panic("No memory for flatten_device_tree "
+ "(no room)\n");
+ chunk = alloc_up(room, 0);
+ if (chunk == 0)
+ prom_panic("No memory for flatten_device_tree "
+ "(claim failed)\n");
+ *mem_end = chunk + room;
+ }
+
+ ret = (void *)*mem_start;
+ *mem_start += needed;
+
+ return ret;
+}
+
+#define dt_push_token(token, mem_start, mem_end) \
+ do { *((u32 *)make_room(mem_start, mem_end, 4, 4)) = token; } while(0)
+
+static unsigned long __init dt_find_string(char *str)
+{
+ char *s, *os;
+
+ s = os = (char *)RELOC(dt_string_start);
+ s += 4;
+ while (s < (char *)RELOC(dt_string_end)) {
+ if (strcmp(s, str) == 0)
+ return s - os;
+ s += strlen(s) + 1;
+ }
+ return 0;
+}
+
+/*
+ * The Open Firmware 1275 specification states properties must be 31 bytes or
+ * less, however not all firmwares obey this. Make it 64 bytes to be safe.
+ */
+#define MAX_PROPERTY_NAME 64
+
+static void __init scan_dt_build_strings(phandle node,
+ unsigned long *mem_start,
+ unsigned long *mem_end)
+{
+ char *prev_name, *namep, *sstart;
+ unsigned long soff;
+ phandle child;
+
+ sstart = (char *)RELOC(dt_string_start);
+
+ /* get and store all property names */
+ prev_name = RELOC("");
+ for (;;) {
+ /* 64 is max len of name including nul. */
+ namep = make_room(mem_start, mem_end, MAX_PROPERTY_NAME, 1);
+ if (call_prom("nextprop", 3, 1, node, prev_name, namep) != 1) {
+ /* No more nodes: unwind alloc */
+ *mem_start = (unsigned long)namep;
+ break;
+ }
+
+ /* skip "name" */
+ if (strcmp(namep, RELOC("name")) == 0) {
+ *mem_start = (unsigned long)namep;
+ prev_name = RELOC("name");
+ continue;
+ }
+ /* get/create string entry */
+ soff = dt_find_string(namep);
+ if (soff != 0) {
+ *mem_start = (unsigned long)namep;
+ namep = sstart + soff;
+ } else {
+ /* Trim off some if we can */
+ *mem_start = (unsigned long)namep + strlen(namep) + 1;
+ RELOC(dt_string_end) = *mem_start;
+ }
+ prev_name = namep;
+ }
+
+ /* do all our children */
+ child = call_prom("child", 1, 1, node);
+ while (child != 0) {
+ scan_dt_build_strings(child, mem_start, mem_end);
+ child = call_prom("peer", 1, 1, child);
+ }
+}
+
+static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
+ unsigned long *mem_end)
+{
+ phandle child;
+ char *namep, *prev_name, *sstart, *p, *ep, *lp, *path;
+ unsigned long soff;
+ unsigned char *valp;
+ static char pname[MAX_PROPERTY_NAME];
+ int l, room, has_phandle = 0;
+
+ dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end);
+
+ /* get the node's full name */
+ namep = (char *)*mem_start;
+ room = *mem_end - *mem_start;
+ if (room > 255)
+ room = 255;
+ l = call_prom("package-to-path", 3, 1, node, namep, room);
+ if (l >= 0) {
+ /* Didn't fit? Get more room. */
+ if (l >= room) {
+ if (l >= *mem_end - *mem_start)
+ namep = make_room(mem_start, mem_end, l+1, 1);
+ call_prom("package-to-path", 3, 1, node, namep, l);
+ }
+ namep[l] = '\0';
+
+ /* Fixup an Apple bug where they have bogus \0 chars in the
+ * middle of the path in some properties, and extract
+ * the unit name (everything after the last '/').
+ */
+ for (lp = p = namep, ep = namep + l; p < ep; p++) {
+ if (*p == '/')
+ lp = namep;
+ else if (*p != 0)
+ *lp++ = *p;
+ }
+ *lp = 0;
+ *mem_start = _ALIGN((unsigned long)lp + 1, 4);
+ }
+
+ /* get it again for debugging */
+ path = RELOC(prom_scratch);
+ memset(path, 0, PROM_SCRATCH_SIZE);
+ call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
+
+ /* get and store all properties */
+ prev_name = RELOC("");
+ sstart = (char *)RELOC(dt_string_start);
+ for (;;) {
+ if (call_prom("nextprop", 3, 1, node, prev_name,
+ RELOC(pname)) != 1)
+ break;
+
+ /* skip "name" */
+ if (strcmp(RELOC(pname), RELOC("name")) == 0) {
+ prev_name = RELOC("name");
+ continue;
+ }
+
+ /* find string offset */
+ soff = dt_find_string(RELOC(pname));
+ if (soff == 0) {
+ prom_printf("WARNING: Can't find string index for"
+ " <%s>, node %s\n", RELOC(pname), path);
+ break;
+ }
+ prev_name = sstart + soff;
+
+ /* get length */
+ l = call_prom("getproplen", 2, 1, node, RELOC(pname));
+
+ /* sanity checks */
+ if (l == PROM_ERROR)
+ continue;
+
+ /* push property head */
+ dt_push_token(OF_DT_PROP, mem_start, mem_end);
+ dt_push_token(l, mem_start, mem_end);
+ dt_push_token(soff, mem_start, mem_end);
+
+ /* push property content */
+ valp = make_room(mem_start, mem_end, l, 4);
+ call_prom("getprop", 4, 1, node, RELOC(pname), valp, l);
+ *mem_start = _ALIGN(*mem_start, 4);
+
+ if (!strcmp(RELOC(pname), RELOC("phandle")))
+ has_phandle = 1;
+ }
+
+ /* Add a "linux,phandle" property if no "phandle" property already
+ * existed (can happen with OPAL)
+ */
+ if (!has_phandle) {
+ soff = dt_find_string(RELOC("linux,phandle"));
+ if (soff == 0)
+ prom_printf("WARNING: Can't find string index for"
+ " <linux-phandle> node %s\n", path);
+ else {
+ dt_push_token(OF_DT_PROP, mem_start, mem_end);
+ dt_push_token(4, mem_start, mem_end);
+ dt_push_token(soff, mem_start, mem_end);
+ valp = make_room(mem_start, mem_end, 4, 4);
+ *(u32 *)valp = node;
+ }
+ }
+
+ /* do all our children */
+ child = call_prom("child", 1, 1, node);
+ while (child != 0) {
+ scan_dt_build_struct(child, mem_start, mem_end);
+ child = call_prom("peer", 1, 1, child);
+ }
+
+ dt_push_token(OF_DT_END_NODE, mem_start, mem_end);
+}
+
+static void __init flatten_device_tree(void)
+{
+ phandle root;
+ unsigned long mem_start, mem_end, room;
+ struct boot_param_header *hdr;
+ struct prom_t *_prom = &RELOC(prom);
+ char *namep;
+ u64 *rsvmap;
+
+ /*
+ * Check how much room we have between alloc top & bottom (+/- a
+ * few pages), crop to 1MB, as this is our "chunk" size
+ */
+ room = RELOC(alloc_top) - RELOC(alloc_bottom) - 0x4000;
+ if (room > DEVTREE_CHUNK_SIZE)
+ room = DEVTREE_CHUNK_SIZE;
+ prom_debug("starting device tree allocs at %x\n", RELOC(alloc_bottom));
+
+ /* Now try to claim that */
+ mem_start = (unsigned long)alloc_up(room, PAGE_SIZE);
+ if (mem_start == 0)
+ prom_panic("Can't allocate initial device-tree chunk\n");
+ mem_end = mem_start + room;
+
+ /* Get root of tree */
+ root = call_prom("peer", 1, 1, (phandle)0);
+ if (root == (phandle)0)
+ prom_panic ("couldn't get device tree root\n");
+
+ /* Build header and make room for mem rsv map */
+ mem_start = _ALIGN(mem_start, 4);
+ hdr = make_room(&mem_start, &mem_end,
+ sizeof(struct boot_param_header), 4);
+ RELOC(dt_header_start) = (unsigned long)hdr;
+ rsvmap = make_room(&mem_start, &mem_end, sizeof(mem_reserve_map), 8);
+
+ /* Start of strings */
+ mem_start = PAGE_ALIGN(mem_start);
+ RELOC(dt_string_start) = mem_start;
+ mem_start += 4; /* hole */
+
+ /* Add "linux,phandle" in there, we'll need it */
+ namep = make_room(&mem_start, &mem_end, 16, 1);
+ strcpy(namep, RELOC("linux,phandle"));
+ mem_start = (unsigned long)namep + strlen(namep) + 1;
+
+ /* Build string array */
+ prom_printf("Building dt strings...\n");
+ scan_dt_build_strings(root, &mem_start, &mem_end);
+ RELOC(dt_string_end) = mem_start;
+
+ /* Build structure */
+ mem_start = PAGE_ALIGN(mem_start);
+ RELOC(dt_struct_start) = mem_start;
+ prom_printf("Building dt structure...\n");
+ scan_dt_build_struct(root, &mem_start, &mem_end);
+ dt_push_token(OF_DT_END, &mem_start, &mem_end);
+ RELOC(dt_struct_end) = PAGE_ALIGN(mem_start);
+
+ /* Finish header */
+ hdr->boot_cpuid_phys = _prom->cpu;
+ hdr->magic = OF_DT_HEADER;
+ hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start);
+ hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start);
+ hdr->off_dt_strings = RELOC(dt_string_start) - RELOC(dt_header_start);
+ hdr->dt_strings_size = RELOC(dt_string_end) - RELOC(dt_string_start);
+ hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - RELOC(dt_header_start);
+ hdr->version = OF_DT_VERSION;
+ /* Version 16 is not backward compatible */
+ hdr->last_comp_version = 0x10;
+
+ /* Copy the reserve map in */
+ memcpy(rsvmap, RELOC(mem_reserve_map), sizeof(mem_reserve_map));
+
+#ifdef DEBUG_PROM
+ {
+ int i;
+ prom_printf("reserved memory map:\n");
+ for (i = 0; i < RELOC(mem_reserve_cnt); i++)
+ prom_printf(" %x - %x\n",
+ RELOC(mem_reserve_map)[i].base,
+ RELOC(mem_reserve_map)[i].size);
+ }
+#endif
+ /* Bump mem_reserve_cnt to cause further reservations to fail
+ * since it's too late.
+ */
+ RELOC(mem_reserve_cnt) = MEM_RESERVE_MAP_SIZE;
+
+ prom_printf("Device tree strings 0x%x -> 0x%x\n",
+ RELOC(dt_string_start), RELOC(dt_string_end));
+ prom_printf("Device tree struct 0x%x -> 0x%x\n",
+ RELOC(dt_struct_start), RELOC(dt_struct_end));
+
+}
+
+#ifdef CONFIG_PPC_MAPLE
+/* PIBS Version 1.05.0000 04/26/2005 has an incorrect /ht/isa/ranges property.
+ * The values are bad, and it doesn't even have the right number of cells. */
+static void __init fixup_device_tree_maple(void)
+{
+ phandle isa;
+ u32 rloc = 0x01002000; /* IO space; PCI device = 4 */
+ u32 isa_ranges[6];
+ char *name;
+
+ name = "/ht@0/isa@4";
+ isa = call_prom("finddevice", 1, 1, ADDR(name));
+ if (!PHANDLE_VALID(isa)) {
+ name = "/ht@0/isa@6";
+ isa = call_prom("finddevice", 1, 1, ADDR(name));
+ rloc = 0x01003000; /* IO space; PCI device = 6 */
+ }
+ if (!PHANDLE_VALID(isa))
+ return;
+
+ if (prom_getproplen(isa, "ranges") != 12)
+ return;
+ if (prom_getprop(isa, "ranges", isa_ranges, sizeof(isa_ranges))
+ == PROM_ERROR)
+ return;
+
+ if (isa_ranges[0] != 0x1 ||
+ isa_ranges[1] != 0xf4000000 ||
+ isa_ranges[2] != 0x00010000)
+ return;
+
+ prom_printf("Fixing up bogus ISA range on Maple/Apache...\n");
+
+ isa_ranges[0] = 0x1;
+ isa_ranges[1] = 0x0;
+ isa_ranges[2] = rloc;
+ isa_ranges[3] = 0x0;
+ isa_ranges[4] = 0x0;
+ isa_ranges[5] = 0x00010000;
+ prom_setprop(isa, name, "ranges",
+ isa_ranges, sizeof(isa_ranges));
+}
+
+#define CPC925_MC_START 0xf8000000
+#define CPC925_MC_LENGTH 0x1000000
+/* The values for memory-controller don't have right number of cells */
+static void __init fixup_device_tree_maple_memory_controller(void)
+{
+ phandle mc;
+ u32 mc_reg[4];
+ char *name = "/hostbridge@f8000000";
+ struct prom_t *_prom = &RELOC(prom);
+ u32 ac, sc;
+
+ mc = call_prom("finddevice", 1, 1, ADDR(name));
+ if (!PHANDLE_VALID(mc))
+ return;
+
+ if (prom_getproplen(mc, "reg") != 8)
+ return;
+
+ prom_getprop(_prom->root, "#address-cells", &ac, sizeof(ac));
+ prom_getprop(_prom->root, "#size-cells", &sc, sizeof(sc));
+ if ((ac != 2) || (sc != 2))
+ return;
+
+ if (prom_getprop(mc, "reg", mc_reg, sizeof(mc_reg)) == PROM_ERROR)
+ return;
+
+ if (mc_reg[0] != CPC925_MC_START || mc_reg[1] != CPC925_MC_LENGTH)
+ return;
+
+ prom_printf("Fixing up bogus hostbridge on Maple...\n");
+
+ mc_reg[0] = 0x0;
+ mc_reg[1] = CPC925_MC_START;
+ mc_reg[2] = 0x0;
+ mc_reg[3] = CPC925_MC_LENGTH;
+ prom_setprop(mc, name, "reg", mc_reg, sizeof(mc_reg));
+}
+#else
+#define fixup_device_tree_maple()
+#define fixup_device_tree_maple_memory_controller()
+#endif
+
+#ifdef CONFIG_PPC_CHRP
+/*
+ * Pegasos and BriQ lacks the "ranges" property in the isa node
+ * Pegasos needs decimal IRQ 14/15, not hexadecimal
+ * Pegasos has the IDE configured in legacy mode, but advertised as native
+ */
+static void __init fixup_device_tree_chrp(void)
+{
+ phandle ph;
+ u32 prop[6];
+ u32 rloc = 0x01006000; /* IO space; PCI device = 12 */
+ char *name;
+ int rc;
+
+ name = "/pci@80000000/isa@c";
+ ph = call_prom("finddevice", 1, 1, ADDR(name));
+ if (!PHANDLE_VALID(ph)) {
+ name = "/pci@ff500000/isa@6";
+ ph = call_prom("finddevice", 1, 1, ADDR(name));
+ rloc = 0x01003000; /* IO space; PCI device = 6 */
+ }
+ if (PHANDLE_VALID(ph)) {
+ rc = prom_getproplen(ph, "ranges");
+ if (rc == 0 || rc == PROM_ERROR) {
+ prom_printf("Fixing up missing ISA range on Pegasos...\n");
+
+ prop[0] = 0x1;
+ prop[1] = 0x0;
+ prop[2] = rloc;
+ prop[3] = 0x0;
+ prop[4] = 0x0;
+ prop[5] = 0x00010000;
+ prom_setprop(ph, name, "ranges", prop, sizeof(prop));
+ }
+ }
+
+ name = "/pci@80000000/ide@C,1";
+ ph = call_prom("finddevice", 1, 1, ADDR(name));
+ if (PHANDLE_VALID(ph)) {
+ prom_printf("Fixing up IDE interrupt on Pegasos...\n");
+ prop[0] = 14;
+ prop[1] = 0x0;
+ prom_setprop(ph, name, "interrupts", prop, 2*sizeof(u32));
+ prom_printf("Fixing up IDE class-code on Pegasos...\n");
+ rc = prom_getprop(ph, "class-code", prop, sizeof(u32));
+ if (rc == sizeof(u32)) {
+ prop[0] &= ~0x5;
+ prom_setprop(ph, name, "class-code", prop, sizeof(u32));
+ }
+ }
+}
+#else
+#define fixup_device_tree_chrp()
+#endif
+
+#if defined(CONFIG_PPC64) && defined(CONFIG_PPC_PMAC)
+static void __init fixup_device_tree_pmac(void)
+{
+ phandle u3, i2c, mpic;
+ u32 u3_rev;
+ u32 interrupts[2];
+ u32 parent;
+
+ /* Some G5s have a missing interrupt definition, fix it up here */
+ u3 = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000"));
+ if (!PHANDLE_VALID(u3))
+ return;
+ i2c = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000/i2c@f8001000"));
+ if (!PHANDLE_VALID(i2c))
+ return;
+ mpic = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000/mpic@f8040000"));
+ if (!PHANDLE_VALID(mpic))
+ return;
+
+ /* check if proper rev of u3 */
+ if (prom_getprop(u3, "device-rev", &u3_rev, sizeof(u3_rev))
+ == PROM_ERROR)
+ return;
+ if (u3_rev < 0x35 || u3_rev > 0x39)
+ return;
+ /* does it need fixup ? */
+ if (prom_getproplen(i2c, "interrupts") > 0)
+ return;
+
+ prom_printf("fixing up bogus interrupts for u3 i2c...\n");
+
+ /* interrupt on this revision of u3 is number 0 and level */
+ interrupts[0] = 0;
+ interrupts[1] = 1;
+ prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupts",
+ &interrupts, sizeof(interrupts));
+ parent = (u32)mpic;
+ prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupt-parent",
+ &parent, sizeof(parent));
+}
+#else
+#define fixup_device_tree_pmac()
+#endif
+
+#ifdef CONFIG_PPC_EFIKA
+/*
+ * The MPC5200 FEC driver requires an phy-handle property to tell it how
+ * to talk to the phy. If the phy-handle property is missing, then this
+ * function is called to add the appropriate nodes and link it to the
+ * ethernet node.
+ */
+static void __init fixup_device_tree_efika_add_phy(void)
+{
+ u32 node;
+ char prop[64];
+ int rv;
+
+ /* Check if /builtin/ethernet exists - bail if it doesn't */
+ node = call_prom("finddevice", 1, 1, ADDR("/builtin/ethernet"));
+ if (!PHANDLE_VALID(node))
+ return;
+
+ /* Check if the phy-handle property exists - bail if it does */
+ rv = prom_getprop(node, "phy-handle", prop, sizeof(prop));
+ if (!rv)
+ return;
+
+ /*
+ * At this point the ethernet device doesn't have a phy described.
+ * Now we need to add the missing phy node and linkage
+ */
+
+ /* Check for an MDIO bus node - if missing then create one */
+ node = call_prom("finddevice", 1, 1, ADDR("/builtin/mdio"));
+ if (!PHANDLE_VALID(node)) {
+ prom_printf("Adding Ethernet MDIO node\n");
+ call_prom("interpret", 1, 1,
+ " s\" /builtin\" find-device"
+ " new-device"
+ " 1 encode-int s\" #address-cells\" property"
+ " 0 encode-int s\" #size-cells\" property"
+ " s\" mdio\" device-name"
+ " s\" fsl,mpc5200b-mdio\" encode-string"
+ " s\" compatible\" property"
+ " 0xf0003000 0x400 reg"
+ " 0x2 encode-int"
+ " 0x5 encode-int encode+"
+ " 0x3 encode-int encode+"
+ " s\" interrupts\" property"
+ " finish-device");
+ };
+
+ /* Check for a PHY device node - if missing then create one and
+ * give it's phandle to the ethernet node */
+ node = call_prom("finddevice", 1, 1,
+ ADDR("/builtin/mdio/ethernet-phy"));
+ if (!PHANDLE_VALID(node)) {
+ prom_printf("Adding Ethernet PHY node\n");
+ call_prom("interpret", 1, 1,
+ " s\" /builtin/mdio\" find-device"
+ " new-device"
+ " s\" ethernet-phy\" device-name"
+ " 0x10 encode-int s\" reg\" property"
+ " my-self"
+ " ihandle>phandle"
+ " finish-device"
+ " s\" /builtin/ethernet\" find-device"
+ " encode-int"
+ " s\" phy-handle\" property"
+ " device-end");
+ }
+}
+
+static void __init fixup_device_tree_efika(void)
+{
+ int sound_irq[3] = { 2, 2, 0 };
+ int bcomm_irq[3*16] = { 3,0,0, 3,1,0, 3,2,0, 3,3,0,
+ 3,4,0, 3,5,0, 3,6,0, 3,7,0,
+ 3,8,0, 3,9,0, 3,10,0, 3,11,0,
+ 3,12,0, 3,13,0, 3,14,0, 3,15,0 };
+ u32 node;
+ char prop[64];
+ int rv, len;
+
+ /* Check if we're really running on a EFIKA */
+ node = call_prom("finddevice", 1, 1, ADDR("/"));
+ if (!PHANDLE_VALID(node))
+ return;
+
+ rv = prom_getprop(node, "model", prop, sizeof(prop));
+ if (rv == PROM_ERROR)
+ return;
+ if (strcmp(prop, "EFIKA5K2"))
+ return;
+
+ prom_printf("Applying EFIKA device tree fixups\n");
+
+ /* Claiming to be 'chrp' is death */
+ node = call_prom("finddevice", 1, 1, ADDR("/"));
+ rv = prom_getprop(node, "device_type", prop, sizeof(prop));
+ if (rv != PROM_ERROR && (strcmp(prop, "chrp") == 0))
+ prom_setprop(node, "/", "device_type", "efika", sizeof("efika"));
+
+ /* CODEGEN,description is exposed in /proc/cpuinfo so
+ fix that too */
+ rv = prom_getprop(node, "CODEGEN,description", prop, sizeof(prop));
+ if (rv != PROM_ERROR && (strstr(prop, "CHRP")))
+ prom_setprop(node, "/", "CODEGEN,description",
+ "Efika 5200B PowerPC System",
+ sizeof("Efika 5200B PowerPC System"));
+
+ /* Fixup bestcomm interrupts property */
+ node = call_prom("finddevice", 1, 1, ADDR("/builtin/bestcomm"));
+ if (PHANDLE_VALID(node)) {
+ len = prom_getproplen(node, "interrupts");
+ if (len == 12) {
+ prom_printf("Fixing bestcomm interrupts property\n");
+ prom_setprop(node, "/builtin/bestcom", "interrupts",
+ bcomm_irq, sizeof(bcomm_irq));
+ }
+ }
+
+ /* Fixup sound interrupts property */
+ node = call_prom("finddevice", 1, 1, ADDR("/builtin/sound"));
+ if (PHANDLE_VALID(node)) {
+ rv = prom_getprop(node, "interrupts", prop, sizeof(prop));
+ if (rv == PROM_ERROR) {
+ prom_printf("Adding sound interrupts property\n");
+ prom_setprop(node, "/builtin/sound", "interrupts",
+ sound_irq, sizeof(sound_irq));
+ }
+ }
+
+ /* Make sure ethernet phy-handle property exists */
+ fixup_device_tree_efika_add_phy();
+}
+#else
+#define fixup_device_tree_efika()
+#endif
+
+static void __init fixup_device_tree(void)
+{
+ fixup_device_tree_maple();
+ fixup_device_tree_maple_memory_controller();
+ fixup_device_tree_chrp();
+ fixup_device_tree_pmac();
+ fixup_device_tree_efika();
+}
+
+static void __init prom_find_boot_cpu(void)
+{
+ struct prom_t *_prom = &RELOC(prom);
+ u32 getprop_rval;
+ ihandle prom_cpu;
+ phandle cpu_pkg;
+
+ _prom->cpu = 0;
+ if (prom_getprop(_prom->chosen, "cpu", &prom_cpu, sizeof(prom_cpu)) <= 0)
+ return;
+
+ cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu);
+
+ prom_getprop(cpu_pkg, "reg", &getprop_rval, sizeof(getprop_rval));
+ _prom->cpu = getprop_rval;
+
+ prom_debug("Booting CPU hw index = %lu\n", _prom->cpu);
+}
+
+static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ struct prom_t *_prom = &RELOC(prom);
+
+ if (r3 && r4 && r4 != 0xdeadbeef) {
+ unsigned long val;
+
+ RELOC(prom_initrd_start) = is_kernel_addr(r3) ? __pa(r3) : r3;
+ RELOC(prom_initrd_end) = RELOC(prom_initrd_start) + r4;
+
+ val = RELOC(prom_initrd_start);
+ prom_setprop(_prom->chosen, "/chosen", "linux,initrd-start",
+ &val, sizeof(val));
+ val = RELOC(prom_initrd_end);
+ prom_setprop(_prom->chosen, "/chosen", "linux,initrd-end",
+ &val, sizeof(val));
+
+ reserve_mem(RELOC(prom_initrd_start),
+ RELOC(prom_initrd_end) - RELOC(prom_initrd_start));
+
+ prom_debug("initrd_start=0x%x\n", RELOC(prom_initrd_start));
+ prom_debug("initrd_end=0x%x\n", RELOC(prom_initrd_end));
+ }
+#endif /* CONFIG_BLK_DEV_INITRD */
+}
+
+
+/*
+ * We enter here early on, when the Open Firmware prom is still
+ * handling exceptions and the MMU hash table for us.
+ */
+
+unsigned long __init prom_init(unsigned long r3, unsigned long r4,
+ unsigned long pp,
+ unsigned long r6, unsigned long r7,
+ unsigned long kbase)
+{
+ struct prom_t *_prom;
+ unsigned long hdr;
+
+#ifdef CONFIG_PPC32
+ unsigned long offset = reloc_offset();
+ reloc_got2(offset);
+#endif
+
+ _prom = &RELOC(prom);
+
+ /*
+ * First zero the BSS
+ */
+ memset(&RELOC(__bss_start), 0, __bss_stop - __bss_start);
+
+ /*
+ * Init interface to Open Firmware, get some node references,
+ * like /chosen
+ */
+ prom_init_client_services(pp);
+
+ /*
+ * See if this OF is old enough that we need to do explicit maps
+ * and other workarounds
+ */
+ prom_find_mmu();
+
+ /*
+ * Init prom stdout device
+ */
+ prom_init_stdout();
+
+ prom_printf("Preparing to boot %s", RELOC(linux_banner));
+
+ /*
+ * Get default machine type. At this point, we do not differentiate
+ * between pSeries SMP and pSeries LPAR
+ */
+ RELOC(of_platform) = prom_find_machine_type();
+ prom_printf("Detected machine type: %x\n", RELOC(of_platform));
+
+#ifndef CONFIG_NONSTATIC_KERNEL
+ /* Bail if this is a kdump kernel. */
+ if (PHYSICAL_START > 0)
+ prom_panic("Error: You can't boot a kdump kernel from OF!\n");
+#endif
+
+ /*
+ * Check for an initrd
+ */
+ prom_check_initrd(r3, r4);
+
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+ /*
+ * On pSeries, inform the firmware about our capabilities
+ */
+ if (RELOC(of_platform) == PLATFORM_PSERIES ||
+ RELOC(of_platform) == PLATFORM_PSERIES_LPAR)
+ prom_send_capabilities();
+#endif
+
+ /*
+ * Copy the CPU hold code
+ */
+ if (RELOC(of_platform) != PLATFORM_POWERMAC)
+ copy_and_flush(0, kbase, 0x100, 0);
+
+ /*
+ * Do early parsing of command line
+ */
+ early_cmdline_parse();
+
+ /*
+ * Initialize memory management within prom_init
+ */
+ prom_init_mem();
+
+ /*
+ * Determine which cpu is actually running right _now_
+ */
+ prom_find_boot_cpu();
+
+ /*
+ * Initialize display devices
+ */
+ prom_check_displays();
+
+#ifdef CONFIG_PPC64
+ /*
+ * Initialize IOMMU (TCE tables) on pSeries. Do that before anything else
+ * that uses the allocator, we need to make sure we get the top of memory
+ * available for us here...
+ */
+ if (RELOC(of_platform) == PLATFORM_PSERIES)
+ prom_initialize_tce_table();
+#endif
+
+ /*
+ * On non-powermacs, try to instantiate RTAS. PowerMacs don't
+ * have a usable RTAS implementation.
+ */
+ if (RELOC(of_platform) != PLATFORM_POWERMAC &&
+ RELOC(of_platform) != PLATFORM_OPAL)
+ prom_instantiate_rtas();
+
+#ifdef CONFIG_PPC_POWERNV
+ /* Detect HAL and try instanciating it & doing takeover */
+ if (RELOC(of_platform) == PLATFORM_PSERIES_LPAR) {
+ prom_query_opal();
+ if (RELOC(of_platform) == PLATFORM_OPAL) {
+ prom_opal_hold_cpus();
+ prom_opal_takeover();
+ }
+ } else if (RELOC(of_platform) == PLATFORM_OPAL)
+ prom_instantiate_opal();
+#endif
+
+ /*
+ * On non-powermacs, put all CPUs in spin-loops.
+ *
+ * PowerMacs use a different mechanism to spin CPUs
+ */
+ if (RELOC(of_platform) != PLATFORM_POWERMAC &&
+ RELOC(of_platform) != PLATFORM_OPAL)
+ prom_hold_cpus();
+
+ /*
+ * Fill in some infos for use by the kernel later on
+ */
+ if (RELOC(prom_memory_limit))
+ prom_setprop(_prom->chosen, "/chosen", "linux,memory-limit",
+ &RELOC(prom_memory_limit),
+ sizeof(prom_memory_limit));
+#ifdef CONFIG_PPC64
+ if (RELOC(prom_iommu_off))
+ prom_setprop(_prom->chosen, "/chosen", "linux,iommu-off",
+ NULL, 0);
+
+ if (RELOC(prom_iommu_force_on))
+ prom_setprop(_prom->chosen, "/chosen", "linux,iommu-force-on",
+ NULL, 0);
+
+ if (RELOC(prom_tce_alloc_start)) {
+ prom_setprop(_prom->chosen, "/chosen", "linux,tce-alloc-start",
+ &RELOC(prom_tce_alloc_start),
+ sizeof(prom_tce_alloc_start));
+ prom_setprop(_prom->chosen, "/chosen", "linux,tce-alloc-end",
+ &RELOC(prom_tce_alloc_end),
+ sizeof(prom_tce_alloc_end));
+ }
+#endif
+
+ /*
+ * Fixup any known bugs in the device-tree
+ */
+ fixup_device_tree();
+
+ /*
+ * Now finally create the flattened device-tree
+ */
+ prom_printf("copying OF device tree...\n");
+ flatten_device_tree();
+
+ /*
+ * in case stdin is USB and still active on IBM machines...
+ * Unfortunately quiesce crashes on some powermacs if we have
+ * closed stdin already (in particular the powerbook 101). It
+ * appears that the OPAL version of OFW doesn't like it either.
+ */
+ if (RELOC(of_platform) != PLATFORM_POWERMAC &&
+ RELOC(of_platform) != PLATFORM_OPAL)
+ prom_close_stdin();
+
+ /*
+ * Call OF "quiesce" method to shut down pending DMA's from
+ * devices etc...
+ */
+ prom_printf("Calling quiesce...\n");
+ call_prom("quiesce", 0, 0);
+
+ /*
+ * And finally, call the kernel passing it the flattened device
+ * tree and NULL as r5, thus triggering the new entry point which
+ * is common to us and kexec
+ */
+ hdr = RELOC(dt_header_start);
+
+ /* Don't print anything after quiesce under OPAL, it crashes OFW */
+ if (RELOC(of_platform) != PLATFORM_OPAL) {
+ prom_printf("returning from prom_init\n");
+ prom_debug("->dt_header_start=0x%x\n", hdr);
+ }
+
+#ifdef CONFIG_PPC32
+ reloc_got2(-offset);
+#endif
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
+ /* OPAL early debug gets the OPAL base & entry in r8 and r9 */
+ __start(hdr, kbase, 0, 0, 0,
+ RELOC(prom_opal_base), RELOC(prom_opal_entry));
+#else
+ __start(hdr, kbase, 0, 0, 0, 0, 0);
+#endif
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
new file mode 100644
index 00000000..70f4286e
--- /dev/null
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -0,0 +1,80 @@
+#!/bin/sh
+#
+# Copyright © 2008 IBM Corporation
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version
+# 2 of the License, or (at your option) any later version.
+
+# This script checks prom_init.o to see what external symbols it
+# is using, if it finds symbols not in the whitelist it returns
+# an error. The point of this is to discourage people from
+# intentionally or accidentally adding new code to prom_init.c
+# which has side effects on other parts of the kernel.
+
+# If you really need to reference something from prom_init.o add
+# it to the list below:
+
+WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
+_end enter_prom memcpy memset reloc_offset __secondary_hold
+__secondary_hold_acknowledge __secondary_hold_spinloop __start
+strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224
+reloc_got2 kernstart_addr memstart_addr linux_banner _stext
+opal_query_takeover opal_do_takeover opal_enter_rtas opal_secondary_entry
+boot_command_line"
+
+NM="$1"
+OBJ="$2"
+
+ERROR=0
+
+for UNDEF in $($NM -u $OBJ | awk '{print $2}')
+do
+ # On 64-bit nm gives us the function descriptors, which have
+ # a leading . on the name, so strip it off here.
+ UNDEF="${UNDEF#.}"
+
+ if [ $KBUILD_VERBOSE ]; then
+ if [ $KBUILD_VERBOSE -ne 0 ]; then
+ echo "Checking prom_init.o symbol '$UNDEF'"
+ fi
+ fi
+
+ OK=0
+ for WHITE in $WHITELIST
+ do
+ if [ "$UNDEF" = "$WHITE" ]; then
+ OK=1
+ break
+ fi
+ done
+
+ # ignore register save/restore funcitons
+ if [ "${UNDEF:0:9}" = "_restgpr_" ]; then
+ OK=1
+ fi
+ if [ "${UNDEF:0:10}" = "_restgpr0_" ]; then
+ OK=1
+ fi
+ if [ "${UNDEF:0:11}" = "_rest32gpr_" ]; then
+ OK=1
+ fi
+ if [ "${UNDEF:0:9}" = "_savegpr_" ]; then
+ OK=1
+ fi
+ if [ "${UNDEF:0:10}" = "_savegpr0_" ]; then
+ OK=1
+ fi
+ if [ "${UNDEF:0:11}" = "_save32gpr_" ]; then
+ OK=1
+ fi
+
+ if [ $OK -eq 0 ]; then
+ ERROR=1
+ echo "Error: External symbol '$UNDEF' referenced" \
+ "from prom_init.c" >&2
+ fi
+done
+
+exit $ERROR
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
new file mode 100644
index 00000000..4e1331b8
--- /dev/null
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -0,0 +1,34 @@
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/ioport.h>
+#include <linux/etherdevice.h>
+#include <linux/of_address.h>
+#include <asm/prom.h>
+
+void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
+ unsigned long *busno, unsigned long *phys, unsigned long *size)
+{
+ const u32 *dma_window;
+ u32 cells;
+ const unsigned char *prop;
+
+ dma_window = dma_window_prop;
+
+ /* busno is always one cell */
+ *busno = *(dma_window++);
+
+ prop = of_get_property(dn, "ibm,#dma-address-cells", NULL);
+ if (!prop)
+ prop = of_get_property(dn, "#address-cells", NULL);
+
+ cells = prop ? *(u32 *)prop : of_n_addr_cells(dn);
+ *phys = of_read_number(dma_window, cells);
+
+ dma_window += cells;
+
+ prop = of_get_property(dn, "ibm,#dma-size-cells", NULL);
+ cells = prop ? *(u32 *)prop : of_n_size_cells(dn);
+ *size = of_read_number(dma_window, cells);
+}
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
new file mode 100644
index 00000000..8d8e0288
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace.c
@@ -0,0 +1,1757 @@
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Derived from "arch/m68k/kernel/ptrace.c"
+ * Copyright (C) 1994 by Hamish Macdonald
+ * Taken from linux/kernel/ptrace.c and modified for M680x0.
+ * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
+ *
+ * Modified by Cort Dougan (cort@hq.fsmlabs.com)
+ * and Paul Mackerras (paulus@samba.org).
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file README.legal in the main directory of
+ * this archive for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/regset.h>
+#include <linux/tracehook.h>
+#include <linux/elf.h>
+#include <linux/user.h>
+#include <linux/security.h>
+#include <linux/signal.h>
+#include <linux/seccomp.h>
+#include <linux/audit.h>
+#include <trace/syscall.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/perf_event.h>
+
+#include <asm/uaccess.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/switch_to.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+/*
+ * The parameter save area on the stack is used to store arguments being passed
+ * to callee function and is located at fixed offset from stack pointer.
+ */
+#ifdef CONFIG_PPC32
+#define PARAMETER_SAVE_AREA_OFFSET 24 /* bytes */
+#else /* CONFIG_PPC32 */
+#define PARAMETER_SAVE_AREA_OFFSET 48 /* bytes */
+#endif
+
+struct pt_regs_offset {
+ const char *name;
+ int offset;
+};
+
+#define STR(s) #s /* convert to string */
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+#define GPR_OFFSET_NAME(num) \
+ {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+static const struct pt_regs_offset regoffset_table[] = {
+ GPR_OFFSET_NAME(0),
+ GPR_OFFSET_NAME(1),
+ GPR_OFFSET_NAME(2),
+ GPR_OFFSET_NAME(3),
+ GPR_OFFSET_NAME(4),
+ GPR_OFFSET_NAME(5),
+ GPR_OFFSET_NAME(6),
+ GPR_OFFSET_NAME(7),
+ GPR_OFFSET_NAME(8),
+ GPR_OFFSET_NAME(9),
+ GPR_OFFSET_NAME(10),
+ GPR_OFFSET_NAME(11),
+ GPR_OFFSET_NAME(12),
+ GPR_OFFSET_NAME(13),
+ GPR_OFFSET_NAME(14),
+ GPR_OFFSET_NAME(15),
+ GPR_OFFSET_NAME(16),
+ GPR_OFFSET_NAME(17),
+ GPR_OFFSET_NAME(18),
+ GPR_OFFSET_NAME(19),
+ GPR_OFFSET_NAME(20),
+ GPR_OFFSET_NAME(21),
+ GPR_OFFSET_NAME(22),
+ GPR_OFFSET_NAME(23),
+ GPR_OFFSET_NAME(24),
+ GPR_OFFSET_NAME(25),
+ GPR_OFFSET_NAME(26),
+ GPR_OFFSET_NAME(27),
+ GPR_OFFSET_NAME(28),
+ GPR_OFFSET_NAME(29),
+ GPR_OFFSET_NAME(30),
+ GPR_OFFSET_NAME(31),
+ REG_OFFSET_NAME(nip),
+ REG_OFFSET_NAME(msr),
+ REG_OFFSET_NAME(ctr),
+ REG_OFFSET_NAME(link),
+ REG_OFFSET_NAME(xer),
+ REG_OFFSET_NAME(ccr),
+#ifdef CONFIG_PPC64
+ REG_OFFSET_NAME(softe),
+#else
+ REG_OFFSET_NAME(mq),
+#endif
+ REG_OFFSET_NAME(trap),
+ REG_OFFSET_NAME(dar),
+ REG_OFFSET_NAME(dsisr),
+ REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name: the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL;
+ */
+int regs_query_register_offset(const char *name)
+{
+ const struct pt_regs_offset *roff;
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (!strcmp(roff->name, name))
+ return roff->offset;
+ return -EINVAL;
+}
+
+/**
+ * regs_query_register_name() - query register name from its offset
+ * @offset: the offset of a register in struct pt_regs.
+ *
+ * regs_query_register_name() returns the name of a register from its
+ * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
+ */
+const char *regs_query_register_name(unsigned int offset)
+{
+ const struct pt_regs_offset *roff;
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (roff->offset == offset)
+ return roff->name;
+ return NULL;
+}
+
+/*
+ * does not yet catch signals sent when the child dies.
+ * in exit.c or in signal.c.
+ */
+
+/*
+ * Set of msr bits that gdb can change on behalf of a process.
+ */
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+#define MSR_DEBUGCHANGE 0
+#else
+#define MSR_DEBUGCHANGE (MSR_SE | MSR_BE)
+#endif
+
+/*
+ * Max register writeable via put_reg
+ */
+#ifdef CONFIG_PPC32
+#define PT_MAX_PUT_REG PT_MQ
+#else
+#define PT_MAX_PUT_REG PT_CCR
+#endif
+
+static unsigned long get_user_msr(struct task_struct *task)
+{
+ return task->thread.regs->msr | task->thread.fpexc_mode;
+}
+
+static int set_user_msr(struct task_struct *task, unsigned long msr)
+{
+ task->thread.regs->msr &= ~MSR_DEBUGCHANGE;
+ task->thread.regs->msr |= msr & MSR_DEBUGCHANGE;
+ return 0;
+}
+
+/*
+ * We prevent mucking around with the reserved area of trap
+ * which are used internally by the kernel.
+ */
+static int set_user_trap(struct task_struct *task, unsigned long trap)
+{
+ task->thread.regs->trap = trap & 0xfff0;
+ return 0;
+}
+
+/*
+ * Get contents of register REGNO in task TASK.
+ */
+unsigned long ptrace_get_reg(struct task_struct *task, int regno)
+{
+ if (task->thread.regs == NULL)
+ return -EIO;
+
+ if (regno == PT_MSR)
+ return get_user_msr(task);
+
+ if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long)))
+ return ((unsigned long *)task->thread.regs)[regno];
+
+ return -EIO;
+}
+
+/*
+ * Write contents of register REGNO in task TASK.
+ */
+int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data)
+{
+ if (task->thread.regs == NULL)
+ return -EIO;
+
+ if (regno == PT_MSR)
+ return set_user_msr(task, data);
+ if (regno == PT_TRAP)
+ return set_user_trap(task, data);
+
+ if (regno <= PT_MAX_PUT_REG) {
+ ((unsigned long *)task->thread.regs)[regno] = data;
+ return 0;
+ }
+ return -EIO;
+}
+
+static int gpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ int i, ret;
+
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ if (!FULL_REGS(target->thread.regs)) {
+ /* We have a partial register set. Fill 14-31 with bogus values */
+ for (i = 14; i < 32; i++)
+ target->thread.regs->gpr[i] = NV_REG_POISON;
+ }
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ target->thread.regs,
+ 0, offsetof(struct pt_regs, msr));
+ if (!ret) {
+ unsigned long msr = get_user_msr(target);
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr,
+ offsetof(struct pt_regs, msr),
+ offsetof(struct pt_regs, msr) +
+ sizeof(msr));
+ }
+
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct pt_regs, msr) + sizeof(long));
+
+ if (!ret)
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.regs->orig_gpr3,
+ offsetof(struct pt_regs, orig_gpr3),
+ sizeof(struct pt_regs));
+ if (!ret)
+ ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+ sizeof(struct pt_regs), -1);
+
+ return ret;
+}
+
+static int gpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ unsigned long reg;
+ int ret;
+
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ CHECK_FULL_REGS(target->thread.regs);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ target->thread.regs,
+ 0, PT_MSR * sizeof(reg));
+
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+ PT_MSR * sizeof(reg),
+ (PT_MSR + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_msr(target, reg);
+ }
+
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct pt_regs, msr) + sizeof(long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.regs->orig_gpr3,
+ PT_ORIG_R3 * sizeof(reg),
+ (PT_MAX_PUT_REG + 1) * sizeof(reg));
+
+ if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
+ ret = user_regset_copyin_ignore(
+ &pos, &count, &kbuf, &ubuf,
+ (PT_MAX_PUT_REG + 1) * sizeof(reg),
+ PT_TRAP * sizeof(reg));
+
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+ PT_TRAP * sizeof(reg),
+ (PT_TRAP + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_trap(target, reg);
+ }
+
+ if (!ret)
+ ret = user_regset_copyin_ignore(
+ &pos, &count, &kbuf, &ubuf,
+ (PT_TRAP + 1) * sizeof(reg), -1);
+
+ return ret;
+}
+
+static int fpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+#ifdef CONFIG_VSX
+ double buf[33];
+ int i;
+#endif
+ flush_fp_to_thread(target);
+
+#ifdef CONFIG_VSX
+ /* copy to local buffer then write that out */
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.TS_FPR(i);
+ memcpy(&buf[32], &target->thread.fpscr, sizeof(double));
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+
+#else
+ BUILD_BUG_ON(offsetof(struct thread_struct, fpscr) !=
+ offsetof(struct thread_struct, TS_FPR(32)));
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpr, 0, -1);
+#endif
+}
+
+static int fpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+#ifdef CONFIG_VSX
+ double buf[33];
+ int i;
+#endif
+ flush_fp_to_thread(target);
+
+#ifdef CONFIG_VSX
+ /* copy to local buffer then write that out */
+ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+ if (i)
+ return i;
+ for (i = 0; i < 32 ; i++)
+ target->thread.TS_FPR(i) = buf[i];
+ memcpy(&target->thread.fpscr, &buf[32], sizeof(double));
+ return 0;
+#else
+ BUILD_BUG_ON(offsetof(struct thread_struct, fpscr) !=
+ offsetof(struct thread_struct, TS_FPR(32)));
+
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpr, 0, -1);
+#endif
+}
+
+#ifdef CONFIG_ALTIVEC
+/*
+ * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
+ * The transfer totals 34 quadword. Quadwords 0-31 contain the
+ * corresponding vector registers. Quadword 32 contains the vscr as the
+ * last word (offset 12) within that quadword. Quadword 33 contains the
+ * vrsave as the first word (offset 0) within the quadword.
+ *
+ * This definition of the VMX state is compatible with the current PPC32
+ * ptrace interface. This allows signal handling and ptrace to use the
+ * same structures. This also simplifies the implementation of a bi-arch
+ * (combined (32- and 64-bit) gdb.
+ */
+
+static int vr_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ flush_altivec_to_thread(target);
+ return target->thread.used_vr ? regset->n : 0;
+}
+
+static int vr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ flush_altivec_to_thread(target);
+
+ BUILD_BUG_ON(offsetof(struct thread_struct, vscr) !=
+ offsetof(struct thread_struct, vr[32]));
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.vr, 0,
+ 33 * sizeof(vector128));
+ if (!ret) {
+ /*
+ * Copy out only the low-order word of vrsave.
+ */
+ union {
+ elf_vrreg_t reg;
+ u32 word;
+ } vrsave;
+ memset(&vrsave, 0, sizeof(vrsave));
+ vrsave.word = target->thread.vrsave;
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
+ 33 * sizeof(vector128), -1);
+ }
+
+ return ret;
+}
+
+static int vr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ flush_altivec_to_thread(target);
+
+ BUILD_BUG_ON(offsetof(struct thread_struct, vscr) !=
+ offsetof(struct thread_struct, vr[32]));
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.vr, 0, 33 * sizeof(vector128));
+ if (!ret && count > 0) {
+ /*
+ * We use only the first word of vrsave.
+ */
+ union {
+ elf_vrreg_t reg;
+ u32 word;
+ } vrsave;
+ memset(&vrsave, 0, sizeof(vrsave));
+ vrsave.word = target->thread.vrsave;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
+ 33 * sizeof(vector128), -1);
+ if (!ret)
+ target->thread.vrsave = vrsave.word;
+ }
+
+ return ret;
+}
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+/*
+ * Currently to set and and get all the vsx state, you need to call
+ * the fp and VMX calls as well. This only get/sets the lower 32
+ * 128bit VSX registers.
+ */
+
+static int vsr_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ flush_vsx_to_thread(target);
+ return target->thread.used_vsr ? regset->n : 0;
+}
+
+static int vsr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ double buf[32];
+ int ret, i;
+
+ flush_vsx_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.fpr[i][TS_VSRLOWOFFSET];
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ buf, 0, 32 * sizeof(double));
+
+ return ret;
+}
+
+static int vsr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ double buf[32];
+ int ret,i;
+
+ flush_vsx_to_thread(target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ buf, 0, 32 * sizeof(double));
+ for (i = 0; i < 32 ; i++)
+ target->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+
+
+ return ret;
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_SPE
+
+/*
+ * For get_evrregs/set_evrregs functions 'data' has the following layout:
+ *
+ * struct {
+ * u32 evr[32];
+ * u64 acc;
+ * u32 spefscr;
+ * }
+ */
+
+static int evr_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ flush_spe_to_thread(target);
+ return target->thread.used_spe ? regset->n : 0;
+}
+
+static int evr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ flush_spe_to_thread(target);
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.evr,
+ 0, sizeof(target->thread.evr));
+
+ BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
+ offsetof(struct thread_struct, spefscr));
+
+ if (!ret)
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.acc,
+ sizeof(target->thread.evr), -1);
+
+ return ret;
+}
+
+static int evr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ flush_spe_to_thread(target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.evr,
+ 0, sizeof(target->thread.evr));
+
+ BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
+ offsetof(struct thread_struct, spefscr));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.acc,
+ sizeof(target->thread.evr), -1);
+
+ return ret;
+}
+#endif /* CONFIG_SPE */
+
+
+/*
+ * These are our native regset flavors.
+ */
+enum powerpc_regset {
+ REGSET_GPR,
+ REGSET_FPR,
+#ifdef CONFIG_ALTIVEC
+ REGSET_VMX,
+#endif
+#ifdef CONFIG_VSX
+ REGSET_VSX,
+#endif
+#ifdef CONFIG_SPE
+ REGSET_SPE,
+#endif
+};
+
+static const struct user_regset native_regsets[] = {
+ [REGSET_GPR] = {
+ .core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
+ .size = sizeof(long), .align = sizeof(long),
+ .get = gpr_get, .set = gpr_set
+ },
+ [REGSET_FPR] = {
+ .core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .get = fpr_get, .set = fpr_set
+ },
+#ifdef CONFIG_ALTIVEC
+ [REGSET_VMX] = {
+ .core_note_type = NT_PPC_VMX, .n = 34,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = vr_active, .get = vr_get, .set = vr_set
+ },
+#endif
+#ifdef CONFIG_VSX
+ [REGSET_VSX] = {
+ .core_note_type = NT_PPC_VSX, .n = 32,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = vsr_active, .get = vsr_get, .set = vsr_set
+ },
+#endif
+#ifdef CONFIG_SPE
+ [REGSET_SPE] = {
+ .n = 35,
+ .size = sizeof(u32), .align = sizeof(u32),
+ .active = evr_active, .get = evr_get, .set = evr_set
+ },
+#endif
+};
+
+static const struct user_regset_view user_ppc_native_view = {
+ .name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI,
+ .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
+};
+
+#ifdef CONFIG_PPC64
+#include <linux/compat.h>
+
+static int gpr32_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ const unsigned long *regs = &target->thread.regs->gpr[0];
+ compat_ulong_t *k = kbuf;
+ compat_ulong_t __user *u = ubuf;
+ compat_ulong_t reg;
+ int i;
+
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ if (!FULL_REGS(target->thread.regs)) {
+ /* We have a partial register set. Fill 14-31 with bogus values */
+ for (i = 14; i < 32; i++)
+ target->thread.regs->gpr[i] = NV_REG_POISON;
+ }
+
+ pos /= sizeof(reg);
+ count /= sizeof(reg);
+
+ if (kbuf)
+ for (; count > 0 && pos < PT_MSR; --count)
+ *k++ = regs[pos++];
+ else
+ for (; count > 0 && pos < PT_MSR; --count)
+ if (__put_user((compat_ulong_t) regs[pos++], u++))
+ return -EFAULT;
+
+ if (count > 0 && pos == PT_MSR) {
+ reg = get_user_msr(target);
+ if (kbuf)
+ *k++ = reg;
+ else if (__put_user(reg, u++))
+ return -EFAULT;
+ ++pos;
+ --count;
+ }
+
+ if (kbuf)
+ for (; count > 0 && pos < PT_REGS_COUNT; --count)
+ *k++ = regs[pos++];
+ else
+ for (; count > 0 && pos < PT_REGS_COUNT; --count)
+ if (__put_user((compat_ulong_t) regs[pos++], u++))
+ return -EFAULT;
+
+ kbuf = k;
+ ubuf = u;
+ pos *= sizeof(reg);
+ count *= sizeof(reg);
+ return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+ PT_REGS_COUNT * sizeof(reg), -1);
+}
+
+static int gpr32_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ unsigned long *regs = &target->thread.regs->gpr[0];
+ const compat_ulong_t *k = kbuf;
+ const compat_ulong_t __user *u = ubuf;
+ compat_ulong_t reg;
+
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ CHECK_FULL_REGS(target->thread.regs);
+
+ pos /= sizeof(reg);
+ count /= sizeof(reg);
+
+ if (kbuf)
+ for (; count > 0 && pos < PT_MSR; --count)
+ regs[pos++] = *k++;
+ else
+ for (; count > 0 && pos < PT_MSR; --count) {
+ if (__get_user(reg, u++))
+ return -EFAULT;
+ regs[pos++] = reg;
+ }
+
+
+ if (count > 0 && pos == PT_MSR) {
+ if (kbuf)
+ reg = *k++;
+ else if (__get_user(reg, u++))
+ return -EFAULT;
+ set_user_msr(target, reg);
+ ++pos;
+ --count;
+ }
+
+ if (kbuf) {
+ for (; count > 0 && pos <= PT_MAX_PUT_REG; --count)
+ regs[pos++] = *k++;
+ for (; count > 0 && pos < PT_TRAP; --count, ++pos)
+ ++k;
+ } else {
+ for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) {
+ if (__get_user(reg, u++))
+ return -EFAULT;
+ regs[pos++] = reg;
+ }
+ for (; count > 0 && pos < PT_TRAP; --count, ++pos)
+ if (__get_user(reg, u++))
+ return -EFAULT;
+ }
+
+ if (count > 0 && pos == PT_TRAP) {
+ if (kbuf)
+ reg = *k++;
+ else if (__get_user(reg, u++))
+ return -EFAULT;
+ set_user_trap(target, reg);
+ ++pos;
+ --count;
+ }
+
+ kbuf = k;
+ ubuf = u;
+ pos *= sizeof(reg);
+ count *= sizeof(reg);
+ return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_TRAP + 1) * sizeof(reg), -1);
+}
+
+/*
+ * These are the regset flavors matching the CONFIG_PPC32 native set.
+ */
+static const struct user_regset compat_regsets[] = {
+ [REGSET_GPR] = {
+ .core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
+ .size = sizeof(compat_long_t), .align = sizeof(compat_long_t),
+ .get = gpr32_get, .set = gpr32_set
+ },
+ [REGSET_FPR] = {
+ .core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .get = fpr_get, .set = fpr_set
+ },
+#ifdef CONFIG_ALTIVEC
+ [REGSET_VMX] = {
+ .core_note_type = NT_PPC_VMX, .n = 34,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = vr_active, .get = vr_get, .set = vr_set
+ },
+#endif
+#ifdef CONFIG_SPE
+ [REGSET_SPE] = {
+ .core_note_type = NT_PPC_SPE, .n = 35,
+ .size = sizeof(u32), .align = sizeof(u32),
+ .active = evr_active, .get = evr_get, .set = evr_set
+ },
+#endif
+};
+
+static const struct user_regset_view user_ppc_compat_view = {
+ .name = "ppc", .e_machine = EM_PPC, .ei_osabi = ELF_OSABI,
+ .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets)
+};
+#endif /* CONFIG_PPC64 */
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+#ifdef CONFIG_PPC64
+ if (test_tsk_thread_flag(task, TIF_32BIT))
+ return &user_ppc_compat_view;
+#endif
+ return &user_ppc_native_view;
+}
+
+
+void user_enable_single_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ task->thread.dbcr0 &= ~DBCR0_BT;
+ task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+ regs->msr |= MSR_DE;
+#else
+ regs->msr &= ~MSR_BE;
+ regs->msr |= MSR_SE;
+#endif
+ }
+ set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ task->thread.dbcr0 &= ~DBCR0_IC;
+ task->thread.dbcr0 = DBCR0_IDM | DBCR0_BT;
+ regs->msr |= MSR_DE;
+#else
+ regs->msr &= ~MSR_SE;
+ regs->msr |= MSR_BE;
+#endif
+ }
+ set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void user_disable_single_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ /*
+ * The logic to disable single stepping should be as
+ * simple as turning off the Instruction Complete flag.
+ * And, after doing so, if all debug flags are off, turn
+ * off DBCR0(IDM) and MSR(DE) .... Torez
+ */
+ task->thread.dbcr0 &= ~DBCR0_IC;
+ /*
+ * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set.
+ */
+ if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0,
+ task->thread.dbcr1)) {
+ /*
+ * All debug events were off.....
+ */
+ task->thread.dbcr0 &= ~DBCR0_IDM;
+ regs->msr &= ~MSR_DE;
+ }
+#else
+ regs->msr &= ~(MSR_SE | MSR_BE);
+#endif
+ }
+ clear_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+void ptrace_triggered(struct perf_event *bp,
+ struct perf_sample_data *data, struct pt_regs *regs)
+{
+ struct perf_event_attr attr;
+
+ /*
+ * Disable the breakpoint request here since ptrace has defined a
+ * one-shot behaviour for breakpoint exceptions in PPC64.
+ * The SIGTRAP signal is generated automatically for us in do_dabr().
+ * We don't have to do anything about that here
+ */
+ attr = bp->attr;
+ attr.disabled = true;
+ modify_user_hw_breakpoint(bp, &attr);
+}
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
+ unsigned long data)
+{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int ret;
+ struct thread_struct *thread = &(task->thread);
+ struct perf_event *bp;
+ struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+ /* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
+ * For embedded processors we support one DAC and no IAC's at the
+ * moment.
+ */
+ if (addr > 0)
+ return -EINVAL;
+
+ /* The bottom 3 bits in dabr are flags */
+ if ((data & ~0x7UL) >= TASK_SIZE)
+ return -EIO;
+
+#ifndef CONFIG_PPC_ADV_DEBUG_REGS
+ /* For processors using DABR (i.e. 970), the bottom 3 bits are flags.
+ * It was assumed, on previous implementations, that 3 bits were
+ * passed together with the data address, fitting the design of the
+ * DABR register, as follows:
+ *
+ * bit 0: Read flag
+ * bit 1: Write flag
+ * bit 2: Breakpoint translation
+ *
+ * Thus, we use them here as so.
+ */
+
+ /* Ensure breakpoint translation bit is set */
+ if (data && !(data & DABR_TRANSLATION))
+ return -EIO;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ if (ptrace_get_breakpoints(task) < 0)
+ return -ESRCH;
+
+ bp = thread->ptrace_bps[0];
+ if ((!data) || !(data & (DABR_DATA_WRITE | DABR_DATA_READ))) {
+ if (bp) {
+ unregister_hw_breakpoint(bp);
+ thread->ptrace_bps[0] = NULL;
+ }
+ ptrace_put_breakpoints(task);
+ return 0;
+ }
+ if (bp) {
+ attr = bp->attr;
+ attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN;
+ arch_bp_generic_fields(data &
+ (DABR_DATA_WRITE | DABR_DATA_READ),
+ &attr.bp_type);
+ ret = modify_user_hw_breakpoint(bp, &attr);
+ if (ret) {
+ ptrace_put_breakpoints(task);
+ return ret;
+ }
+ thread->ptrace_bps[0] = bp;
+ ptrace_put_breakpoints(task);
+ thread->dabr = data;
+ return 0;
+ }
+
+ /* Create a new breakpoint request if one doesn't exist already */
+ hw_breakpoint_init(&attr);
+ attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN;
+ arch_bp_generic_fields(data & (DABR_DATA_WRITE | DABR_DATA_READ),
+ &attr.bp_type);
+
+ thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
+ ptrace_triggered, NULL, task);
+ if (IS_ERR(bp)) {
+ thread->ptrace_bps[0] = NULL;
+ ptrace_put_breakpoints(task);
+ return PTR_ERR(bp);
+ }
+
+ ptrace_put_breakpoints(task);
+
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+ /* Move contents to the DABR register */
+ task->thread.dabr = data;
+#else /* CONFIG_PPC_ADV_DEBUG_REGS */
+ /* As described above, it was assumed 3 bits were passed with the data
+ * address, but we will assume only the mode bits will be passed
+ * as to not cause alignment restrictions for DAC-based processors.
+ */
+
+ /* DAC's hold the whole address without any mode flags */
+ task->thread.dac1 = data & ~0x3UL;
+
+ if (task->thread.dac1 == 0) {
+ dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+ if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0,
+ task->thread.dbcr1)) {
+ task->thread.regs->msr &= ~MSR_DE;
+ task->thread.dbcr0 &= ~DBCR0_IDM;
+ }
+ return 0;
+ }
+
+ /* Read or Write bits must be set */
+
+ if (!(data & 0x3UL))
+ return -EINVAL;
+
+ /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0
+ register */
+ task->thread.dbcr0 |= DBCR0_IDM;
+
+ /* Check for write and read flags and set DBCR0
+ accordingly */
+ dbcr_dac(task) &= ~(DBCR_DAC1R|DBCR_DAC1W);
+ if (data & 0x1UL)
+ dbcr_dac(task) |= DBCR_DAC1R;
+ if (data & 0x2UL)
+ dbcr_dac(task) |= DBCR_DAC1W;
+ task->thread.regs->msr |= MSR_DE;
+#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
+ return 0;
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure single step bits etc are not set.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+ /* make sure the single step bit is not set. */
+ user_disable_single_step(child);
+}
+
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+static long set_intruction_bp(struct task_struct *child,
+ struct ppc_hw_breakpoint *bp_info)
+{
+ int slot;
+ int slot1_in_use = ((child->thread.dbcr0 & DBCR0_IAC1) != 0);
+ int slot2_in_use = ((child->thread.dbcr0 & DBCR0_IAC2) != 0);
+ int slot3_in_use = ((child->thread.dbcr0 & DBCR0_IAC3) != 0);
+ int slot4_in_use = ((child->thread.dbcr0 & DBCR0_IAC4) != 0);
+
+ if (dbcr_iac_range(child) & DBCR_IAC12MODE)
+ slot2_in_use = 1;
+ if (dbcr_iac_range(child) & DBCR_IAC34MODE)
+ slot4_in_use = 1;
+
+ if (bp_info->addr >= TASK_SIZE)
+ return -EIO;
+
+ if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) {
+
+ /* Make sure range is valid. */
+ if (bp_info->addr2 >= TASK_SIZE)
+ return -EIO;
+
+ /* We need a pair of IAC regsisters */
+ if ((!slot1_in_use) && (!slot2_in_use)) {
+ slot = 1;
+ child->thread.iac1 = bp_info->addr;
+ child->thread.iac2 = bp_info->addr2;
+ child->thread.dbcr0 |= DBCR0_IAC1;
+ if (bp_info->addr_mode ==
+ PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+ dbcr_iac_range(child) |= DBCR_IAC12X;
+ else
+ dbcr_iac_range(child) |= DBCR_IAC12I;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ } else if ((!slot3_in_use) && (!slot4_in_use)) {
+ slot = 3;
+ child->thread.iac3 = bp_info->addr;
+ child->thread.iac4 = bp_info->addr2;
+ child->thread.dbcr0 |= DBCR0_IAC3;
+ if (bp_info->addr_mode ==
+ PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+ dbcr_iac_range(child) |= DBCR_IAC34X;
+ else
+ dbcr_iac_range(child) |= DBCR_IAC34I;
+#endif
+ } else
+ return -ENOSPC;
+ } else {
+ /* We only need one. If possible leave a pair free in
+ * case a range is needed later
+ */
+ if (!slot1_in_use) {
+ /*
+ * Don't use iac1 if iac1-iac2 are free and either
+ * iac3 or iac4 (but not both) are free
+ */
+ if (slot2_in_use || (slot3_in_use == slot4_in_use)) {
+ slot = 1;
+ child->thread.iac1 = bp_info->addr;
+ child->thread.dbcr0 |= DBCR0_IAC1;
+ goto out;
+ }
+ }
+ if (!slot2_in_use) {
+ slot = 2;
+ child->thread.iac2 = bp_info->addr;
+ child->thread.dbcr0 |= DBCR0_IAC2;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ } else if (!slot3_in_use) {
+ slot = 3;
+ child->thread.iac3 = bp_info->addr;
+ child->thread.dbcr0 |= DBCR0_IAC3;
+ } else if (!slot4_in_use) {
+ slot = 4;
+ child->thread.iac4 = bp_info->addr;
+ child->thread.dbcr0 |= DBCR0_IAC4;
+#endif
+ } else
+ return -ENOSPC;
+ }
+out:
+ child->thread.dbcr0 |= DBCR0_IDM;
+ child->thread.regs->msr |= MSR_DE;
+
+ return slot;
+}
+
+static int del_instruction_bp(struct task_struct *child, int slot)
+{
+ switch (slot) {
+ case 1:
+ if ((child->thread.dbcr0 & DBCR0_IAC1) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC12MODE) {
+ /* address range - clear slots 1 & 2 */
+ child->thread.iac2 = 0;
+ dbcr_iac_range(child) &= ~DBCR_IAC12MODE;
+ }
+ child->thread.iac1 = 0;
+ child->thread.dbcr0 &= ~DBCR0_IAC1;
+ break;
+ case 2:
+ if ((child->thread.dbcr0 & DBCR0_IAC2) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC12MODE)
+ /* used in a range */
+ return -EINVAL;
+ child->thread.iac2 = 0;
+ child->thread.dbcr0 &= ~DBCR0_IAC2;
+ break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ case 3:
+ if ((child->thread.dbcr0 & DBCR0_IAC3) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC34MODE) {
+ /* address range - clear slots 3 & 4 */
+ child->thread.iac4 = 0;
+ dbcr_iac_range(child) &= ~DBCR_IAC34MODE;
+ }
+ child->thread.iac3 = 0;
+ child->thread.dbcr0 &= ~DBCR0_IAC3;
+ break;
+ case 4:
+ if ((child->thread.dbcr0 & DBCR0_IAC4) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC34MODE)
+ /* Used in a range */
+ return -EINVAL;
+ child->thread.iac4 = 0;
+ child->thread.dbcr0 &= ~DBCR0_IAC4;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
+{
+ int byte_enable =
+ (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT)
+ & 0xf;
+ int condition_mode =
+ bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE;
+ int slot;
+
+ if (byte_enable && (condition_mode == 0))
+ return -EINVAL;
+
+ if (bp_info->addr >= TASK_SIZE)
+ return -EIO;
+
+ if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) {
+ slot = 1;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ dbcr_dac(child) |= DBCR_DAC1R;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ dbcr_dac(child) |= DBCR_DAC1W;
+ child->thread.dac1 = (unsigned long)bp_info->addr;
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ if (byte_enable) {
+ child->thread.dvc1 =
+ (unsigned long)bp_info->condition_value;
+ child->thread.dbcr2 |=
+ ((byte_enable << DBCR2_DVC1BE_SHIFT) |
+ (condition_mode << DBCR2_DVC1M_SHIFT));
+ }
+#endif
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ } else if (child->thread.dbcr2 & DBCR2_DAC12MODE) {
+ /* Both dac1 and dac2 are part of a range */
+ return -ENOSPC;
+#endif
+ } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) {
+ slot = 2;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ dbcr_dac(child) |= DBCR_DAC2R;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ dbcr_dac(child) |= DBCR_DAC2W;
+ child->thread.dac2 = (unsigned long)bp_info->addr;
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ if (byte_enable) {
+ child->thread.dvc2 =
+ (unsigned long)bp_info->condition_value;
+ child->thread.dbcr2 |=
+ ((byte_enable << DBCR2_DVC2BE_SHIFT) |
+ (condition_mode << DBCR2_DVC2M_SHIFT));
+ }
+#endif
+ } else
+ return -ENOSPC;
+ child->thread.dbcr0 |= DBCR0_IDM;
+ child->thread.regs->msr |= MSR_DE;
+
+ return slot + 4;
+}
+
+static int del_dac(struct task_struct *child, int slot)
+{
+ if (slot == 1) {
+ if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0)
+ return -ENOENT;
+
+ child->thread.dac1 = 0;
+ dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ if (child->thread.dbcr2 & DBCR2_DAC12MODE) {
+ child->thread.dac2 = 0;
+ child->thread.dbcr2 &= ~DBCR2_DAC12MODE;
+ }
+ child->thread.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);
+#endif
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ child->thread.dvc1 = 0;
+#endif
+ } else if (slot == 2) {
+ if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0)
+ return -ENOENT;
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ if (child->thread.dbcr2 & DBCR2_DAC12MODE)
+ /* Part of a range */
+ return -EINVAL;
+ child->thread.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);
+#endif
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ child->thread.dvc2 = 0;
+#endif
+ child->thread.dac2 = 0;
+ dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W);
+ } else
+ return -EINVAL;
+
+ return 0;
+}
+#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+static int set_dac_range(struct task_struct *child,
+ struct ppc_hw_breakpoint *bp_info)
+{
+ int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK;
+
+ /* We don't allow range watchpoints to be used with DVC */
+ if (bp_info->condition_mode)
+ return -EINVAL;
+
+ /*
+ * Best effort to verify the address range. The user/supervisor bits
+ * prevent trapping in kernel space, but let's fail on an obvious bad
+ * range. The simple test on the mask is not fool-proof, and any
+ * exclusive range will spill over into kernel space.
+ */
+ if (bp_info->addr >= TASK_SIZE)
+ return -EIO;
+ if (mode == PPC_BREAKPOINT_MODE_MASK) {
+ /*
+ * dac2 is a bitmask. Don't allow a mask that makes a
+ * kernel space address from a valid dac1 value
+ */
+ if (~((unsigned long)bp_info->addr2) >= TASK_SIZE)
+ return -EIO;
+ } else {
+ /*
+ * For range breakpoints, addr2 must also be a valid address
+ */
+ if (bp_info->addr2 >= TASK_SIZE)
+ return -EIO;
+ }
+
+ if (child->thread.dbcr0 &
+ (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W))
+ return -ENOSPC;
+
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ child->thread.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ child->thread.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
+ child->thread.dac1 = bp_info->addr;
+ child->thread.dac2 = bp_info->addr2;
+ if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
+ child->thread.dbcr2 |= DBCR2_DAC12M;
+ else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+ child->thread.dbcr2 |= DBCR2_DAC12MX;
+ else /* PPC_BREAKPOINT_MODE_MASK */
+ child->thread.dbcr2 |= DBCR2_DAC12MM;
+ child->thread.regs->msr |= MSR_DE;
+
+ return 5;
+}
+#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */
+
+static long ppc_set_hwdebug(struct task_struct *child,
+ struct ppc_hw_breakpoint *bp_info)
+{
+#ifndef CONFIG_PPC_ADV_DEBUG_REGS
+ unsigned long dabr;
+#endif
+
+ if (bp_info->version != 1)
+ return -ENOTSUPP;
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ /*
+ * Check for invalid flags and combinations
+ */
+ if ((bp_info->trigger_type == 0) ||
+ (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE |
+ PPC_BREAKPOINT_TRIGGER_RW)) ||
+ (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) ||
+ (bp_info->condition_mode &
+ ~(PPC_BREAKPOINT_CONDITION_MODE |
+ PPC_BREAKPOINT_CONDITION_BE_ALL)))
+ return -EINVAL;
+#if CONFIG_PPC_ADV_DEBUG_DVCS == 0
+ if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+ return -EINVAL;
+#endif
+
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) {
+ if ((bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE) ||
+ (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE))
+ return -EINVAL;
+ return set_intruction_bp(child, bp_info);
+ }
+ if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
+ return set_dac(child, bp_info);
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ return set_dac_range(child, bp_info);
+#else
+ return -EINVAL;
+#endif
+#else /* !CONFIG_PPC_ADV_DEBUG_DVCS */
+ /*
+ * We only support one data breakpoint
+ */
+ if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 ||
+ (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 ||
+ bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT ||
+ bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+ return -EINVAL;
+
+ if (child->thread.dabr)
+ return -ENOSPC;
+
+ if ((unsigned long)bp_info->addr >= TASK_SIZE)
+ return -EIO;
+
+ dabr = (unsigned long)bp_info->addr & ~7UL;
+ dabr |= DABR_TRANSLATION;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ dabr |= DABR_DATA_READ;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ dabr |= DABR_DATA_WRITE;
+
+ child->thread.dabr = dabr;
+
+ return 1;
+#endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */
+}
+
+static long ppc_del_hwdebug(struct task_struct *child, long addr, long data)
+{
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ int rc;
+
+ if (data <= 4)
+ rc = del_instruction_bp(child, (int)data);
+ else
+ rc = del_dac(child, (int)data - 4);
+
+ if (!rc) {
+ if (!DBCR_ACTIVE_EVENTS(child->thread.dbcr0,
+ child->thread.dbcr1)) {
+ child->thread.dbcr0 &= ~DBCR0_IDM;
+ child->thread.regs->msr &= ~MSR_DE;
+ }
+ }
+ return rc;
+#else
+ if (data != 1)
+ return -EINVAL;
+ if (child->thread.dabr == 0)
+ return -ENOENT;
+
+ child->thread.dabr = 0;
+
+ return 0;
+#endif
+}
+
+/*
+ * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls,
+ * we mark them as obsolete now, they will be removed in a future version
+ */
+static long arch_ptrace_old(struct task_struct *child, long request,
+ unsigned long addr, unsigned long data)
+{
+ void __user *datavp = (void __user *) data;
+
+ switch (request) {
+ case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_GPR, 0, 32 * sizeof(long),
+ datavp);
+
+ case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_GPR, 0, 32 * sizeof(long),
+ datavp);
+
+ case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. */
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_FPR, 0, 32 * sizeof(double),
+ datavp);
+
+ case PPC_PTRACE_SETFPREGS: /* Set FPRs 0 - 31. */
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_FPR, 0, 32 * sizeof(double),
+ datavp);
+ }
+
+ return -EPERM;
+}
+
+long arch_ptrace(struct task_struct *child, long request,
+ unsigned long addr, unsigned long data)
+{
+ int ret = -EPERM;
+ void __user *datavp = (void __user *) data;
+ unsigned long __user *datalp = datavp;
+
+ switch (request) {
+ /* read the word at location addr in the USER area. */
+ case PTRACE_PEEKUSR: {
+ unsigned long index, tmp;
+
+ ret = -EIO;
+ /* convert to index and check */
+#ifdef CONFIG_PPC32
+ index = addr >> 2;
+ if ((addr & 3) || (index > PT_FPSCR)
+ || (child->thread.regs == NULL))
+#else
+ index = addr >> 3;
+ if ((addr & 7) || (index > PT_FPSCR))
+#endif
+ break;
+
+ CHECK_FULL_REGS(child->thread.regs);
+ if (index < PT_FPR0) {
+ tmp = ptrace_get_reg(child, (int) index);
+ } else {
+ unsigned int fpidx = index - PT_FPR0;
+
+ flush_fp_to_thread(child);
+ if (fpidx < (PT_FPSCR - PT_FPR0))
+ tmp = ((unsigned long *)child->thread.fpr)
+ [fpidx * TS_FPRWIDTH];
+ else
+ tmp = child->thread.fpscr.val;
+ }
+ ret = put_user(tmp, datalp);
+ break;
+ }
+
+ /* write the word at location addr in the USER area */
+ case PTRACE_POKEUSR: {
+ unsigned long index;
+
+ ret = -EIO;
+ /* convert to index and check */
+#ifdef CONFIG_PPC32
+ index = addr >> 2;
+ if ((addr & 3) || (index > PT_FPSCR)
+ || (child->thread.regs == NULL))
+#else
+ index = addr >> 3;
+ if ((addr & 7) || (index > PT_FPSCR))
+#endif
+ break;
+
+ CHECK_FULL_REGS(child->thread.regs);
+ if (index < PT_FPR0) {
+ ret = ptrace_put_reg(child, index, data);
+ } else {
+ unsigned int fpidx = index - PT_FPR0;
+
+ flush_fp_to_thread(child);
+ if (fpidx < (PT_FPSCR - PT_FPR0))
+ ((unsigned long *)child->thread.fpr)
+ [fpidx * TS_FPRWIDTH] = data;
+ else
+ child->thread.fpscr.val = data;
+ ret = 0;
+ }
+ break;
+ }
+
+ case PPC_PTRACE_GETHWDBGINFO: {
+ struct ppc_debug_info dbginfo;
+
+ dbginfo.version = 1;
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ dbginfo.num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS;
+ dbginfo.num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS;
+ dbginfo.num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS;
+ dbginfo.data_bp_alignment = 4;
+ dbginfo.sizeof_condition = 4;
+ dbginfo.features = PPC_DEBUG_FEATURE_INSN_BP_RANGE |
+ PPC_DEBUG_FEATURE_INSN_BP_MASK;
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ dbginfo.features |=
+ PPC_DEBUG_FEATURE_DATA_BP_RANGE |
+ PPC_DEBUG_FEATURE_DATA_BP_MASK;
+#endif
+#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
+ dbginfo.num_instruction_bps = 0;
+ dbginfo.num_data_bps = 1;
+ dbginfo.num_condition_regs = 0;
+#ifdef CONFIG_PPC64
+ dbginfo.data_bp_alignment = 8;
+#else
+ dbginfo.data_bp_alignment = 4;
+#endif
+ dbginfo.sizeof_condition = 0;
+ dbginfo.features = 0;
+#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
+
+ if (!access_ok(VERIFY_WRITE, datavp,
+ sizeof(struct ppc_debug_info)))
+ return -EFAULT;
+ ret = __copy_to_user(datavp, &dbginfo,
+ sizeof(struct ppc_debug_info)) ?
+ -EFAULT : 0;
+ break;
+ }
+
+ case PPC_PTRACE_SETHWDEBUG: {
+ struct ppc_hw_breakpoint bp_info;
+
+ if (!access_ok(VERIFY_READ, datavp,
+ sizeof(struct ppc_hw_breakpoint)))
+ return -EFAULT;
+ ret = __copy_from_user(&bp_info, datavp,
+ sizeof(struct ppc_hw_breakpoint)) ?
+ -EFAULT : 0;
+ if (!ret)
+ ret = ppc_set_hwdebug(child, &bp_info);
+ break;
+ }
+
+ case PPC_PTRACE_DELHWDEBUG: {
+ ret = ppc_del_hwdebug(child, addr, data);
+ break;
+ }
+
+ case PTRACE_GET_DEBUGREG: {
+ ret = -EINVAL;
+ /* We only support one DABR and no IABRS at the moment */
+ if (addr > 0)
+ break;
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ ret = put_user(child->thread.dac1, datalp);
+#else
+ ret = put_user(child->thread.dabr, datalp);
+#endif
+ break;
+ }
+
+ case PTRACE_SET_DEBUGREG:
+ ret = ptrace_set_debugreg(child, addr, data);
+ break;
+
+#ifdef CONFIG_PPC64
+ case PTRACE_GETREGS64:
+#endif
+ case PTRACE_GETREGS: /* Get all pt_regs from the child. */
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_GPR,
+ 0, sizeof(struct pt_regs),
+ datavp);
+
+#ifdef CONFIG_PPC64
+ case PTRACE_SETREGS64:
+#endif
+ case PTRACE_SETREGS: /* Set all gp regs in the child. */
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_GPR,
+ 0, sizeof(struct pt_regs),
+ datavp);
+
+ case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_FPR,
+ 0, sizeof(elf_fpregset_t),
+ datavp);
+
+ case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_FPR,
+ 0, sizeof(elf_fpregset_t),
+ datavp);
+
+#ifdef CONFIG_ALTIVEC
+ case PTRACE_GETVRREGS:
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_VMX,
+ 0, (33 * sizeof(vector128) +
+ sizeof(u32)),
+ datavp);
+
+ case PTRACE_SETVRREGS:
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_VMX,
+ 0, (33 * sizeof(vector128) +
+ sizeof(u32)),
+ datavp);
+#endif
+#ifdef CONFIG_VSX
+ case PTRACE_GETVSRREGS:
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_VSX,
+ 0, 32 * sizeof(double),
+ datavp);
+
+ case PTRACE_SETVSRREGS:
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_VSX,
+ 0, 32 * sizeof(double),
+ datavp);
+#endif
+#ifdef CONFIG_SPE
+ case PTRACE_GETEVRREGS:
+ /* Get the child spe register state. */
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_SPE, 0, 35 * sizeof(u32),
+ datavp);
+
+ case PTRACE_SETEVRREGS:
+ /* Set the child spe register state. */
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_SPE, 0, 35 * sizeof(u32),
+ datavp);
+#endif
+
+ /* Old reverse args ptrace callss */
+ case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */
+ case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */
+ case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. */
+ case PPC_PTRACE_SETFPREGS: /* Get FPRs 0 - 31. */
+ ret = arch_ptrace_old(child, request, addr, data);
+ break;
+
+ default:
+ ret = ptrace_request(child, request, addr, data);
+ break;
+ }
+ return ret;
+}
+
+/*
+ * We must return the syscall number to actually look up in the table.
+ * This can be -1L to skip running any syscall at all.
+ */
+long do_syscall_trace_enter(struct pt_regs *regs)
+{
+ long ret = 0;
+
+ secure_computing(regs->gpr[0]);
+
+ if (test_thread_flag(TIF_SYSCALL_TRACE) &&
+ tracehook_report_syscall_entry(regs))
+ /*
+ * Tracing decided this syscall should not happen.
+ * We'll return a bogus call number to get an ENOSYS
+ * error, but leave the original number in regs->gpr[0].
+ */
+ ret = -1L;
+
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_enter(regs, regs->gpr[0]);
+
+#ifdef CONFIG_PPC64
+ if (!is_32bit_task())
+ audit_syscall_entry(AUDIT_ARCH_PPC64,
+ regs->gpr[0],
+ regs->gpr[3], regs->gpr[4],
+ regs->gpr[5], regs->gpr[6]);
+ else
+#endif
+ audit_syscall_entry(AUDIT_ARCH_PPC,
+ regs->gpr[0],
+ regs->gpr[3] & 0xffffffff,
+ regs->gpr[4] & 0xffffffff,
+ regs->gpr[5] & 0xffffffff,
+ regs->gpr[6] & 0xffffffff);
+
+ return ret ?: regs->gpr[0];
+}
+
+void do_syscall_trace_leave(struct pt_regs *regs)
+{
+ int step;
+
+ audit_syscall_exit(regs);
+
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_exit(regs, regs->result);
+
+ step = test_thread_flag(TIF_SINGLESTEP);
+ if (step || test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall_exit(regs, step);
+}
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c
new file mode 100644
index 00000000..469349d1
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace32.c
@@ -0,0 +1,337 @@
+/*
+ * ptrace for 32-bit processes running on a 64-bit kernel.
+ *
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Derived from "arch/m68k/kernel/ptrace.c"
+ * Copyright (C) 1994 by Hamish Macdonald
+ * Taken from linux/kernel/ptrace.c and modified for M680x0.
+ * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
+ *
+ * Modified by Cort Dougan (cort@hq.fsmlabs.com)
+ * and Paul Mackerras (paulus@samba.org).
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file COPYING in the main directory of
+ * this archive for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/regset.h>
+#include <linux/user.h>
+#include <linux/security.h>
+#include <linux/signal.h>
+#include <linux/compat.h>
+
+#include <asm/uaccess.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/switch_to.h>
+
+/*
+ * does not yet catch signals sent when the child dies.
+ * in exit.c or in signal.c.
+ */
+
+/*
+ * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls,
+ * we mark them as obsolete now, they will be removed in a future version
+ */
+static long compat_ptrace_old(struct task_struct *child, long request,
+ long addr, long data)
+{
+ switch (request) {
+ case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */
+ return copy_regset_to_user(child,
+ task_user_regset_view(current), 0,
+ 0, 32 * sizeof(compat_long_t),
+ compat_ptr(data));
+
+ case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */
+ return copy_regset_from_user(child,
+ task_user_regset_view(current), 0,
+ 0, 32 * sizeof(compat_long_t),
+ compat_ptr(data));
+ }
+
+ return -EPERM;
+}
+
+/* Macros to workout the correct index for the FPR in the thread struct */
+#define FPRNUMBER(i) (((i) - PT_FPR0) >> 1)
+#define FPRHALF(i) (((i) - PT_FPR0) & 1)
+#define FPRINDEX(i) TS_FPRWIDTH * FPRNUMBER(i) * 2 + FPRHALF(i)
+#define FPRINDEX_3264(i) (TS_FPRWIDTH * ((i) - PT_FPR0))
+
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+ compat_ulong_t caddr, compat_ulong_t cdata)
+{
+ unsigned long addr = caddr;
+ unsigned long data = cdata;
+ int ret;
+
+ switch (request) {
+ /*
+ * Read 4 bytes of the other process' storage
+ * data is a pointer specifying where the user wants the
+ * 4 bytes copied into
+ * addr is a pointer in the user's storage that contains an 8 byte
+ * address in the other process of the 4 bytes that is to be read
+ * (this is run in a 32-bit process looking at a 64-bit process)
+ * when I and D space are separate, these will need to be fixed.
+ */
+ case PPC_PTRACE_PEEKTEXT_3264:
+ case PPC_PTRACE_PEEKDATA_3264: {
+ u32 tmp;
+ int copied;
+ u32 __user * addrOthers;
+
+ ret = -EIO;
+
+ /* Get the addr in the other process that we want to read */
+ if (get_user(addrOthers, (u32 __user * __user *)addr) != 0)
+ break;
+
+ copied = access_process_vm(child, (u64)addrOthers, &tmp,
+ sizeof(tmp), 0);
+ if (copied != sizeof(tmp))
+ break;
+ ret = put_user(tmp, (u32 __user *)data);
+ break;
+ }
+
+ /* Read a register (specified by ADDR) out of the "user area" */
+ case PTRACE_PEEKUSR: {
+ int index;
+ unsigned long tmp;
+
+ ret = -EIO;
+ /* convert to index and check */
+ index = (unsigned long) addr >> 2;
+ if ((addr & 3) || (index > PT_FPSCR32))
+ break;
+
+ CHECK_FULL_REGS(child->thread.regs);
+ if (index < PT_FPR0) {
+ tmp = ptrace_get_reg(child, index);
+ } else {
+ flush_fp_to_thread(child);
+ /*
+ * the user space code considers the floating point
+ * to be an array of unsigned int (32 bits) - the
+ * index passed in is based on this assumption.
+ */
+ tmp = ((unsigned int *)child->thread.fpr)
+ [FPRINDEX(index)];
+ }
+ ret = put_user((unsigned int)tmp, (u32 __user *)data);
+ break;
+ }
+
+ /*
+ * Read 4 bytes out of the other process' pt_regs area
+ * data is a pointer specifying where the user wants the
+ * 4 bytes copied into
+ * addr is the offset into the other process' pt_regs structure
+ * that is to be read
+ * (this is run in a 32-bit process looking at a 64-bit process)
+ */
+ case PPC_PTRACE_PEEKUSR_3264: {
+ u32 index;
+ u32 reg32bits;
+ u64 tmp;
+ u32 numReg;
+ u32 part;
+
+ ret = -EIO;
+ /* Determine which register the user wants */
+ index = (u64)addr >> 2;
+ numReg = index / 2;
+ /* Determine which part of the register the user wants */
+ if (index % 2)
+ part = 1; /* want the 2nd half of the register (right-most). */
+ else
+ part = 0; /* want the 1st half of the register (left-most). */
+
+ /* Validate the input - check to see if address is on the wrong boundary
+ * or beyond the end of the user area
+ */
+ if ((addr & 3) || numReg > PT_FPSCR)
+ break;
+
+ CHECK_FULL_REGS(child->thread.regs);
+ if (numReg >= PT_FPR0) {
+ flush_fp_to_thread(child);
+ /* get 64 bit FPR */
+ tmp = ((u64 *)child->thread.fpr)
+ [FPRINDEX_3264(numReg)];
+ } else { /* register within PT_REGS struct */
+ tmp = ptrace_get_reg(child, numReg);
+ }
+ reg32bits = ((u32*)&tmp)[part];
+ ret = put_user(reg32bits, (u32 __user *)data);
+ break;
+ }
+
+ /*
+ * Write 4 bytes into the other process' storage
+ * data is the 4 bytes that the user wants written
+ * addr is a pointer in the user's storage that contains an
+ * 8 byte address in the other process where the 4 bytes
+ * that is to be written
+ * (this is run in a 32-bit process looking at a 64-bit process)
+ * when I and D space are separate, these will need to be fixed.
+ */
+ case PPC_PTRACE_POKETEXT_3264:
+ case PPC_PTRACE_POKEDATA_3264: {
+ u32 tmp = data;
+ u32 __user * addrOthers;
+
+ /* Get the addr in the other process that we want to write into */
+ ret = -EIO;
+ if (get_user(addrOthers, (u32 __user * __user *)addr) != 0)
+ break;
+ ret = 0;
+ if (access_process_vm(child, (u64)addrOthers, &tmp,
+ sizeof(tmp), 1) == sizeof(tmp))
+ break;
+ ret = -EIO;
+ break;
+ }
+
+ /* write the word at location addr in the USER area */
+ case PTRACE_POKEUSR: {
+ unsigned long index;
+
+ ret = -EIO;
+ /* convert to index and check */
+ index = (unsigned long) addr >> 2;
+ if ((addr & 3) || (index > PT_FPSCR32))
+ break;
+
+ CHECK_FULL_REGS(child->thread.regs);
+ if (index < PT_FPR0) {
+ ret = ptrace_put_reg(child, index, data);
+ } else {
+ flush_fp_to_thread(child);
+ /*
+ * the user space code considers the floating point
+ * to be an array of unsigned int (32 bits) - the
+ * index passed in is based on this assumption.
+ */
+ ((unsigned int *)child->thread.fpr)
+ [FPRINDEX(index)] = data;
+ ret = 0;
+ }
+ break;
+ }
+
+ /*
+ * Write 4 bytes into the other process' pt_regs area
+ * data is the 4 bytes that the user wants written
+ * addr is the offset into the other process' pt_regs structure
+ * that is to be written into
+ * (this is run in a 32-bit process looking at a 64-bit process)
+ */
+ case PPC_PTRACE_POKEUSR_3264: {
+ u32 index;
+ u32 numReg;
+
+ ret = -EIO;
+ /* Determine which register the user wants */
+ index = (u64)addr >> 2;
+ numReg = index / 2;
+
+ /*
+ * Validate the input - check to see if address is on the
+ * wrong boundary or beyond the end of the user area
+ */
+ if ((addr & 3) || (numReg > PT_FPSCR))
+ break;
+ CHECK_FULL_REGS(child->thread.regs);
+ if (numReg < PT_FPR0) {
+ unsigned long freg = ptrace_get_reg(child, numReg);
+ if (index % 2)
+ freg = (freg & ~0xfffffffful) | (data & 0xfffffffful);
+ else
+ freg = (freg & 0xfffffffful) | (data << 32);
+ ret = ptrace_put_reg(child, numReg, freg);
+ } else {
+ u64 *tmp;
+ flush_fp_to_thread(child);
+ /* get 64 bit FPR ... */
+ tmp = &(((u64 *)child->thread.fpr)
+ [FPRINDEX_3264(numReg)]);
+ /* ... write the 32 bit part we want */
+ ((u32 *)tmp)[index % 2] = data;
+ ret = 0;
+ }
+ break;
+ }
+
+ case PTRACE_GET_DEBUGREG: {
+ ret = -EINVAL;
+ /* We only support one DABR and no IABRS at the moment */
+ if (addr > 0)
+ break;
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ ret = put_user(child->thread.dac1, (u32 __user *)data);
+#else
+ ret = put_user(child->thread.dabr, (u32 __user *)data);
+#endif
+ break;
+ }
+
+ case PTRACE_GETREGS: /* Get all pt_regs from the child. */
+ return copy_regset_to_user(
+ child, task_user_regset_view(current), 0,
+ 0, PT_REGS_COUNT * sizeof(compat_long_t),
+ compat_ptr(data));
+
+ case PTRACE_SETREGS: /* Set all gp regs in the child. */
+ return copy_regset_from_user(
+ child, task_user_regset_view(current), 0,
+ 0, PT_REGS_COUNT * sizeof(compat_long_t),
+ compat_ptr(data));
+
+ case PTRACE_GETFPREGS:
+ case PTRACE_SETFPREGS:
+ case PTRACE_GETVRREGS:
+ case PTRACE_SETVRREGS:
+ case PTRACE_GETVSRREGS:
+ case PTRACE_SETVSRREGS:
+ case PTRACE_GETREGS64:
+ case PTRACE_SETREGS64:
+ case PPC_PTRACE_GETFPREGS:
+ case PPC_PTRACE_SETFPREGS:
+ case PTRACE_KILL:
+ case PTRACE_SINGLESTEP:
+ case PTRACE_DETACH:
+ case PTRACE_SET_DEBUGREG:
+ case PTRACE_SYSCALL:
+ case PTRACE_CONT:
+ case PPC_PTRACE_GETHWDBGINFO:
+ case PPC_PTRACE_SETHWDEBUG:
+ case PPC_PTRACE_DELHWDEBUG:
+ ret = arch_ptrace(child, request, addr, data);
+ break;
+
+ /* Old reverse args ptrace callss */
+ case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */
+ case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */
+ ret = compat_ptrace_old(child, request, addr, data);
+ break;
+
+ default:
+ ret = compat_ptrace_request(child, request, addr, data);
+ break;
+ }
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S
new file mode 100644
index 00000000..ef46ba6e
--- /dev/null
+++ b/arch/powerpc/kernel/reloc_32.S
@@ -0,0 +1,208 @@
+/*
+ * Code to process dynamic relocations for PPC32.
+ *
+ * Copyrights (C) IBM Corporation, 2011.
+ * Author: Suzuki Poulose <suzuki@in.ibm.com>
+ *
+ * - Based on ppc64 code - reloc_64.S
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/ppc_asm.h>
+
+/* Dynamic section table entry tags */
+DT_RELA = 7 /* Tag for Elf32_Rela section */
+DT_RELASZ = 8 /* Size of the Rela relocs */
+DT_RELAENT = 9 /* Size of one Rela reloc entry */
+
+STN_UNDEF = 0 /* Undefined symbol index */
+STB_LOCAL = 0 /* Local binding for the symbol */
+
+R_PPC_ADDR16_LO = 4 /* Lower half of (S+A) */
+R_PPC_ADDR16_HI = 5 /* Upper half of (S+A) */
+R_PPC_ADDR16_HA = 6 /* High Adjusted (S+A) */
+R_PPC_RELATIVE = 22
+
+/*
+ * r3 = desired final address
+ */
+
+_GLOBAL(relocate)
+
+ mflr r0 /* Save our LR */
+ bl 0f /* Find our current runtime address */
+0: mflr r12 /* Make it accessible */
+ mtlr r0
+
+ lwz r11, (p_dyn - 0b)(r12)
+ add r11, r11, r12 /* runtime address of .dynamic section */
+ lwz r9, (p_rela - 0b)(r12)
+ add r9, r9, r12 /* runtime address of .rela.dyn section */
+ lwz r10, (p_st - 0b)(r12)
+ add r10, r10, r12 /* runtime address of _stext section */
+ lwz r13, (p_sym - 0b)(r12)
+ add r13, r13, r12 /* runtime address of .dynsym section */
+
+ /*
+ * Scan the dynamic section for RELA, RELASZ entries
+ */
+ li r6, 0
+ li r7, 0
+ li r8, 0
+1: lwz r5, 0(r11) /* ELF_Dyn.d_tag */
+ cmpwi r5, 0 /* End of ELF_Dyn[] */
+ beq eodyn
+ cmpwi r5, DT_RELA
+ bne relasz
+ lwz r7, 4(r11) /* r7 = rela.link */
+ b skip
+relasz:
+ cmpwi r5, DT_RELASZ
+ bne relaent
+ lwz r8, 4(r11) /* r8 = Total Rela relocs size */
+ b skip
+relaent:
+ cmpwi r5, DT_RELAENT
+ bne skip
+ lwz r6, 4(r11) /* r6 = Size of one Rela reloc */
+skip:
+ addi r11, r11, 8
+ b 1b
+eodyn: /* End of Dyn Table scan */
+
+ /* Check if we have found all the entries */
+ cmpwi r7, 0
+ beq done
+ cmpwi r8, 0
+ beq done
+ cmpwi r6, 0
+ beq done
+
+
+ /*
+ * Work out the current offset from the link time address of .rela
+ * section.
+ * cur_offset[r7] = rela.run[r9] - rela.link [r7]
+ * _stext.link[r12] = _stext.run[r10] - cur_offset[r7]
+ * final_offset[r3] = _stext.final[r3] - _stext.link[r12]
+ */
+ subf r7, r7, r9 /* cur_offset */
+ subf r12, r7, r10
+ subf r3, r12, r3 /* final_offset */
+
+ subf r8, r6, r8 /* relaz -= relaent */
+ /*
+ * Scan through the .rela table and process each entry
+ * r9 - points to the current .rela table entry
+ * r13 - points to the symbol table
+ */
+
+ /*
+ * Check if we have a relocation based on symbol
+ * r5 will hold the value of the symbol.
+ */
+applyrela:
+ lwz r4, 4(r9) /* r4 = rela.r_info */
+ srwi r5, r4, 8 /* ELF32_R_SYM(r_info) */
+ cmpwi r5, STN_UNDEF /* sym == STN_UNDEF ? */
+ beq get_type /* value = 0 */
+ /* Find the value of the symbol at index(r5) */
+ slwi r5, r5, 4 /* r5 = r5 * sizeof(Elf32_Sym) */
+ add r12, r13, r5 /* r12 = &__dyn_sym[Index] */
+
+ /*
+ * GNU ld has a bug, where dynamic relocs based on
+ * STB_LOCAL symbols, the value should be assumed
+ * to be zero. - Alan Modra
+ */
+ /* XXX: Do we need to check if we are using GNU ld ? */
+ lbz r5, 12(r12) /* r5 = dyn_sym[Index].st_info */
+ extrwi r5, r5, 4, 24 /* r5 = ELF32_ST_BIND(r5) */
+ cmpwi r5, STB_LOCAL /* st_value = 0, ld bug */
+ beq get_type /* We have r5 = 0 */
+ lwz r5, 4(r12) /* r5 = __dyn_sym[Index].st_value */
+
+get_type:
+ /* Load the relocation type to r4 */
+ extrwi r4, r4, 8, 24 /* r4 = ELF32_R_TYPE(r_info) = ((char*)r4)[3] */
+
+ /* R_PPC_RELATIVE */
+ cmpwi r4, R_PPC_RELATIVE
+ bne hi16
+ lwz r4, 0(r9) /* r_offset */
+ lwz r0, 8(r9) /* r_addend */
+ add r0, r0, r3 /* final addend */
+ stwx r0, r4, r7 /* memory[r4+r7]) = (u32)r0 */
+ b nxtrela /* continue */
+
+ /* R_PPC_ADDR16_HI */
+hi16:
+ cmpwi r4, R_PPC_ADDR16_HI
+ bne ha16
+ lwz r4, 0(r9) /* r_offset */
+ lwz r0, 8(r9) /* r_addend */
+ add r0, r0, r3
+ add r0, r0, r5 /* r0 = (S+A+Offset) */
+ extrwi r0, r0, 16, 0 /* r0 = (r0 >> 16) */
+ b store_half
+
+ /* R_PPC_ADDR16_HA */
+ha16:
+ cmpwi r4, R_PPC_ADDR16_HA
+ bne lo16
+ lwz r4, 0(r9) /* r_offset */
+ lwz r0, 8(r9) /* r_addend */
+ add r0, r0, r3
+ add r0, r0, r5 /* r0 = (S+A+Offset) */
+ extrwi r5, r0, 1, 16 /* Extract bit 16 */
+ extrwi r0, r0, 16, 0 /* r0 = (r0 >> 16) */
+ add r0, r0, r5 /* Add it to r0 */
+ b store_half
+
+ /* R_PPC_ADDR16_LO */
+lo16:
+ cmpwi r4, R_PPC_ADDR16_LO
+ bne nxtrela
+ lwz r4, 0(r9) /* r_offset */
+ lwz r0, 8(r9) /* r_addend */
+ add r0, r0, r3
+ add r0, r0, r5 /* r0 = (S+A+Offset) */
+ extrwi r0, r0, 16, 16 /* r0 &= 0xffff */
+ /* Fall through to */
+
+ /* Store half word */
+store_half:
+ sthx r0, r4, r7 /* memory[r4+r7] = (u16)r0 */
+
+nxtrela:
+ /*
+ * We have to flush the modified instructions to the
+ * main storage from the d-cache. And also, invalidate the
+ * cached instructions in i-cache which has been modified.
+ *
+ * We delay the sync / isync operation till the end, since
+ * we won't be executing the modified instructions until
+ * we return from here.
+ */
+ dcbst r4,r7
+ sync /* Ensure the data is flushed before icbi */
+ icbi r4,r7
+ cmpwi r8, 0 /* relasz = 0 ? */
+ ble done
+ add r9, r9, r6 /* move to next entry in the .rela table */
+ subf r8, r6, r8 /* relasz -= relaent */
+ b applyrela
+
+done:
+ sync /* Wait for the flush to finish */
+ isync /* Discard prefetched instructions */
+ blr
+
+p_dyn: .long __dynamic_start - 0b
+p_rela: .long __rela_dyn_start - 0b
+p_sym: .long __dynamic_symtab - 0b
+p_st: .long _stext - 0b
diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S
new file mode 100644
index 00000000..b47a0e1a
--- /dev/null
+++ b/arch/powerpc/kernel/reloc_64.S
@@ -0,0 +1,87 @@
+/*
+ * Code to process dynamic relocations in the kernel.
+ *
+ * Copyright 2008 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/ppc_asm.h>
+
+RELA = 7
+RELACOUNT = 0x6ffffff9
+R_PPC64_RELATIVE = 22
+
+/*
+ * r3 = desired final address of kernel
+ */
+_GLOBAL(relocate)
+ mflr r0
+ bcl 20,31,$+4
+0: mflr r12 /* r12 has runtime addr of label 0 */
+ mtlr r0
+ ld r11,(p_dyn - 0b)(r12)
+ add r11,r11,r12 /* r11 has runtime addr of .dynamic section */
+ ld r9,(p_rela - 0b)(r12)
+ add r9,r9,r12 /* r9 has runtime addr of .rela.dyn section */
+ ld r10,(p_st - 0b)(r12)
+ add r10,r10,r12 /* r10 has runtime addr of _stext */
+
+ /*
+ * Scan the dynamic section for the RELA and RELACOUNT entries.
+ */
+ li r7,0
+ li r8,0
+1: ld r6,0(r11) /* get tag */
+ cmpdi r6,0
+ beq 4f /* end of list */
+ cmpdi r6,RELA
+ bne 2f
+ ld r7,8(r11) /* get RELA pointer in r7 */
+ b 3f
+2: addis r6,r6,(-RELACOUNT)@ha
+ cmpdi r6,RELACOUNT@l
+ bne 3f
+ ld r8,8(r11) /* get RELACOUNT value in r8 */
+3: addi r11,r11,16
+ b 1b
+4: cmpdi r7,0 /* check we have both RELA and RELACOUNT */
+ cmpdi cr1,r8,0
+ beq 6f
+ beq cr1,6f
+
+ /*
+ * Work out linktime address of _stext and hence the
+ * relocation offset to be applied.
+ * cur_offset [r7] = rela.run [r9] - rela.link [r7]
+ * _stext.link [r10] = _stext.run [r10] - cur_offset [r7]
+ * final_offset [r3] = _stext.final [r3] - _stext.link [r10]
+ */
+ subf r7,r7,r9 /* cur_offset */
+ subf r10,r7,r10
+ subf r3,r10,r3 /* final_offset */
+
+ /*
+ * Run through the list of relocations and process the
+ * R_PPC64_RELATIVE ones.
+ */
+ mtctr r8
+5: lwz r0,12(9) /* ELF64_R_TYPE(reloc->r_info) */
+ cmpwi r0,R_PPC64_RELATIVE
+ bne 6f
+ ld r6,0(r9) /* reloc->r_offset */
+ ld r0,16(r9) /* reloc->r_addend */
+ add r0,r0,r3
+ stdx r0,r7,r6
+ addi r9,r9,24
+ bdnz 5b
+
+6: blr
+
+p_dyn: .llong __dynamic_start - 0b
+p_rela: .llong __rela_dyn_start - 0b
+p_st: .llong _stext - 0b
+
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
new file mode 100644
index 00000000..8777fb02
--- /dev/null
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -0,0 +1,790 @@
+/*
+ * Copyright (C) 2000 Tilmann Bitterberg
+ * (tilmann@bitterberg.de)
+ *
+ * RTAS (Runtime Abstraction Services) stuff
+ * Intention is to provide a clean user interface
+ * to use the RTAS.
+ *
+ * TODO:
+ * Split off a header file and maybe move it to a different
+ * location. Write Documentation on what the /proc/rtas/ entries
+ * actually do.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/ctype.h>
+#include <linux/time.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/bitops.h>
+#include <linux/rtc.h>
+
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/machdep.h> /* for ppc_md */
+#include <asm/time.h>
+
+/* Token for Sensors */
+#define KEY_SWITCH 0x0001
+#define ENCLOSURE_SWITCH 0x0002
+#define THERMAL_SENSOR 0x0003
+#define LID_STATUS 0x0004
+#define POWER_SOURCE 0x0005
+#define BATTERY_VOLTAGE 0x0006
+#define BATTERY_REMAINING 0x0007
+#define BATTERY_PERCENTAGE 0x0008
+#define EPOW_SENSOR 0x0009
+#define BATTERY_CYCLESTATE 0x000a
+#define BATTERY_CHARGING 0x000b
+
+/* IBM specific sensors */
+#define IBM_SURVEILLANCE 0x2328 /* 9000 */
+#define IBM_FANRPM 0x2329 /* 9001 */
+#define IBM_VOLTAGE 0x232a /* 9002 */
+#define IBM_DRCONNECTOR 0x232b /* 9003 */
+#define IBM_POWERSUPPLY 0x232c /* 9004 */
+
+/* Status return values */
+#define SENSOR_CRITICAL_HIGH 13
+#define SENSOR_WARNING_HIGH 12
+#define SENSOR_NORMAL 11
+#define SENSOR_WARNING_LOW 10
+#define SENSOR_CRITICAL_LOW 9
+#define SENSOR_SUCCESS 0
+#define SENSOR_HW_ERROR -1
+#define SENSOR_BUSY -2
+#define SENSOR_NOT_EXIST -3
+#define SENSOR_DR_ENTITY -9000
+
+/* Location Codes */
+#define LOC_SCSI_DEV_ADDR 'A'
+#define LOC_SCSI_DEV_LOC 'B'
+#define LOC_CPU 'C'
+#define LOC_DISKETTE 'D'
+#define LOC_ETHERNET 'E'
+#define LOC_FAN 'F'
+#define LOC_GRAPHICS 'G'
+/* reserved / not used 'H' */
+#define LOC_IO_ADAPTER 'I'
+/* reserved / not used 'J' */
+#define LOC_KEYBOARD 'K'
+#define LOC_LCD 'L'
+#define LOC_MEMORY 'M'
+#define LOC_NV_MEMORY 'N'
+#define LOC_MOUSE 'O'
+#define LOC_PLANAR 'P'
+#define LOC_OTHER_IO 'Q'
+#define LOC_PARALLEL 'R'
+#define LOC_SERIAL 'S'
+#define LOC_DEAD_RING 'T'
+#define LOC_RACKMOUNTED 'U' /* for _u_nit is rack mounted */
+#define LOC_VOLTAGE 'V'
+#define LOC_SWITCH_ADAPTER 'W'
+#define LOC_OTHER 'X'
+#define LOC_FIRMWARE 'Y'
+#define LOC_SCSI 'Z'
+
+/* Tokens for indicators */
+#define TONE_FREQUENCY 0x0001 /* 0 - 1000 (HZ)*/
+#define TONE_VOLUME 0x0002 /* 0 - 100 (%) */
+#define SYSTEM_POWER_STATE 0x0003
+#define WARNING_LIGHT 0x0004
+#define DISK_ACTIVITY_LIGHT 0x0005
+#define HEX_DISPLAY_UNIT 0x0006
+#define BATTERY_WARNING_TIME 0x0007
+#define CONDITION_CYCLE_REQUEST 0x0008
+#define SURVEILLANCE_INDICATOR 0x2328 /* 9000 */
+#define DR_ACTION 0x2329 /* 9001 */
+#define DR_INDICATOR 0x232a /* 9002 */
+/* 9003 - 9004: Vendor specific */
+/* 9006 - 9999: Vendor specific */
+
+/* other */
+#define MAX_SENSORS 17 /* I only know of 17 sensors */
+#define MAX_LINELENGTH 256
+#define SENSOR_PREFIX "ibm,sensor-"
+#define cel_to_fahr(x) ((x*9/5)+32)
+
+
+/* Globals */
+static struct rtas_sensors sensors;
+static struct device_node *rtas_node = NULL;
+static unsigned long power_on_time = 0; /* Save the time the user set */
+static char progress_led[MAX_LINELENGTH];
+
+static unsigned long rtas_tone_frequency = 1000;
+static unsigned long rtas_tone_volume = 0;
+
+/* ****************STRUCTS******************************************* */
+struct individual_sensor {
+ unsigned int token;
+ unsigned int quant;
+};
+
+struct rtas_sensors {
+ struct individual_sensor sensor[MAX_SENSORS];
+ unsigned int quant;
+};
+
+/* ****************************************************************** */
+/* Declarations */
+static int ppc_rtas_sensors_show(struct seq_file *m, void *v);
+static int ppc_rtas_clock_show(struct seq_file *m, void *v);
+static ssize_t ppc_rtas_clock_write(struct file *file,
+ const char __user *buf, size_t count, loff_t *ppos);
+static int ppc_rtas_progress_show(struct seq_file *m, void *v);
+static ssize_t ppc_rtas_progress_write(struct file *file,
+ const char __user *buf, size_t count, loff_t *ppos);
+static int ppc_rtas_poweron_show(struct seq_file *m, void *v);
+static ssize_t ppc_rtas_poweron_write(struct file *file,
+ const char __user *buf, size_t count, loff_t *ppos);
+
+static ssize_t ppc_rtas_tone_freq_write(struct file *file,
+ const char __user *buf, size_t count, loff_t *ppos);
+static int ppc_rtas_tone_freq_show(struct seq_file *m, void *v);
+static ssize_t ppc_rtas_tone_volume_write(struct file *file,
+ const char __user *buf, size_t count, loff_t *ppos);
+static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v);
+static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v);
+
+static int sensors_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ppc_rtas_sensors_show, NULL);
+}
+
+static const struct file_operations ppc_rtas_sensors_operations = {
+ .open = sensors_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int poweron_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ppc_rtas_poweron_show, NULL);
+}
+
+static const struct file_operations ppc_rtas_poweron_operations = {
+ .open = poweron_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .write = ppc_rtas_poweron_write,
+ .release = single_release,
+};
+
+static int progress_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ppc_rtas_progress_show, NULL);
+}
+
+static const struct file_operations ppc_rtas_progress_operations = {
+ .open = progress_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .write = ppc_rtas_progress_write,
+ .release = single_release,
+};
+
+static int clock_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ppc_rtas_clock_show, NULL);
+}
+
+static const struct file_operations ppc_rtas_clock_operations = {
+ .open = clock_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .write = ppc_rtas_clock_write,
+ .release = single_release,
+};
+
+static int tone_freq_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ppc_rtas_tone_freq_show, NULL);
+}
+
+static const struct file_operations ppc_rtas_tone_freq_operations = {
+ .open = tone_freq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .write = ppc_rtas_tone_freq_write,
+ .release = single_release,
+};
+
+static int tone_volume_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ppc_rtas_tone_volume_show, NULL);
+}
+
+static const struct file_operations ppc_rtas_tone_volume_operations = {
+ .open = tone_volume_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .write = ppc_rtas_tone_volume_write,
+ .release = single_release,
+};
+
+static int rmo_buf_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ppc_rtas_rmo_buf_show, NULL);
+}
+
+static const struct file_operations ppc_rtas_rmo_buf_ops = {
+ .open = rmo_buf_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int ppc_rtas_find_all_sensors(void);
+static void ppc_rtas_process_sensor(struct seq_file *m,
+ struct individual_sensor *s, int state, int error, const char *loc);
+static char *ppc_rtas_process_error(int error);
+static void get_location_code(struct seq_file *m,
+ struct individual_sensor *s, const char *loc);
+static void check_location_string(struct seq_file *m, const char *c);
+static void check_location(struct seq_file *m, const char *c);
+
+static int __init proc_rtas_init(void)
+{
+ if (!machine_is(pseries))
+ return -ENODEV;
+
+ rtas_node = of_find_node_by_name(NULL, "rtas");
+ if (rtas_node == NULL)
+ return -ENODEV;
+
+ proc_create("powerpc/rtas/progress", S_IRUGO|S_IWUSR, NULL,
+ &ppc_rtas_progress_operations);
+ proc_create("powerpc/rtas/clock", S_IRUGO|S_IWUSR, NULL,
+ &ppc_rtas_clock_operations);
+ proc_create("powerpc/rtas/poweron", S_IWUSR|S_IRUGO, NULL,
+ &ppc_rtas_poweron_operations);
+ proc_create("powerpc/rtas/sensors", S_IRUGO, NULL,
+ &ppc_rtas_sensors_operations);
+ proc_create("powerpc/rtas/frequency", S_IWUSR|S_IRUGO, NULL,
+ &ppc_rtas_tone_freq_operations);
+ proc_create("powerpc/rtas/volume", S_IWUSR|S_IRUGO, NULL,
+ &ppc_rtas_tone_volume_operations);
+ proc_create("powerpc/rtas/rmo_buffer", S_IRUSR, NULL,
+ &ppc_rtas_rmo_buf_ops);
+ return 0;
+}
+
+__initcall(proc_rtas_init);
+
+static int parse_number(const char __user *p, size_t count, unsigned long *val)
+{
+ char buf[40];
+ char *end;
+
+ if (count > 39)
+ return -EINVAL;
+
+ if (copy_from_user(buf, p, count))
+ return -EFAULT;
+
+ buf[count] = 0;
+
+ *val = simple_strtoul(buf, &end, 10);
+ if (*end && *end != '\n')
+ return -EINVAL;
+
+ return 0;
+}
+
+/* ****************************************************************** */
+/* POWER-ON-TIME */
+/* ****************************************************************** */
+static ssize_t ppc_rtas_poweron_write(struct file *file,
+ const char __user *buf, size_t count, loff_t *ppos)
+{
+ struct rtc_time tm;
+ unsigned long nowtime;
+ int error = parse_number(buf, count, &nowtime);
+ if (error)
+ return error;
+
+ power_on_time = nowtime; /* save the time */
+
+ to_tm(nowtime, &tm);
+
+ error = rtas_call(rtas_token("set-time-for-power-on"), 7, 1, NULL,
+ tm.tm_year, tm.tm_mon, tm.tm_mday,
+ tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */);
+ if (error)
+ printk(KERN_WARNING "error: setting poweron time returned: %s\n",
+ ppc_rtas_process_error(error));
+ return count;
+}
+/* ****************************************************************** */
+static int ppc_rtas_poweron_show(struct seq_file *m, void *v)
+{
+ if (power_on_time == 0)
+ seq_printf(m, "Power on time not set\n");
+ else
+ seq_printf(m, "%lu\n",power_on_time);
+ return 0;
+}
+
+/* ****************************************************************** */
+/* PROGRESS */
+/* ****************************************************************** */
+static ssize_t ppc_rtas_progress_write(struct file *file,
+ const char __user *buf, size_t count, loff_t *ppos)
+{
+ unsigned long hex;
+
+ if (count >= MAX_LINELENGTH)
+ count = MAX_LINELENGTH -1;
+ if (copy_from_user(progress_led, buf, count)) { /* save the string */
+ return -EFAULT;
+ }
+ progress_led[count] = 0;
+
+ /* Lets see if the user passed hexdigits */
+ hex = simple_strtoul(progress_led, NULL, 10);
+
+ rtas_progress ((char *)progress_led, hex);
+ return count;
+
+ /* clear the line */
+ /* rtas_progress(" ", 0xffff);*/
+}
+/* ****************************************************************** */
+static int ppc_rtas_progress_show(struct seq_file *m, void *v)
+{
+ if (progress_led[0])
+ seq_printf(m, "%s\n", progress_led);
+ return 0;
+}
+
+/* ****************************************************************** */
+/* CLOCK */
+/* ****************************************************************** */
+static ssize_t ppc_rtas_clock_write(struct file *file,
+ const char __user *buf, size_t count, loff_t *ppos)
+{
+ struct rtc_time tm;
+ unsigned long nowtime;
+ int error = parse_number(buf, count, &nowtime);
+ if (error)
+ return error;
+
+ to_tm(nowtime, &tm);
+ error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL,
+ tm.tm_year, tm.tm_mon, tm.tm_mday,
+ tm.tm_hour, tm.tm_min, tm.tm_sec, 0);
+ if (error)
+ printk(KERN_WARNING "error: setting the clock returned: %s\n",
+ ppc_rtas_process_error(error));
+ return count;
+}
+/* ****************************************************************** */
+static int ppc_rtas_clock_show(struct seq_file *m, void *v)
+{
+ int ret[8];
+ int error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
+
+ if (error) {
+ printk(KERN_WARNING "error: reading the clock returned: %s\n",
+ ppc_rtas_process_error(error));
+ seq_printf(m, "0");
+ } else {
+ unsigned int year, mon, day, hour, min, sec;
+ year = ret[0]; mon = ret[1]; day = ret[2];
+ hour = ret[3]; min = ret[4]; sec = ret[5];
+ seq_printf(m, "%lu\n",
+ mktime(year, mon, day, hour, min, sec));
+ }
+ return 0;
+}
+
+/* ****************************************************************** */
+/* SENSOR STUFF */
+/* ****************************************************************** */
+static int ppc_rtas_sensors_show(struct seq_file *m, void *v)
+{
+ int i,j;
+ int state, error;
+ int get_sensor_state = rtas_token("get-sensor-state");
+
+ seq_printf(m, "RTAS (RunTime Abstraction Services) Sensor Information\n");
+ seq_printf(m, "Sensor\t\tValue\t\tCondition\tLocation\n");
+ seq_printf(m, "********************************************************\n");
+
+ if (ppc_rtas_find_all_sensors() != 0) {
+ seq_printf(m, "\nNo sensors are available\n");
+ return 0;
+ }
+
+ for (i=0; i<sensors.quant; i++) {
+ struct individual_sensor *p = &sensors.sensor[i];
+ char rstr[64];
+ const char *loc;
+ int llen, offs;
+
+ sprintf (rstr, SENSOR_PREFIX"%04d", p->token);
+ loc = of_get_property(rtas_node, rstr, &llen);
+
+ /* A sensor may have multiple instances */
+ for (j = 0, offs = 0; j <= p->quant; j++) {
+ error = rtas_call(get_sensor_state, 2, 2, &state,
+ p->token, j);
+
+ ppc_rtas_process_sensor(m, p, state, error, loc);
+ seq_putc(m, '\n');
+ if (loc) {
+ offs += strlen(loc) + 1;
+ loc += strlen(loc) + 1;
+ if (offs >= llen)
+ loc = NULL;
+ }
+ }
+ }
+ return 0;
+}
+
+/* ****************************************************************** */
+
+static int ppc_rtas_find_all_sensors(void)
+{
+ const unsigned int *utmp;
+ int len, i;
+
+ utmp = of_get_property(rtas_node, "rtas-sensors", &len);
+ if (utmp == NULL) {
+ printk (KERN_ERR "error: could not get rtas-sensors\n");
+ return 1;
+ }
+
+ sensors.quant = len / 8; /* int + int */
+
+ for (i=0; i<sensors.quant; i++) {
+ sensors.sensor[i].token = *utmp++;
+ sensors.sensor[i].quant = *utmp++;
+ }
+ return 0;
+}
+
+/* ****************************************************************** */
+/*
+ * Builds a string of what rtas returned
+ */
+static char *ppc_rtas_process_error(int error)
+{
+ switch (error) {
+ case SENSOR_CRITICAL_HIGH:
+ return "(critical high)";
+ case SENSOR_WARNING_HIGH:
+ return "(warning high)";
+ case SENSOR_NORMAL:
+ return "(normal)";
+ case SENSOR_WARNING_LOW:
+ return "(warning low)";
+ case SENSOR_CRITICAL_LOW:
+ return "(critical low)";
+ case SENSOR_SUCCESS:
+ return "(read ok)";
+ case SENSOR_HW_ERROR:
+ return "(hardware error)";
+ case SENSOR_BUSY:
+ return "(busy)";
+ case SENSOR_NOT_EXIST:
+ return "(non existent)";
+ case SENSOR_DR_ENTITY:
+ return "(dr entity removed)";
+ default:
+ return "(UNKNOWN)";
+ }
+}
+
+/* ****************************************************************** */
+/*
+ * Builds a string out of what the sensor said
+ */
+
+static void ppc_rtas_process_sensor(struct seq_file *m,
+ struct individual_sensor *s, int state, int error, const char *loc)
+{
+ /* Defined return vales */
+ const char * key_switch[] = { "Off\t", "Normal\t", "Secure\t",
+ "Maintenance" };
+ const char * enclosure_switch[] = { "Closed", "Open" };
+ const char * lid_status[] = { " ", "Open", "Closed" };
+ const char * power_source[] = { "AC\t", "Battery",
+ "AC & Battery" };
+ const char * battery_remaining[] = { "Very Low", "Low", "Mid", "High" };
+ const char * epow_sensor[] = {
+ "EPOW Reset", "Cooling warning", "Power warning",
+ "System shutdown", "System halt", "EPOW main enclosure",
+ "EPOW power off" };
+ const char * battery_cyclestate[] = { "None", "In progress",
+ "Requested" };
+ const char * battery_charging[] = { "Charging", "Discharching",
+ "No current flow" };
+ const char * ibm_drconnector[] = { "Empty", "Present", "Unusable",
+ "Exchange" };
+
+ int have_strings = 0;
+ int num_states = 0;
+ int temperature = 0;
+ int unknown = 0;
+
+ /* What kind of sensor do we have here? */
+
+ switch (s->token) {
+ case KEY_SWITCH:
+ seq_printf(m, "Key switch:\t");
+ num_states = sizeof(key_switch) / sizeof(char *);
+ if (state < num_states) {
+ seq_printf(m, "%s\t", key_switch[state]);
+ have_strings = 1;
+ }
+ break;
+ case ENCLOSURE_SWITCH:
+ seq_printf(m, "Enclosure switch:\t");
+ num_states = sizeof(enclosure_switch) / sizeof(char *);
+ if (state < num_states) {
+ seq_printf(m, "%s\t",
+ enclosure_switch[state]);
+ have_strings = 1;
+ }
+ break;
+ case THERMAL_SENSOR:
+ seq_printf(m, "Temp. (C/F):\t");
+ temperature = 1;
+ break;
+ case LID_STATUS:
+ seq_printf(m, "Lid status:\t");
+ num_states = sizeof(lid_status) / sizeof(char *);
+ if (state < num_states) {
+ seq_printf(m, "%s\t", lid_status[state]);
+ have_strings = 1;
+ }
+ break;
+ case POWER_SOURCE:
+ seq_printf(m, "Power source:\t");
+ num_states = sizeof(power_source) / sizeof(char *);
+ if (state < num_states) {
+ seq_printf(m, "%s\t",
+ power_source[state]);
+ have_strings = 1;
+ }
+ break;
+ case BATTERY_VOLTAGE:
+ seq_printf(m, "Battery voltage:\t");
+ break;
+ case BATTERY_REMAINING:
+ seq_printf(m, "Battery remaining:\t");
+ num_states = sizeof(battery_remaining) / sizeof(char *);
+ if (state < num_states)
+ {
+ seq_printf(m, "%s\t",
+ battery_remaining[state]);
+ have_strings = 1;
+ }
+ break;
+ case BATTERY_PERCENTAGE:
+ seq_printf(m, "Battery percentage:\t");
+ break;
+ case EPOW_SENSOR:
+ seq_printf(m, "EPOW Sensor:\t");
+ num_states = sizeof(epow_sensor) / sizeof(char *);
+ if (state < num_states) {
+ seq_printf(m, "%s\t", epow_sensor[state]);
+ have_strings = 1;
+ }
+ break;
+ case BATTERY_CYCLESTATE:
+ seq_printf(m, "Battery cyclestate:\t");
+ num_states = sizeof(battery_cyclestate) /
+ sizeof(char *);
+ if (state < num_states) {
+ seq_printf(m, "%s\t",
+ battery_cyclestate[state]);
+ have_strings = 1;
+ }
+ break;
+ case BATTERY_CHARGING:
+ seq_printf(m, "Battery Charging:\t");
+ num_states = sizeof(battery_charging) / sizeof(char *);
+ if (state < num_states) {
+ seq_printf(m, "%s\t",
+ battery_charging[state]);
+ have_strings = 1;
+ }
+ break;
+ case IBM_SURVEILLANCE:
+ seq_printf(m, "Surveillance:\t");
+ break;
+ case IBM_FANRPM:
+ seq_printf(m, "Fan (rpm):\t");
+ break;
+ case IBM_VOLTAGE:
+ seq_printf(m, "Voltage (mv):\t");
+ break;
+ case IBM_DRCONNECTOR:
+ seq_printf(m, "DR connector:\t");
+ num_states = sizeof(ibm_drconnector) / sizeof(char *);
+ if (state < num_states) {
+ seq_printf(m, "%s\t",
+ ibm_drconnector[state]);
+ have_strings = 1;
+ }
+ break;
+ case IBM_POWERSUPPLY:
+ seq_printf(m, "Powersupply:\t");
+ break;
+ default:
+ seq_printf(m, "Unknown sensor (type %d), ignoring it\n",
+ s->token);
+ unknown = 1;
+ have_strings = 1;
+ break;
+ }
+ if (have_strings == 0) {
+ if (temperature) {
+ seq_printf(m, "%4d /%4d\t", state, cel_to_fahr(state));
+ } else
+ seq_printf(m, "%10d\t", state);
+ }
+ if (unknown == 0) {
+ seq_printf(m, "%s\t", ppc_rtas_process_error(error));
+ get_location_code(m, s, loc);
+ }
+}
+
+/* ****************************************************************** */
+
+static void check_location(struct seq_file *m, const char *c)
+{
+ switch (c[0]) {
+ case LOC_PLANAR:
+ seq_printf(m, "Planar #%c", c[1]);
+ break;
+ case LOC_CPU:
+ seq_printf(m, "CPU #%c", c[1]);
+ break;
+ case LOC_FAN:
+ seq_printf(m, "Fan #%c", c[1]);
+ break;
+ case LOC_RACKMOUNTED:
+ seq_printf(m, "Rack #%c", c[1]);
+ break;
+ case LOC_VOLTAGE:
+ seq_printf(m, "Voltage #%c", c[1]);
+ break;
+ case LOC_LCD:
+ seq_printf(m, "LCD #%c", c[1]);
+ break;
+ case '.':
+ seq_printf(m, "- %c", c[1]);
+ break;
+ default:
+ seq_printf(m, "Unknown location");
+ break;
+ }
+}
+
+
+/* ****************************************************************** */
+/*
+ * Format:
+ * ${LETTER}${NUMBER}[[-/]${LETTER}${NUMBER} [ ... ] ]
+ * the '.' may be an abbrevation
+ */
+static void check_location_string(struct seq_file *m, const char *c)
+{
+ while (*c) {
+ if (isalpha(*c) || *c == '.')
+ check_location(m, c);
+ else if (*c == '/' || *c == '-')
+ seq_printf(m, " at ");
+ c++;
+ }
+}
+
+
+/* ****************************************************************** */
+
+static void get_location_code(struct seq_file *m, struct individual_sensor *s,
+ const char *loc)
+{
+ if (!loc || !*loc) {
+ seq_printf(m, "---");/* does not have a location */
+ } else {
+ check_location_string(m, loc);
+ }
+ seq_putc(m, ' ');
+}
+/* ****************************************************************** */
+/* INDICATORS - Tone Frequency */
+/* ****************************************************************** */
+static ssize_t ppc_rtas_tone_freq_write(struct file *file,
+ const char __user *buf, size_t count, loff_t *ppos)
+{
+ unsigned long freq;
+ int error = parse_number(buf, count, &freq);
+ if (error)
+ return error;
+
+ rtas_tone_frequency = freq; /* save it for later */
+ error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL,
+ TONE_FREQUENCY, 0, freq);
+ if (error)
+ printk(KERN_WARNING "error: setting tone frequency returned: %s\n",
+ ppc_rtas_process_error(error));
+ return count;
+}
+/* ****************************************************************** */
+static int ppc_rtas_tone_freq_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "%lu\n", rtas_tone_frequency);
+ return 0;
+}
+/* ****************************************************************** */
+/* INDICATORS - Tone Volume */
+/* ****************************************************************** */
+static ssize_t ppc_rtas_tone_volume_write(struct file *file,
+ const char __user *buf, size_t count, loff_t *ppos)
+{
+ unsigned long volume;
+ int error = parse_number(buf, count, &volume);
+ if (error)
+ return error;
+
+ if (volume > 100)
+ volume = 100;
+
+ rtas_tone_volume = volume; /* save it for later */
+ error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL,
+ TONE_VOLUME, 0, volume);
+ if (error)
+ printk(KERN_WARNING "error: setting tone volume returned: %s\n",
+ ppc_rtas_process_error(error));
+ return count;
+}
+/* ****************************************************************** */
+static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "%lu\n", rtas_tone_volume);
+ return 0;
+}
+
+#define RMO_READ_BUF_MAX 30
+
+/* RTAS Userspace access */
+static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_RMOBUF_MAX);
+ return 0;
+}
diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c
new file mode 100644
index 00000000..c57c1935
--- /dev/null
+++ b/arch/powerpc/kernel/rtas-rtc.c
@@ -0,0 +1,112 @@
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/init.h>
+#include <linux/rtc.h>
+#include <linux/delay.h>
+#include <linux/ratelimit.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/time.h>
+
+
+#define MAX_RTC_WAIT 5000 /* 5 sec */
+#define RTAS_CLOCK_BUSY (-2)
+unsigned long __init rtas_get_boot_time(void)
+{
+ int ret[8];
+ int error;
+ unsigned int wait_time;
+ u64 max_wait_tb;
+
+ max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
+ do {
+ error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
+
+ wait_time = rtas_busy_delay_time(error);
+ if (wait_time) {
+ /* This is boot time so we spin. */
+ udelay(wait_time*1000);
+ }
+ } while (wait_time && (get_tb() < max_wait_tb));
+
+ if (error != 0) {
+ printk_ratelimited(KERN_WARNING
+ "error: reading the clock failed (%d)\n",
+ error);
+ return 0;
+ }
+
+ return mktime(ret[0], ret[1], ret[2], ret[3], ret[4], ret[5]);
+}
+
+/* NOTE: get_rtc_time will get an error if executed in interrupt context
+ * and if a delay is needed to read the clock. In this case we just
+ * silently return without updating rtc_tm.
+ */
+void rtas_get_rtc_time(struct rtc_time *rtc_tm)
+{
+ int ret[8];
+ int error;
+ unsigned int wait_time;
+ u64 max_wait_tb;
+
+ max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
+ do {
+ error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
+
+ wait_time = rtas_busy_delay_time(error);
+ if (wait_time) {
+ if (in_interrupt()) {
+ memset(rtc_tm, 0, sizeof(struct rtc_time));
+ printk_ratelimited(KERN_WARNING
+ "error: reading clock "
+ "would delay interrupt\n");
+ return; /* delay not allowed */
+ }
+ msleep(wait_time);
+ }
+ } while (wait_time && (get_tb() < max_wait_tb));
+
+ if (error != 0) {
+ printk_ratelimited(KERN_WARNING
+ "error: reading the clock failed (%d)\n",
+ error);
+ return;
+ }
+
+ rtc_tm->tm_sec = ret[5];
+ rtc_tm->tm_min = ret[4];
+ rtc_tm->tm_hour = ret[3];
+ rtc_tm->tm_mday = ret[2];
+ rtc_tm->tm_mon = ret[1] - 1;
+ rtc_tm->tm_year = ret[0] - 1900;
+}
+
+int rtas_set_rtc_time(struct rtc_time *tm)
+{
+ int error, wait_time;
+ u64 max_wait_tb;
+
+ max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
+ do {
+ error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL,
+ tm->tm_year + 1900, tm->tm_mon + 1,
+ tm->tm_mday, tm->tm_hour, tm->tm_min,
+ tm->tm_sec, 0);
+
+ wait_time = rtas_busy_delay_time(error);
+ if (wait_time) {
+ if (in_interrupt())
+ return 1; /* probably decrementer */
+ msleep(wait_time);
+ }
+ } while (wait_time && (get_tb() < max_wait_tb));
+
+ if (error != 0)
+ printk_ratelimited(KERN_WARNING
+ "error: setting the clock failed (%d)\n",
+ error);
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
new file mode 100644
index 00000000..fcec3824
--- /dev/null
+++ b/arch/powerpc/kernel/rtas.c
@@ -0,0 +1,1088 @@
+/*
+ *
+ * Procedures for interfacing to the RTAS on CHRP machines.
+ *
+ * Peter Bergner, IBM March 2001.
+ * Copyright (C) 2001 IBM.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stdarg.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/capability.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+#include <linux/completion.h>
+#include <linux/cpumask.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+#include <linux/reboot.h>
+
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/hvcall.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/page.h>
+#include <asm/param.h>
+#include <asm/delay.h>
+#include <asm/uaccess.h>
+#include <asm/udbg.h>
+#include <asm/syscalls.h>
+#include <asm/smp.h>
+#include <linux/atomic.h>
+#include <asm/time.h>
+#include <asm/mmu.h>
+#include <asm/topology.h>
+#include <asm/pSeries_reconfig.h>
+
+struct rtas_t rtas = {
+ .lock = __ARCH_SPIN_LOCK_UNLOCKED
+};
+EXPORT_SYMBOL(rtas);
+
+DEFINE_SPINLOCK(rtas_data_buf_lock);
+EXPORT_SYMBOL(rtas_data_buf_lock);
+
+char rtas_data_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned;
+EXPORT_SYMBOL(rtas_data_buf);
+
+unsigned long rtas_rmo_buf;
+
+/*
+ * If non-NULL, this gets called when the kernel terminates.
+ * This is done like this so rtas_flash can be a module.
+ */
+void (*rtas_flash_term_hook)(int);
+EXPORT_SYMBOL(rtas_flash_term_hook);
+
+/* RTAS use home made raw locking instead of spin_lock_irqsave
+ * because those can be called from within really nasty contexts
+ * such as having the timebase stopped which would lockup with
+ * normal locks and spinlock debugging enabled
+ */
+static unsigned long lock_rtas(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ preempt_disable();
+ arch_spin_lock_flags(&rtas.lock, flags);
+ return flags;
+}
+
+static void unlock_rtas(unsigned long flags)
+{
+ arch_spin_unlock(&rtas.lock);
+ local_irq_restore(flags);
+ preempt_enable();
+}
+
+/*
+ * call_rtas_display_status and call_rtas_display_status_delay
+ * are designed only for very early low-level debugging, which
+ * is why the token is hard-coded to 10.
+ */
+static void call_rtas_display_status(char c)
+{
+ struct rtas_args *args = &rtas.args;
+ unsigned long s;
+
+ if (!rtas.base)
+ return;
+ s = lock_rtas();
+
+ args->token = 10;
+ args->nargs = 1;
+ args->nret = 1;
+ args->rets = (rtas_arg_t *)&(args->args[1]);
+ args->args[0] = (unsigned char)c;
+
+ enter_rtas(__pa(args));
+
+ unlock_rtas(s);
+}
+
+static void call_rtas_display_status_delay(char c)
+{
+ static int pending_newline = 0; /* did last write end with unprinted newline? */
+ static int width = 16;
+
+ if (c == '\n') {
+ while (width-- > 0)
+ call_rtas_display_status(' ');
+ width = 16;
+ mdelay(500);
+ pending_newline = 1;
+ } else {
+ if (pending_newline) {
+ call_rtas_display_status('\r');
+ call_rtas_display_status('\n');
+ }
+ pending_newline = 0;
+ if (width--) {
+ call_rtas_display_status(c);
+ udelay(10000);
+ }
+ }
+}
+
+void __init udbg_init_rtas_panel(void)
+{
+ udbg_putc = call_rtas_display_status_delay;
+}
+
+#ifdef CONFIG_UDBG_RTAS_CONSOLE
+
+/* If you think you're dying before early_init_dt_scan_rtas() does its
+ * work, you can hard code the token values for your firmware here and
+ * hardcode rtas.base/entry etc.
+ */
+static unsigned int rtas_putchar_token = RTAS_UNKNOWN_SERVICE;
+static unsigned int rtas_getchar_token = RTAS_UNKNOWN_SERVICE;
+
+static void udbg_rtascon_putc(char c)
+{
+ int tries;
+
+ if (!rtas.base)
+ return;
+
+ /* Add CRs before LFs */
+ if (c == '\n')
+ udbg_rtascon_putc('\r');
+
+ /* if there is more than one character to be displayed, wait a bit */
+ for (tries = 0; tries < 16; tries++) {
+ if (rtas_call(rtas_putchar_token, 1, 1, NULL, c) == 0)
+ break;
+ udelay(1000);
+ }
+}
+
+static int udbg_rtascon_getc_poll(void)
+{
+ int c;
+
+ if (!rtas.base)
+ return -1;
+
+ if (rtas_call(rtas_getchar_token, 0, 2, &c))
+ return -1;
+
+ return c;
+}
+
+static int udbg_rtascon_getc(void)
+{
+ int c;
+
+ while ((c = udbg_rtascon_getc_poll()) == -1)
+ ;
+
+ return c;
+}
+
+
+void __init udbg_init_rtas_console(void)
+{
+ udbg_putc = udbg_rtascon_putc;
+ udbg_getc = udbg_rtascon_getc;
+ udbg_getc_poll = udbg_rtascon_getc_poll;
+}
+#endif /* CONFIG_UDBG_RTAS_CONSOLE */
+
+void rtas_progress(char *s, unsigned short hex)
+{
+ struct device_node *root;
+ int width;
+ const int *p;
+ char *os;
+ static int display_character, set_indicator;
+ static int display_width, display_lines, form_feed;
+ static const int *row_width;
+ static DEFINE_SPINLOCK(progress_lock);
+ static int current_line;
+ static int pending_newline = 0; /* did last write end with unprinted newline? */
+
+ if (!rtas.base)
+ return;
+
+ if (display_width == 0) {
+ display_width = 0x10;
+ if ((root = of_find_node_by_path("/rtas"))) {
+ if ((p = of_get_property(root,
+ "ibm,display-line-length", NULL)))
+ display_width = *p;
+ if ((p = of_get_property(root,
+ "ibm,form-feed", NULL)))
+ form_feed = *p;
+ if ((p = of_get_property(root,
+ "ibm,display-number-of-lines", NULL)))
+ display_lines = *p;
+ row_width = of_get_property(root,
+ "ibm,display-truncation-length", NULL);
+ of_node_put(root);
+ }
+ display_character = rtas_token("display-character");
+ set_indicator = rtas_token("set-indicator");
+ }
+
+ if (display_character == RTAS_UNKNOWN_SERVICE) {
+ /* use hex display if available */
+ if (set_indicator != RTAS_UNKNOWN_SERVICE)
+ rtas_call(set_indicator, 3, 1, NULL, 6, 0, hex);
+ return;
+ }
+
+ spin_lock(&progress_lock);
+
+ /*
+ * Last write ended with newline, but we didn't print it since
+ * it would just clear the bottom line of output. Print it now
+ * instead.
+ *
+ * If no newline is pending and form feed is supported, clear the
+ * display with a form feed; otherwise, print a CR to start output
+ * at the beginning of the line.
+ */
+ if (pending_newline) {
+ rtas_call(display_character, 1, 1, NULL, '\r');
+ rtas_call(display_character, 1, 1, NULL, '\n');
+ pending_newline = 0;
+ } else {
+ current_line = 0;
+ if (form_feed)
+ rtas_call(display_character, 1, 1, NULL,
+ (char)form_feed);
+ else
+ rtas_call(display_character, 1, 1, NULL, '\r');
+ }
+
+ if (row_width)
+ width = row_width[current_line];
+ else
+ width = display_width;
+ os = s;
+ while (*os) {
+ if (*os == '\n' || *os == '\r') {
+ /* If newline is the last character, save it
+ * until next call to avoid bumping up the
+ * display output.
+ */
+ if (*os == '\n' && !os[1]) {
+ pending_newline = 1;
+ current_line++;
+ if (current_line > display_lines-1)
+ current_line = display_lines-1;
+ spin_unlock(&progress_lock);
+ return;
+ }
+
+ /* RTAS wants CR-LF, not just LF */
+
+ if (*os == '\n') {
+ rtas_call(display_character, 1, 1, NULL, '\r');
+ rtas_call(display_character, 1, 1, NULL, '\n');
+ } else {
+ /* CR might be used to re-draw a line, so we'll
+ * leave it alone and not add LF.
+ */
+ rtas_call(display_character, 1, 1, NULL, *os);
+ }
+
+ if (row_width)
+ width = row_width[current_line];
+ else
+ width = display_width;
+ } else {
+ width--;
+ rtas_call(display_character, 1, 1, NULL, *os);
+ }
+
+ os++;
+
+ /* if we overwrite the screen length */
+ if (width <= 0)
+ while ((*os != 0) && (*os != '\n') && (*os != '\r'))
+ os++;
+ }
+
+ spin_unlock(&progress_lock);
+}
+EXPORT_SYMBOL(rtas_progress); /* needed by rtas_flash module */
+
+int rtas_token(const char *service)
+{
+ const int *tokp;
+ if (rtas.dev == NULL)
+ return RTAS_UNKNOWN_SERVICE;
+ tokp = of_get_property(rtas.dev, service, NULL);
+ return tokp ? *tokp : RTAS_UNKNOWN_SERVICE;
+}
+EXPORT_SYMBOL(rtas_token);
+
+int rtas_service_present(const char *service)
+{
+ return rtas_token(service) != RTAS_UNKNOWN_SERVICE;
+}
+EXPORT_SYMBOL(rtas_service_present);
+
+#ifdef CONFIG_RTAS_ERROR_LOGGING
+/*
+ * Return the firmware-specified size of the error log buffer
+ * for all rtas calls that require an error buffer argument.
+ * This includes 'check-exception' and 'rtas-last-error'.
+ */
+int rtas_get_error_log_max(void)
+{
+ static int rtas_error_log_max;
+ if (rtas_error_log_max)
+ return rtas_error_log_max;
+
+ rtas_error_log_max = rtas_token ("rtas-error-log-max");
+ if ((rtas_error_log_max == RTAS_UNKNOWN_SERVICE) ||
+ (rtas_error_log_max > RTAS_ERROR_LOG_MAX)) {
+ printk (KERN_WARNING "RTAS: bad log buffer size %d\n",
+ rtas_error_log_max);
+ rtas_error_log_max = RTAS_ERROR_LOG_MAX;
+ }
+ return rtas_error_log_max;
+}
+EXPORT_SYMBOL(rtas_get_error_log_max);
+
+
+static char rtas_err_buf[RTAS_ERROR_LOG_MAX];
+static int rtas_last_error_token;
+
+/** Return a copy of the detailed error text associated with the
+ * most recent failed call to rtas. Because the error text
+ * might go stale if there are any other intervening rtas calls,
+ * this routine must be called atomically with whatever produced
+ * the error (i.e. with rtas.lock still held from the previous call).
+ */
+static char *__fetch_rtas_last_error(char *altbuf)
+{
+ struct rtas_args err_args, save_args;
+ u32 bufsz;
+ char *buf = NULL;
+
+ if (rtas_last_error_token == -1)
+ return NULL;
+
+ bufsz = rtas_get_error_log_max();
+
+ err_args.token = rtas_last_error_token;
+ err_args.nargs = 2;
+ err_args.nret = 1;
+ err_args.args[0] = (rtas_arg_t)__pa(rtas_err_buf);
+ err_args.args[1] = bufsz;
+ err_args.args[2] = 0;
+
+ save_args = rtas.args;
+ rtas.args = err_args;
+
+ enter_rtas(__pa(&rtas.args));
+
+ err_args = rtas.args;
+ rtas.args = save_args;
+
+ /* Log the error in the unlikely case that there was one. */
+ if (unlikely(err_args.args[2] == 0)) {
+ if (altbuf) {
+ buf = altbuf;
+ } else {
+ buf = rtas_err_buf;
+ if (mem_init_done)
+ buf = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC);
+ }
+ if (buf)
+ memcpy(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX);
+ }
+
+ return buf;
+}
+
+#define get_errorlog_buffer() kmalloc(RTAS_ERROR_LOG_MAX, GFP_KERNEL)
+
+#else /* CONFIG_RTAS_ERROR_LOGGING */
+#define __fetch_rtas_last_error(x) NULL
+#define get_errorlog_buffer() NULL
+#endif
+
+int rtas_call(int token, int nargs, int nret, int *outputs, ...)
+{
+ va_list list;
+ int i;
+ unsigned long s;
+ struct rtas_args *rtas_args;
+ char *buff_copy = NULL;
+ int ret;
+
+ if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE)
+ return -1;
+
+ s = lock_rtas();
+ rtas_args = &rtas.args;
+
+ rtas_args->token = token;
+ rtas_args->nargs = nargs;
+ rtas_args->nret = nret;
+ rtas_args->rets = (rtas_arg_t *)&(rtas_args->args[nargs]);
+ va_start(list, outputs);
+ for (i = 0; i < nargs; ++i)
+ rtas_args->args[i] = va_arg(list, rtas_arg_t);
+ va_end(list);
+
+ for (i = 0; i < nret; ++i)
+ rtas_args->rets[i] = 0;
+
+ enter_rtas(__pa(rtas_args));
+
+ /* A -1 return code indicates that the last command couldn't
+ be completed due to a hardware error. */
+ if (rtas_args->rets[0] == -1)
+ buff_copy = __fetch_rtas_last_error(NULL);
+
+ if (nret > 1 && outputs != NULL)
+ for (i = 0; i < nret-1; ++i)
+ outputs[i] = rtas_args->rets[i+1];
+ ret = (nret > 0)? rtas_args->rets[0]: 0;
+
+ unlock_rtas(s);
+
+ if (buff_copy) {
+ log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0);
+ if (mem_init_done)
+ kfree(buff_copy);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(rtas_call);
+
+/* For RTAS_BUSY (-2), delay for 1 millisecond. For an extended busy status
+ * code of 990n, perform the hinted delay of 10^n (last digit) milliseconds.
+ */
+unsigned int rtas_busy_delay_time(int status)
+{
+ int order;
+ unsigned int ms = 0;
+
+ if (status == RTAS_BUSY) {
+ ms = 1;
+ } else if (status >= 9900 && status <= 9905) {
+ order = status - 9900;
+ for (ms = 1; order > 0; order--)
+ ms *= 10;
+ }
+
+ return ms;
+}
+EXPORT_SYMBOL(rtas_busy_delay_time);
+
+/* For an RTAS busy status code, perform the hinted delay. */
+unsigned int rtas_busy_delay(int status)
+{
+ unsigned int ms;
+
+ might_sleep();
+ ms = rtas_busy_delay_time(status);
+ if (ms && need_resched())
+ msleep(ms);
+
+ return ms;
+}
+EXPORT_SYMBOL(rtas_busy_delay);
+
+static int rtas_error_rc(int rtas_rc)
+{
+ int rc;
+
+ switch (rtas_rc) {
+ case -1: /* Hardware Error */
+ rc = -EIO;
+ break;
+ case -3: /* Bad indicator/domain/etc */
+ rc = -EINVAL;
+ break;
+ case -9000: /* Isolation error */
+ rc = -EFAULT;
+ break;
+ case -9001: /* Outstanding TCE/PTE */
+ rc = -EEXIST;
+ break;
+ case -9002: /* No usable slot */
+ rc = -ENODEV;
+ break;
+ default:
+ printk(KERN_ERR "%s: unexpected RTAS error %d\n",
+ __func__, rtas_rc);
+ rc = -ERANGE;
+ break;
+ }
+ return rc;
+}
+
+int rtas_get_power_level(int powerdomain, int *level)
+{
+ int token = rtas_token("get-power-level");
+ int rc;
+
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -ENOENT;
+
+ while ((rc = rtas_call(token, 1, 2, level, powerdomain)) == RTAS_BUSY)
+ udelay(1);
+
+ if (rc < 0)
+ return rtas_error_rc(rc);
+ return rc;
+}
+EXPORT_SYMBOL(rtas_get_power_level);
+
+int rtas_set_power_level(int powerdomain, int level, int *setlevel)
+{
+ int token = rtas_token("set-power-level");
+ int rc;
+
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -ENOENT;
+
+ do {
+ rc = rtas_call(token, 2, 2, setlevel, powerdomain, level);
+ } while (rtas_busy_delay(rc));
+
+ if (rc < 0)
+ return rtas_error_rc(rc);
+ return rc;
+}
+EXPORT_SYMBOL(rtas_set_power_level);
+
+int rtas_get_sensor(int sensor, int index, int *state)
+{
+ int token = rtas_token("get-sensor-state");
+ int rc;
+
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -ENOENT;
+
+ do {
+ rc = rtas_call(token, 2, 2, state, sensor, index);
+ } while (rtas_busy_delay(rc));
+
+ if (rc < 0)
+ return rtas_error_rc(rc);
+ return rc;
+}
+EXPORT_SYMBOL(rtas_get_sensor);
+
+bool rtas_indicator_present(int token, int *maxindex)
+{
+ int proplen, count, i;
+ const struct indicator_elem {
+ u32 token;
+ u32 maxindex;
+ } *indicators;
+
+ indicators = of_get_property(rtas.dev, "rtas-indicators", &proplen);
+ if (!indicators)
+ return false;
+
+ count = proplen / sizeof(struct indicator_elem);
+
+ for (i = 0; i < count; i++) {
+ if (indicators[i].token != token)
+ continue;
+ if (maxindex)
+ *maxindex = indicators[i].maxindex;
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(rtas_indicator_present);
+
+int rtas_set_indicator(int indicator, int index, int new_value)
+{
+ int token = rtas_token("set-indicator");
+ int rc;
+
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -ENOENT;
+
+ do {
+ rc = rtas_call(token, 3, 1, NULL, indicator, index, new_value);
+ } while (rtas_busy_delay(rc));
+
+ if (rc < 0)
+ return rtas_error_rc(rc);
+ return rc;
+}
+EXPORT_SYMBOL(rtas_set_indicator);
+
+/*
+ * Ignoring RTAS extended delay
+ */
+int rtas_set_indicator_fast(int indicator, int index, int new_value)
+{
+ int rc;
+ int token = rtas_token("set-indicator");
+
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -ENOENT;
+
+ rc = rtas_call(token, 3, 1, NULL, indicator, index, new_value);
+
+ WARN_ON(rc == -2 || (rc >= 9900 && rc <= 9905));
+
+ if (rc < 0)
+ return rtas_error_rc(rc);
+
+ return rc;
+}
+
+void rtas_restart(char *cmd)
+{
+ if (rtas_flash_term_hook)
+ rtas_flash_term_hook(SYS_RESTART);
+ printk("RTAS system-reboot returned %d\n",
+ rtas_call(rtas_token("system-reboot"), 0, 1, NULL));
+ for (;;);
+}
+
+void rtas_power_off(void)
+{
+ if (rtas_flash_term_hook)
+ rtas_flash_term_hook(SYS_POWER_OFF);
+ /* allow power on only with power button press */
+ printk("RTAS power-off returned %d\n",
+ rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1));
+ for (;;);
+}
+
+void rtas_halt(void)
+{
+ if (rtas_flash_term_hook)
+ rtas_flash_term_hook(SYS_HALT);
+ /* allow power on only with power button press */
+ printk("RTAS power-off returned %d\n",
+ rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1));
+ for (;;);
+}
+
+/* Must be in the RMO region, so we place it here */
+static char rtas_os_term_buf[2048];
+
+void rtas_os_term(char *str)
+{
+ int status;
+
+ /*
+ * Firmware with the ibm,extended-os-term property is guaranteed
+ * to always return from an ibm,os-term call. Earlier versions without
+ * this property may terminate the partition which we want to avoid
+ * since it interferes with panic_timeout.
+ */
+ if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term") ||
+ RTAS_UNKNOWN_SERVICE == rtas_token("ibm,extended-os-term"))
+ return;
+
+ snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str);
+
+ do {
+ status = rtas_call(rtas_token("ibm,os-term"), 1, 1, NULL,
+ __pa(rtas_os_term_buf));
+ } while (rtas_busy_delay(status));
+
+ if (status != 0)
+ printk(KERN_EMERG "ibm,os-term call failed %d\n", status);
+}
+
+static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE;
+#ifdef CONFIG_PPC_PSERIES
+static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_when_done)
+{
+ u16 slb_size = mmu_slb_size;
+ int rc = H_MULTI_THREADS_ACTIVE;
+ int cpu;
+
+ slb_set_size(SLB_MIN_SIZE);
+ printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id());
+
+ while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) &&
+ !atomic_read(&data->error))
+ rc = rtas_call(data->token, 0, 1, NULL);
+
+ if (rc || atomic_read(&data->error)) {
+ printk(KERN_DEBUG "ibm,suspend-me returned %d\n", rc);
+ slb_set_size(slb_size);
+ }
+
+ if (atomic_read(&data->error))
+ rc = atomic_read(&data->error);
+
+ atomic_set(&data->error, rc);
+ pSeries_coalesce_init();
+
+ if (wake_when_done) {
+ atomic_set(&data->done, 1);
+
+ for_each_online_cpu(cpu)
+ plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
+ }
+
+ if (atomic_dec_return(&data->working) == 0)
+ complete(data->complete);
+
+ return rc;
+}
+
+int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data)
+{
+ atomic_inc(&data->working);
+ return __rtas_suspend_last_cpu(data, 0);
+}
+
+static int __rtas_suspend_cpu(struct rtas_suspend_me_data *data, int wake_when_done)
+{
+ long rc = H_SUCCESS;
+ unsigned long msr_save;
+ int cpu;
+
+ atomic_inc(&data->working);
+
+ /* really need to ensure MSR.EE is off for H_JOIN */
+ msr_save = mfmsr();
+ mtmsr(msr_save & ~(MSR_EE));
+
+ while (rc == H_SUCCESS && !atomic_read(&data->done) && !atomic_read(&data->error))
+ rc = plpar_hcall_norets(H_JOIN);
+
+ mtmsr(msr_save);
+
+ if (rc == H_SUCCESS) {
+ /* This cpu was prodded and the suspend is complete. */
+ goto out;
+ } else if (rc == H_CONTINUE) {
+ /* All other cpus are in H_JOIN, this cpu does
+ * the suspend.
+ */
+ return __rtas_suspend_last_cpu(data, wake_when_done);
+ } else {
+ printk(KERN_ERR "H_JOIN on cpu %i failed with rc = %ld\n",
+ smp_processor_id(), rc);
+ atomic_set(&data->error, rc);
+ }
+
+ if (wake_when_done) {
+ atomic_set(&data->done, 1);
+
+ /* This cpu did the suspend or got an error; in either case,
+ * we need to prod all other other cpus out of join state.
+ * Extra prods are harmless.
+ */
+ for_each_online_cpu(cpu)
+ plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
+ }
+out:
+ if (atomic_dec_return(&data->working) == 0)
+ complete(data->complete);
+ return rc;
+}
+
+int rtas_suspend_cpu(struct rtas_suspend_me_data *data)
+{
+ return __rtas_suspend_cpu(data, 0);
+}
+
+static void rtas_percpu_suspend_me(void *info)
+{
+ __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1);
+}
+
+int rtas_ibm_suspend_me(struct rtas_args *args)
+{
+ long state;
+ long rc;
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+ struct rtas_suspend_me_data data;
+ DECLARE_COMPLETION_ONSTACK(done);
+
+ if (!rtas_service_present("ibm,suspend-me"))
+ return -ENOSYS;
+
+ /* Make sure the state is valid */
+ rc = plpar_hcall(H_VASI_STATE, retbuf,
+ ((u64)args->args[0] << 32) | args->args[1]);
+
+ state = retbuf[0];
+
+ if (rc) {
+ printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned %ld\n",rc);
+ return rc;
+ } else if (state == H_VASI_ENABLED) {
+ args->args[args->nargs] = RTAS_NOT_SUSPENDABLE;
+ return 0;
+ } else if (state != H_VASI_SUSPENDING) {
+ printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned state %ld\n",
+ state);
+ args->args[args->nargs] = -1;
+ return 0;
+ }
+
+ atomic_set(&data.working, 0);
+ atomic_set(&data.done, 0);
+ atomic_set(&data.error, 0);
+ data.token = rtas_token("ibm,suspend-me");
+ data.complete = &done;
+ stop_topology_update();
+
+ /* Call function on all CPUs. One of us will make the
+ * rtas call
+ */
+ if (on_each_cpu(rtas_percpu_suspend_me, &data, 0))
+ atomic_set(&data.error, -EINVAL);
+
+ wait_for_completion(&done);
+
+ if (atomic_read(&data.error) != 0)
+ printk(KERN_ERR "Error doing global join\n");
+
+ start_topology_update();
+
+ return atomic_read(&data.error);
+}
+#else /* CONFIG_PPC_PSERIES */
+int rtas_ibm_suspend_me(struct rtas_args *args)
+{
+ return -ENOSYS;
+}
+#endif
+
+/**
+ * Find a specific pseries error log in an RTAS extended event log.
+ * @log: RTAS error/event log
+ * @section_id: two character section identifier
+ *
+ * Returns a pointer to the specified errorlog or NULL if not found.
+ */
+struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
+ uint16_t section_id)
+{
+ struct rtas_ext_event_log_v6 *ext_log =
+ (struct rtas_ext_event_log_v6 *)log->buffer;
+ struct pseries_errorlog *sect;
+ unsigned char *p, *log_end;
+
+ /* Check that we understand the format */
+ if (log->extended_log_length < sizeof(struct rtas_ext_event_log_v6) ||
+ ext_log->log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG ||
+ ext_log->company_id != RTAS_V6EXT_COMPANY_ID_IBM)
+ return NULL;
+
+ log_end = log->buffer + log->extended_log_length;
+ p = ext_log->vendor_log;
+
+ while (p < log_end) {
+ sect = (struct pseries_errorlog *)p;
+ if (sect->id == section_id)
+ return sect;
+ p += sect->length;
+ }
+
+ return NULL;
+}
+
+asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
+{
+ struct rtas_args args;
+ unsigned long flags;
+ char *buff_copy, *errbuf = NULL;
+ int nargs;
+ int rc;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (copy_from_user(&args, uargs, 3 * sizeof(u32)) != 0)
+ return -EFAULT;
+
+ nargs = args.nargs;
+ if (nargs > ARRAY_SIZE(args.args)
+ || args.nret > ARRAY_SIZE(args.args)
+ || nargs + args.nret > ARRAY_SIZE(args.args))
+ return -EINVAL;
+
+ /* Copy in args. */
+ if (copy_from_user(args.args, uargs->args,
+ nargs * sizeof(rtas_arg_t)) != 0)
+ return -EFAULT;
+
+ if (args.token == RTAS_UNKNOWN_SERVICE)
+ return -EINVAL;
+
+ args.rets = &args.args[nargs];
+ memset(args.rets, 0, args.nret * sizeof(rtas_arg_t));
+
+ /* Need to handle ibm,suspend_me call specially */
+ if (args.token == ibm_suspend_me_token) {
+ rc = rtas_ibm_suspend_me(&args);
+ if (rc)
+ return rc;
+ goto copy_return;
+ }
+
+ buff_copy = get_errorlog_buffer();
+
+ flags = lock_rtas();
+
+ rtas.args = args;
+ enter_rtas(__pa(&rtas.args));
+ args = rtas.args;
+
+ /* A -1 return code indicates that the last command couldn't
+ be completed due to a hardware error. */
+ if (args.rets[0] == -1)
+ errbuf = __fetch_rtas_last_error(buff_copy);
+
+ unlock_rtas(flags);
+
+ if (buff_copy) {
+ if (errbuf)
+ log_error(errbuf, ERR_TYPE_RTAS_LOG, 0);
+ kfree(buff_copy);
+ }
+
+ copy_return:
+ /* Copy out args. */
+ if (copy_to_user(uargs->args + nargs,
+ args.args + nargs,
+ args.nret * sizeof(rtas_arg_t)) != 0)
+ return -EFAULT;
+
+ return 0;
+}
+
+/*
+ * Call early during boot, before mem init or bootmem, to retrieve the RTAS
+ * informations from the device-tree and allocate the RMO buffer for userland
+ * accesses.
+ */
+void __init rtas_initialize(void)
+{
+ unsigned long rtas_region = RTAS_INSTANTIATE_MAX;
+
+ /* Get RTAS dev node and fill up our "rtas" structure with infos
+ * about it.
+ */
+ rtas.dev = of_find_node_by_name(NULL, "rtas");
+ if (rtas.dev) {
+ const u32 *basep, *entryp, *sizep;
+
+ basep = of_get_property(rtas.dev, "linux,rtas-base", NULL);
+ sizep = of_get_property(rtas.dev, "rtas-size", NULL);
+ if (basep != NULL && sizep != NULL) {
+ rtas.base = *basep;
+ rtas.size = *sizep;
+ entryp = of_get_property(rtas.dev,
+ "linux,rtas-entry", NULL);
+ if (entryp == NULL) /* Ugh */
+ rtas.entry = rtas.base;
+ else
+ rtas.entry = *entryp;
+ } else
+ rtas.dev = NULL;
+ }
+ if (!rtas.dev)
+ return;
+
+ /* If RTAS was found, allocate the RMO buffer for it and look for
+ * the stop-self token if any
+ */
+#ifdef CONFIG_PPC64
+ if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) {
+ rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX);
+ ibm_suspend_me_token = rtas_token("ibm,suspend-me");
+ }
+#endif
+ rtas_rmo_buf = memblock_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region);
+
+#ifdef CONFIG_RTAS_ERROR_LOGGING
+ rtas_last_error_token = rtas_token("rtas-last-error");
+#endif
+}
+
+int __init early_init_dt_scan_rtas(unsigned long node,
+ const char *uname, int depth, void *data)
+{
+ u32 *basep, *entryp, *sizep;
+
+ if (depth != 1 || strcmp(uname, "rtas") != 0)
+ return 0;
+
+ basep = of_get_flat_dt_prop(node, "linux,rtas-base", NULL);
+ entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL);
+ sizep = of_get_flat_dt_prop(node, "rtas-size", NULL);
+
+ if (basep && entryp && sizep) {
+ rtas.base = *basep;
+ rtas.entry = *entryp;
+ rtas.size = *sizep;
+ }
+
+#ifdef CONFIG_UDBG_RTAS_CONSOLE
+ basep = of_get_flat_dt_prop(node, "put-term-char", NULL);
+ if (basep)
+ rtas_putchar_token = *basep;
+
+ basep = of_get_flat_dt_prop(node, "get-term-char", NULL);
+ if (basep)
+ rtas_getchar_token = *basep;
+
+ if (rtas_putchar_token != RTAS_UNKNOWN_SERVICE &&
+ rtas_getchar_token != RTAS_UNKNOWN_SERVICE)
+ udbg_init_rtas_console();
+
+#endif
+
+ /* break now */
+ return 1;
+}
+
+static arch_spinlock_t timebase_lock;
+static u64 timebase = 0;
+
+void __cpuinit rtas_give_timebase(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ hard_irq_disable();
+ arch_spin_lock(&timebase_lock);
+ rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL);
+ timebase = get_tb();
+ arch_spin_unlock(&timebase_lock);
+
+ while (timebase)
+ barrier();
+ rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL);
+ local_irq_restore(flags);
+}
+
+void __cpuinit rtas_take_timebase(void)
+{
+ while (!timebase)
+ barrier();
+ arch_spin_lock(&timebase_lock);
+ set_tb(timebase >> 32, timebase & 0xffffffff);
+ timebase = 0;
+ arch_spin_unlock(&timebase_lock);
+}
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
new file mode 100644
index 00000000..4174b4b2
--- /dev/null
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -0,0 +1,807 @@
+/*
+ * c 2001 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * /proc/powerpc/rtas/firmware_flash interface
+ *
+ * This file implements a firmware_flash interface to pump a firmware
+ * image into the kernel. At reboot time rtas_restart() will see the
+ * firmware image and flash it as it reboots (see rtas.c).
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/reboot.h>
+#include <asm/delay.h>
+#include <asm/uaccess.h>
+#include <asm/rtas.h>
+#include <asm/abs_addr.h>
+
+#define MODULE_VERS "1.0"
+#define MODULE_NAME "rtas_flash"
+
+#define FIRMWARE_FLASH_NAME "firmware_flash"
+#define FIRMWARE_UPDATE_NAME "firmware_update"
+#define MANAGE_FLASH_NAME "manage_flash"
+#define VALIDATE_FLASH_NAME "validate_flash"
+
+/* General RTAS Status Codes */
+#define RTAS_RC_SUCCESS 0
+#define RTAS_RC_HW_ERR -1
+#define RTAS_RC_BUSY -2
+
+/* Flash image status values */
+#define FLASH_AUTH -9002 /* RTAS Not Service Authority Partition */
+#define FLASH_NO_OP -1099 /* No operation initiated by user */
+#define FLASH_IMG_SHORT -1005 /* Flash image shorter than expected */
+#define FLASH_IMG_BAD_LEN -1004 /* Bad length value in flash list block */
+#define FLASH_IMG_NULL_DATA -1003 /* Bad data value in flash list block */
+#define FLASH_IMG_READY 0 /* Firmware img ready for flash on reboot */
+
+/* Manage image status values */
+#define MANAGE_AUTH -9002 /* RTAS Not Service Authority Partition */
+#define MANAGE_ACTIVE_ERR -9001 /* RTAS Cannot Overwrite Active Img */
+#define MANAGE_NO_OP -1099 /* No operation initiated by user */
+#define MANAGE_PARAM_ERR -3 /* RTAS Parameter Error */
+#define MANAGE_HW_ERR -1 /* RTAS Hardware Error */
+
+/* Validate image status values */
+#define VALIDATE_AUTH -9002 /* RTAS Not Service Authority Partition */
+#define VALIDATE_NO_OP -1099 /* No operation initiated by the user */
+#define VALIDATE_INCOMPLETE -1002 /* User copied < VALIDATE_BUF_SIZE */
+#define VALIDATE_READY -1001 /* Firmware image ready for validation */
+#define VALIDATE_PARAM_ERR -3 /* RTAS Parameter Error */
+#define VALIDATE_HW_ERR -1 /* RTAS Hardware Error */
+#define VALIDATE_TMP_UPDATE 0 /* Validate Return Status */
+#define VALIDATE_FLASH_AUTH 1 /* Validate Return Status */
+#define VALIDATE_INVALID_IMG 2 /* Validate Return Status */
+#define VALIDATE_CUR_UNKNOWN 3 /* Validate Return Status */
+#define VALIDATE_TMP_COMMIT_DL 4 /* Validate Return Status */
+#define VALIDATE_TMP_COMMIT 5 /* Validate Return Status */
+#define VALIDATE_TMP_UPDATE_DL 6 /* Validate Return Status */
+
+/* ibm,manage-flash-image operation tokens */
+#define RTAS_REJECT_TMP_IMG 0
+#define RTAS_COMMIT_TMP_IMG 1
+
+/* Array sizes */
+#define VALIDATE_BUF_SIZE 4096
+#define RTAS_MSG_MAXLEN 64
+
+/* Quirk - RTAS requires 4k list length and block size */
+#define RTAS_BLKLIST_LENGTH 4096
+#define RTAS_BLK_SIZE 4096
+
+struct flash_block {
+ char *data;
+ unsigned long length;
+};
+
+/* This struct is very similar but not identical to
+ * that needed by the rtas flash update.
+ * All we need to do for rtas is rewrite num_blocks
+ * into a version/length and translate the pointers
+ * to absolute.
+ */
+#define FLASH_BLOCKS_PER_NODE ((RTAS_BLKLIST_LENGTH - 16) / sizeof(struct flash_block))
+struct flash_block_list {
+ unsigned long num_blocks;
+ struct flash_block_list *next;
+ struct flash_block blocks[FLASH_BLOCKS_PER_NODE];
+};
+
+static struct flash_block_list *rtas_firmware_flash_list;
+
+/* Use slab cache to guarantee 4k alignment */
+static struct kmem_cache *flash_block_cache = NULL;
+
+#define FLASH_BLOCK_LIST_VERSION (1UL)
+
+/* Local copy of the flash block list.
+ * We only allow one open of the flash proc file and create this
+ * list as we go. The rtas_firmware_flash_list varable will be
+ * set once the data is fully read.
+ *
+ * For convenience as we build the list we use virtual addrs,
+ * we do not fill in the version number, and the length field
+ * is treated as the number of entries currently in the block
+ * (i.e. not a byte count). This is all fixed when calling
+ * the flash routine.
+ */
+
+/* Status int must be first member of struct */
+struct rtas_update_flash_t
+{
+ int status; /* Flash update status */
+ struct flash_block_list *flist; /* Local copy of flash block list */
+};
+
+/* Status int must be first member of struct */
+struct rtas_manage_flash_t
+{
+ int status; /* Returned status */
+ unsigned int op; /* Reject or commit image */
+};
+
+/* Status int must be first member of struct */
+struct rtas_validate_flash_t
+{
+ int status; /* Returned status */
+ char buf[VALIDATE_BUF_SIZE]; /* Candidate image buffer */
+ unsigned int buf_size; /* Size of image buf */
+ unsigned int update_results; /* Update results token */
+};
+
+static DEFINE_SPINLOCK(flash_file_open_lock);
+static struct proc_dir_entry *firmware_flash_pde;
+static struct proc_dir_entry *firmware_update_pde;
+static struct proc_dir_entry *validate_pde;
+static struct proc_dir_entry *manage_pde;
+
+/* Do simple sanity checks on the flash image. */
+static int flash_list_valid(struct flash_block_list *flist)
+{
+ struct flash_block_list *f;
+ int i;
+ unsigned long block_size, image_size;
+
+ /* Paranoid self test here. We also collect the image size. */
+ image_size = 0;
+ for (f = flist; f; f = f->next) {
+ for (i = 0; i < f->num_blocks; i++) {
+ if (f->blocks[i].data == NULL) {
+ return FLASH_IMG_NULL_DATA;
+ }
+ block_size = f->blocks[i].length;
+ if (block_size <= 0 || block_size > RTAS_BLK_SIZE) {
+ return FLASH_IMG_BAD_LEN;
+ }
+ image_size += block_size;
+ }
+ }
+
+ if (image_size < (256 << 10)) {
+ if (image_size < 2)
+ return FLASH_NO_OP;
+ }
+
+ printk(KERN_INFO "FLASH: flash image with %ld bytes stored for hardware flash on reboot\n", image_size);
+
+ return FLASH_IMG_READY;
+}
+
+static void free_flash_list(struct flash_block_list *f)
+{
+ struct flash_block_list *next;
+ int i;
+
+ while (f) {
+ for (i = 0; i < f->num_blocks; i++)
+ kmem_cache_free(flash_block_cache, f->blocks[i].data);
+ next = f->next;
+ kmem_cache_free(flash_block_cache, f);
+ f = next;
+ }
+}
+
+static int rtas_flash_release(struct inode *inode, struct file *file)
+{
+ struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+ struct rtas_update_flash_t *uf;
+
+ uf = (struct rtas_update_flash_t *) dp->data;
+ if (uf->flist) {
+ /* File was opened in write mode for a new flash attempt */
+ /* Clear saved list */
+ if (rtas_firmware_flash_list) {
+ free_flash_list(rtas_firmware_flash_list);
+ rtas_firmware_flash_list = NULL;
+ }
+
+ if (uf->status != FLASH_AUTH)
+ uf->status = flash_list_valid(uf->flist);
+
+ if (uf->status == FLASH_IMG_READY)
+ rtas_firmware_flash_list = uf->flist;
+ else
+ free_flash_list(uf->flist);
+
+ uf->flist = NULL;
+ }
+
+ atomic_dec(&dp->count);
+ return 0;
+}
+
+static void get_flash_status_msg(int status, char *buf)
+{
+ char *msg;
+
+ switch (status) {
+ case FLASH_AUTH:
+ msg = "error: this partition does not have service authority\n";
+ break;
+ case FLASH_NO_OP:
+ msg = "info: no firmware image for flash\n";
+ break;
+ case FLASH_IMG_SHORT:
+ msg = "error: flash image short\n";
+ break;
+ case FLASH_IMG_BAD_LEN:
+ msg = "error: internal error bad length\n";
+ break;
+ case FLASH_IMG_NULL_DATA:
+ msg = "error: internal error null data\n";
+ break;
+ case FLASH_IMG_READY:
+ msg = "ready: firmware image ready for flash on reboot\n";
+ break;
+ default:
+ sprintf(buf, "error: unexpected status value %d\n", status);
+ return;
+ }
+
+ strcpy(buf, msg);
+}
+
+/* Reading the proc file will show status (not the firmware contents) */
+static ssize_t rtas_flash_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+ struct rtas_update_flash_t *uf;
+ char msg[RTAS_MSG_MAXLEN];
+
+ uf = dp->data;
+
+ if (!strcmp(dp->name, FIRMWARE_FLASH_NAME)) {
+ get_flash_status_msg(uf->status, msg);
+ } else { /* FIRMWARE_UPDATE_NAME */
+ sprintf(msg, "%d\n", uf->status);
+ }
+
+ return simple_read_from_buffer(buf, count, ppos, msg, strlen(msg));
+}
+
+/* constructor for flash_block_cache */
+void rtas_block_ctor(void *ptr)
+{
+ memset(ptr, 0, RTAS_BLK_SIZE);
+}
+
+/* We could be much more efficient here. But to keep this function
+ * simple we allocate a page to the block list no matter how small the
+ * count is. If the system is low on memory it will be just as well
+ * that we fail....
+ */
+static ssize_t rtas_flash_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *off)
+{
+ struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+ struct rtas_update_flash_t *uf;
+ char *p;
+ int next_free;
+ struct flash_block_list *fl;
+
+ uf = (struct rtas_update_flash_t *) dp->data;
+
+ if (uf->status == FLASH_AUTH || count == 0)
+ return count; /* discard data */
+
+ /* In the case that the image is not ready for flashing, the memory
+ * allocated for the block list will be freed upon the release of the
+ * proc file
+ */
+ if (uf->flist == NULL) {
+ uf->flist = kmem_cache_alloc(flash_block_cache, GFP_KERNEL);
+ if (!uf->flist)
+ return -ENOMEM;
+ }
+
+ fl = uf->flist;
+ while (fl->next)
+ fl = fl->next; /* seek to last block_list for append */
+ next_free = fl->num_blocks;
+ if (next_free == FLASH_BLOCKS_PER_NODE) {
+ /* Need to allocate another block_list */
+ fl->next = kmem_cache_alloc(flash_block_cache, GFP_KERNEL);
+ if (!fl->next)
+ return -ENOMEM;
+ fl = fl->next;
+ next_free = 0;
+ }
+
+ if (count > RTAS_BLK_SIZE)
+ count = RTAS_BLK_SIZE;
+ p = kmem_cache_alloc(flash_block_cache, GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ if(copy_from_user(p, buffer, count)) {
+ kmem_cache_free(flash_block_cache, p);
+ return -EFAULT;
+ }
+ fl->blocks[next_free].data = p;
+ fl->blocks[next_free].length = count;
+ fl->num_blocks++;
+
+ return count;
+}
+
+static int rtas_excl_open(struct inode *inode, struct file *file)
+{
+ struct proc_dir_entry *dp = PDE(inode);
+
+ /* Enforce exclusive open with use count of PDE */
+ spin_lock(&flash_file_open_lock);
+ if (atomic_read(&dp->count) > 2) {
+ spin_unlock(&flash_file_open_lock);
+ return -EBUSY;
+ }
+
+ atomic_inc(&dp->count);
+ spin_unlock(&flash_file_open_lock);
+
+ return 0;
+}
+
+static int rtas_excl_release(struct inode *inode, struct file *file)
+{
+ struct proc_dir_entry *dp = PDE(inode);
+
+ atomic_dec(&dp->count);
+
+ return 0;
+}
+
+static void manage_flash(struct rtas_manage_flash_t *args_buf)
+{
+ s32 rc;
+
+ do {
+ rc = rtas_call(rtas_token("ibm,manage-flash-image"), 1,
+ 1, NULL, args_buf->op);
+ } while (rtas_busy_delay(rc));
+
+ args_buf->status = rc;
+}
+
+static ssize_t manage_flash_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+ struct rtas_manage_flash_t *args_buf;
+ char msg[RTAS_MSG_MAXLEN];
+ int msglen;
+
+ args_buf = dp->data;
+ if (args_buf == NULL)
+ return 0;
+
+ msglen = sprintf(msg, "%d\n", args_buf->status);
+
+ return simple_read_from_buffer(buf, count, ppos, msg, msglen);
+}
+
+static ssize_t manage_flash_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *off)
+{
+ struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+ struct rtas_manage_flash_t *args_buf;
+ const char reject_str[] = "0";
+ const char commit_str[] = "1";
+ char stkbuf[10];
+ int op;
+
+ args_buf = (struct rtas_manage_flash_t *) dp->data;
+ if ((args_buf->status == MANAGE_AUTH) || (count == 0))
+ return count;
+
+ op = -1;
+ if (buf) {
+ if (count > 9) count = 9;
+ if (copy_from_user (stkbuf, buf, count)) {
+ return -EFAULT;
+ }
+ if (strncmp(stkbuf, reject_str, strlen(reject_str)) == 0)
+ op = RTAS_REJECT_TMP_IMG;
+ else if (strncmp(stkbuf, commit_str, strlen(commit_str)) == 0)
+ op = RTAS_COMMIT_TMP_IMG;
+ }
+
+ if (op == -1) /* buf is empty, or contains invalid string */
+ return -EINVAL;
+
+ args_buf->op = op;
+ manage_flash(args_buf);
+
+ return count;
+}
+
+static void validate_flash(struct rtas_validate_flash_t *args_buf)
+{
+ int token = rtas_token("ibm,validate-flash-image");
+ int update_results;
+ s32 rc;
+
+ rc = 0;
+ do {
+ spin_lock(&rtas_data_buf_lock);
+ memcpy(rtas_data_buf, args_buf->buf, VALIDATE_BUF_SIZE);
+ rc = rtas_call(token, 2, 2, &update_results,
+ (u32) __pa(rtas_data_buf), args_buf->buf_size);
+ memcpy(args_buf->buf, rtas_data_buf, VALIDATE_BUF_SIZE);
+ spin_unlock(&rtas_data_buf_lock);
+ } while (rtas_busy_delay(rc));
+
+ args_buf->status = rc;
+ args_buf->update_results = update_results;
+}
+
+static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf,
+ char *msg)
+{
+ int n;
+
+ if (args_buf->status >= VALIDATE_TMP_UPDATE) {
+ n = sprintf(msg, "%d\n", args_buf->update_results);
+ if ((args_buf->update_results >= VALIDATE_CUR_UNKNOWN) ||
+ (args_buf->update_results == VALIDATE_TMP_UPDATE))
+ n += sprintf(msg + n, "%s\n", args_buf->buf);
+ } else {
+ n = sprintf(msg, "%d\n", args_buf->status);
+ }
+ return n;
+}
+
+static ssize_t validate_flash_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+ struct rtas_validate_flash_t *args_buf;
+ char msg[RTAS_MSG_MAXLEN];
+ int msglen;
+
+ args_buf = dp->data;
+
+ msglen = get_validate_flash_msg(args_buf, msg);
+
+ return simple_read_from_buffer(buf, count, ppos, msg, msglen);
+}
+
+static ssize_t validate_flash_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *off)
+{
+ struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+ struct rtas_validate_flash_t *args_buf;
+ int rc;
+
+ args_buf = (struct rtas_validate_flash_t *) dp->data;
+
+ if (dp->data == NULL) {
+ dp->data = kmalloc(sizeof(struct rtas_validate_flash_t),
+ GFP_KERNEL);
+ if (dp->data == NULL)
+ return -ENOMEM;
+ }
+
+ /* We are only interested in the first 4K of the
+ * candidate image */
+ if ((*off >= VALIDATE_BUF_SIZE) ||
+ (args_buf->status == VALIDATE_AUTH)) {
+ *off += count;
+ return count;
+ }
+
+ if (*off + count >= VALIDATE_BUF_SIZE) {
+ count = VALIDATE_BUF_SIZE - *off;
+ args_buf->status = VALIDATE_READY;
+ } else {
+ args_buf->status = VALIDATE_INCOMPLETE;
+ }
+
+ if (!access_ok(VERIFY_READ, buf, count)) {
+ rc = -EFAULT;
+ goto done;
+ }
+ if (copy_from_user(args_buf->buf + *off, buf, count)) {
+ rc = -EFAULT;
+ goto done;
+ }
+
+ *off += count;
+ rc = count;
+done:
+ if (rc < 0) {
+ kfree(dp->data);
+ dp->data = NULL;
+ }
+ return rc;
+}
+
+static int validate_flash_release(struct inode *inode, struct file *file)
+{
+ struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+ struct rtas_validate_flash_t *args_buf;
+
+ args_buf = (struct rtas_validate_flash_t *) dp->data;
+
+ if (args_buf->status == VALIDATE_READY) {
+ args_buf->buf_size = VALIDATE_BUF_SIZE;
+ validate_flash(args_buf);
+ }
+
+ /* The matching atomic_inc was in rtas_excl_open() */
+ atomic_dec(&dp->count);
+
+ return 0;
+}
+
+static void rtas_flash_firmware(int reboot_type)
+{
+ unsigned long image_size;
+ struct flash_block_list *f, *next, *flist;
+ unsigned long rtas_block_list;
+ int i, status, update_token;
+
+ if (rtas_firmware_flash_list == NULL)
+ return; /* nothing to do */
+
+ if (reboot_type != SYS_RESTART) {
+ printk(KERN_ALERT "FLASH: firmware flash requires a reboot\n");
+ printk(KERN_ALERT "FLASH: the firmware image will NOT be flashed\n");
+ return;
+ }
+
+ update_token = rtas_token("ibm,update-flash-64-and-reboot");
+ if (update_token == RTAS_UNKNOWN_SERVICE) {
+ printk(KERN_ALERT "FLASH: ibm,update-flash-64-and-reboot "
+ "is not available -- not a service partition?\n");
+ printk(KERN_ALERT "FLASH: firmware will not be flashed\n");
+ return;
+ }
+
+ /*
+ * Just before starting the firmware flash, cancel the event scan work
+ * to avoid any soft lockup issues.
+ */
+ rtas_cancel_event_scan();
+
+ /*
+ * NOTE: the "first" block must be under 4GB, so we create
+ * an entry with no data blocks in the reserved buffer in
+ * the kernel data segment.
+ */
+ spin_lock(&rtas_data_buf_lock);
+ flist = (struct flash_block_list *)&rtas_data_buf[0];
+ flist->num_blocks = 0;
+ flist->next = rtas_firmware_flash_list;
+ rtas_block_list = virt_to_abs(flist);
+ if (rtas_block_list >= 4UL*1024*1024*1024) {
+ printk(KERN_ALERT "FLASH: kernel bug...flash list header addr above 4GB\n");
+ spin_unlock(&rtas_data_buf_lock);
+ return;
+ }
+
+ printk(KERN_ALERT "FLASH: preparing saved firmware image for flash\n");
+ /* Update the block_list in place. */
+ rtas_firmware_flash_list = NULL; /* too hard to backout on error */
+ image_size = 0;
+ for (f = flist; f; f = next) {
+ /* Translate data addrs to absolute */
+ for (i = 0; i < f->num_blocks; i++) {
+ f->blocks[i].data = (char *)virt_to_abs(f->blocks[i].data);
+ image_size += f->blocks[i].length;
+ }
+ next = f->next;
+ /* Don't translate NULL pointer for last entry */
+ if (f->next)
+ f->next = (struct flash_block_list *)virt_to_abs(f->next);
+ else
+ f->next = NULL;
+ /* make num_blocks into the version/length field */
+ f->num_blocks = (FLASH_BLOCK_LIST_VERSION << 56) | ((f->num_blocks+1)*16);
+ }
+
+ printk(KERN_ALERT "FLASH: flash image is %ld bytes\n", image_size);
+ printk(KERN_ALERT "FLASH: performing flash and reboot\n");
+ rtas_progress("Flashing \n", 0x0);
+ rtas_progress("Please Wait... ", 0x0);
+ printk(KERN_ALERT "FLASH: this will take several minutes. Do not power off!\n");
+ status = rtas_call(update_token, 1, 1, NULL, rtas_block_list);
+ switch (status) { /* should only get "bad" status */
+ case 0:
+ printk(KERN_ALERT "FLASH: success\n");
+ break;
+ case -1:
+ printk(KERN_ALERT "FLASH: hardware error. Firmware may not be not flashed\n");
+ break;
+ case -3:
+ printk(KERN_ALERT "FLASH: image is corrupt or not correct for this platform. Firmware not flashed\n");
+ break;
+ case -4:
+ printk(KERN_ALERT "FLASH: flash failed when partially complete. System may not reboot\n");
+ break;
+ default:
+ printk(KERN_ALERT "FLASH: unknown flash return code %d\n", status);
+ break;
+ }
+ spin_unlock(&rtas_data_buf_lock);
+}
+
+static void remove_flash_pde(struct proc_dir_entry *dp)
+{
+ if (dp) {
+ kfree(dp->data);
+ remove_proc_entry(dp->name, dp->parent);
+ }
+}
+
+static int initialize_flash_pde_data(const char *rtas_call_name,
+ size_t buf_size,
+ struct proc_dir_entry *dp)
+{
+ int *status;
+ int token;
+
+ dp->data = kzalloc(buf_size, GFP_KERNEL);
+ if (dp->data == NULL) {
+ remove_flash_pde(dp);
+ return -ENOMEM;
+ }
+
+ /*
+ * This code assumes that the status int is the first member of the
+ * struct
+ */
+ status = (int *) dp->data;
+ token = rtas_token(rtas_call_name);
+ if (token == RTAS_UNKNOWN_SERVICE)
+ *status = FLASH_AUTH;
+ else
+ *status = FLASH_NO_OP;
+
+ return 0;
+}
+
+static struct proc_dir_entry *create_flash_pde(const char *filename,
+ const struct file_operations *fops)
+{
+ return proc_create(filename, S_IRUSR | S_IWUSR, NULL, fops);
+}
+
+static const struct file_operations rtas_flash_operations = {
+ .owner = THIS_MODULE,
+ .read = rtas_flash_read,
+ .write = rtas_flash_write,
+ .open = rtas_excl_open,
+ .release = rtas_flash_release,
+ .llseek = default_llseek,
+};
+
+static const struct file_operations manage_flash_operations = {
+ .owner = THIS_MODULE,
+ .read = manage_flash_read,
+ .write = manage_flash_write,
+ .open = rtas_excl_open,
+ .release = rtas_excl_release,
+ .llseek = default_llseek,
+};
+
+static const struct file_operations validate_flash_operations = {
+ .owner = THIS_MODULE,
+ .read = validate_flash_read,
+ .write = validate_flash_write,
+ .open = rtas_excl_open,
+ .release = validate_flash_release,
+ .llseek = default_llseek,
+};
+
+static int __init rtas_flash_init(void)
+{
+ int rc;
+
+ if (rtas_token("ibm,update-flash-64-and-reboot") ==
+ RTAS_UNKNOWN_SERVICE) {
+ printk(KERN_ERR "rtas_flash: no firmware flash support\n");
+ return 1;
+ }
+
+ firmware_flash_pde = create_flash_pde("powerpc/rtas/"
+ FIRMWARE_FLASH_NAME,
+ &rtas_flash_operations);
+ if (firmware_flash_pde == NULL) {
+ rc = -ENOMEM;
+ goto cleanup;
+ }
+
+ rc = initialize_flash_pde_data("ibm,update-flash-64-and-reboot",
+ sizeof(struct rtas_update_flash_t),
+ firmware_flash_pde);
+ if (rc != 0)
+ goto cleanup;
+
+ firmware_update_pde = create_flash_pde("powerpc/rtas/"
+ FIRMWARE_UPDATE_NAME,
+ &rtas_flash_operations);
+ if (firmware_update_pde == NULL) {
+ rc = -ENOMEM;
+ goto cleanup;
+ }
+
+ rc = initialize_flash_pde_data("ibm,update-flash-64-and-reboot",
+ sizeof(struct rtas_update_flash_t),
+ firmware_update_pde);
+ if (rc != 0)
+ goto cleanup;
+
+ validate_pde = create_flash_pde("powerpc/rtas/" VALIDATE_FLASH_NAME,
+ &validate_flash_operations);
+ if (validate_pde == NULL) {
+ rc = -ENOMEM;
+ goto cleanup;
+ }
+
+ rc = initialize_flash_pde_data("ibm,validate-flash-image",
+ sizeof(struct rtas_validate_flash_t),
+ validate_pde);
+ if (rc != 0)
+ goto cleanup;
+
+ manage_pde = create_flash_pde("powerpc/rtas/" MANAGE_FLASH_NAME,
+ &manage_flash_operations);
+ if (manage_pde == NULL) {
+ rc = -ENOMEM;
+ goto cleanup;
+ }
+
+ rc = initialize_flash_pde_data("ibm,manage-flash-image",
+ sizeof(struct rtas_manage_flash_t),
+ manage_pde);
+ if (rc != 0)
+ goto cleanup;
+
+ rtas_flash_term_hook = rtas_flash_firmware;
+
+ flash_block_cache = kmem_cache_create("rtas_flash_cache",
+ RTAS_BLK_SIZE, RTAS_BLK_SIZE, 0,
+ rtas_block_ctor);
+ if (!flash_block_cache) {
+ printk(KERN_ERR "%s: failed to create block cache\n",
+ __func__);
+ rc = -ENOMEM;
+ goto cleanup;
+ }
+ return 0;
+
+cleanup:
+ remove_flash_pde(firmware_flash_pde);
+ remove_flash_pde(firmware_update_pde);
+ remove_flash_pde(validate_pde);
+ remove_flash_pde(manage_pde);
+
+ return rc;
+}
+
+static void __exit rtas_flash_cleanup(void)
+{
+ rtas_flash_term_hook = NULL;
+
+ if (flash_block_cache)
+ kmem_cache_destroy(flash_block_cache);
+
+ remove_flash_pde(firmware_flash_pde);
+ remove_flash_pde(firmware_update_pde);
+ remove_flash_pde(validate_pde);
+ remove_flash_pde(manage_pde);
+}
+
+module_init(rtas_flash_init);
+module_exit(rtas_flash_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
new file mode 100644
index 00000000..179af906
--- /dev/null
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * RTAS specific routines for PCI.
+ *
+ * Based on code from pci.c, chrp_pci.c and pSeries_pci.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/rtas.h>
+#include <asm/mpic.h>
+#include <asm/ppc-pci.h>
+#include <asm/eeh.h>
+
+/* RTAS tokens */
+static int read_pci_config;
+static int write_pci_config;
+static int ibm_read_pci_config;
+static int ibm_write_pci_config;
+
+static inline int config_access_valid(struct pci_dn *dn, int where)
+{
+ if (where < 256)
+ return 1;
+ if (where < 4096 && dn->pci_ext_config_space)
+ return 1;
+
+ return 0;
+}
+
+int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
+{
+ int returnval = -1;
+ unsigned long buid, addr;
+ int ret;
+
+ if (!pdn)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ if (!config_access_valid(pdn, where))
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+
+ addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
+ buid = pdn->phb->buid;
+ if (buid) {
+ ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval,
+ addr, BUID_HI(buid), BUID_LO(buid), size);
+ } else {
+ ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size);
+ }
+ *val = returnval;
+
+ if (ret)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ if (returnval == EEH_IO_ERROR_VALUE(size) &&
+ eeh_dn_check_failure (pdn->node, NULL))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int rtas_pci_read_config(struct pci_bus *bus,
+ unsigned int devfn,
+ int where, int size, u32 *val)
+{
+ struct device_node *busdn, *dn;
+
+ busdn = pci_bus_to_OF_node(bus);
+
+ /* Search only direct children of the bus */
+ for (dn = busdn->child; dn; dn = dn->sibling) {
+ struct pci_dn *pdn = PCI_DN(dn);
+ if (pdn && pdn->devfn == devfn
+ && of_device_is_available(dn))
+ return rtas_read_config(pdn, where, size, val);
+ }
+
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
+{
+ unsigned long buid, addr;
+ int ret;
+
+ if (!pdn)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ if (!config_access_valid(pdn, where))
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+
+ addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
+ buid = pdn->phb->buid;
+ if (buid) {
+ ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr,
+ BUID_HI(buid), BUID_LO(buid), size, (ulong) val);
+ } else {
+ ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val);
+ }
+
+ if (ret)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int rtas_pci_write_config(struct pci_bus *bus,
+ unsigned int devfn,
+ int where, int size, u32 val)
+{
+ struct device_node *busdn, *dn;
+
+ busdn = pci_bus_to_OF_node(bus);
+
+ /* Search only direct children of the bus */
+ for (dn = busdn->child; dn; dn = dn->sibling) {
+ struct pci_dn *pdn = PCI_DN(dn);
+ if (pdn && pdn->devfn == devfn
+ && of_device_is_available(dn))
+ return rtas_write_config(pdn, where, size, val);
+ }
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+static struct pci_ops rtas_pci_ops = {
+ .read = rtas_pci_read_config,
+ .write = rtas_pci_write_config,
+};
+
+static int is_python(struct device_node *dev)
+{
+ const char *model = of_get_property(dev, "model", NULL);
+
+ if (model && strstr(model, "Python"))
+ return 1;
+
+ return 0;
+}
+
+static void python_countermeasures(struct device_node *dev)
+{
+ struct resource registers;
+ void __iomem *chip_regs;
+ volatile u32 val;
+
+ if (of_address_to_resource(dev, 0, &registers)) {
+ printk(KERN_ERR "Can't get address for Python workarounds !\n");
+ return;
+ }
+
+ /* Python's register file is 1 MB in size. */
+ chip_regs = ioremap(registers.start & ~(0xfffffUL), 0x100000);
+
+ /*
+ * Firmware doesn't always clear this bit which is critical
+ * for good performance - Anton
+ */
+
+#define PRG_CL_RESET_VALID 0x00010000
+
+ val = in_be32(chip_regs + 0xf6030);
+ if (val & PRG_CL_RESET_VALID) {
+ printk(KERN_INFO "Python workaround: ");
+ val &= ~PRG_CL_RESET_VALID;
+ out_be32(chip_regs + 0xf6030, val);
+ /*
+ * We must read it back for changes to
+ * take effect
+ */
+ val = in_be32(chip_regs + 0xf6030);
+ printk("reg0: %x\n", val);
+ }
+
+ iounmap(chip_regs);
+}
+
+void __init init_pci_config_tokens (void)
+{
+ read_pci_config = rtas_token("read-pci-config");
+ write_pci_config = rtas_token("write-pci-config");
+ ibm_read_pci_config = rtas_token("ibm,read-pci-config");
+ ibm_write_pci_config = rtas_token("ibm,write-pci-config");
+}
+
+unsigned long __devinit get_phb_buid (struct device_node *phb)
+{
+ struct resource r;
+
+ if (ibm_read_pci_config == -1)
+ return 0;
+ if (of_address_to_resource(phb, 0, &r))
+ return 0;
+ return r.start;
+}
+
+static int phb_set_bus_ranges(struct device_node *dev,
+ struct pci_controller *phb)
+{
+ const int *bus_range;
+ unsigned int len;
+
+ bus_range = of_get_property(dev, "bus-range", &len);
+ if (bus_range == NULL || len < 2 * sizeof(int)) {
+ return 1;
+ }
+
+ phb->first_busno = bus_range[0];
+ phb->last_busno = bus_range[1];
+
+ return 0;
+}
+
+int __devinit rtas_setup_phb(struct pci_controller *phb)
+{
+ struct device_node *dev = phb->dn;
+
+ if (is_python(dev))
+ python_countermeasures(dev);
+
+ if (phb_set_bus_ranges(dev, phb))
+ return 1;
+
+ phb->ops = &rtas_pci_ops;
+ phb->buid = get_phb_buid(dev);
+
+ return 0;
+}
+
+void __init find_and_init_phbs(void)
+{
+ struct device_node *node;
+ struct pci_controller *phb;
+ struct device_node *root = of_find_node_by_path("/");
+
+ for_each_child_of_node(root, node) {
+ if (node->type == NULL || (strcmp(node->type, "pci") != 0 &&
+ strcmp(node->type, "pciex") != 0))
+ continue;
+
+ phb = pcibios_alloc_controller(node);
+ if (!phb)
+ continue;
+ rtas_setup_phb(phb);
+ pci_process_bridge_OF_ranges(phb, node, 0);
+ isa_bridge_find_early(phb);
+ }
+
+ of_node_put(root);
+ pci_devs_phb_init();
+
+ /* Create EEH devices for all PHBs */
+ eeh_dev_phb_init();
+
+ /*
+ * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties
+ * in chosen.
+ */
+ if (of_chosen) {
+ const int *prop;
+
+ prop = of_get_property(of_chosen,
+ "linux,pci-probe-only", NULL);
+ if (prop) {
+ if (*prop)
+ pci_add_flags(PCI_PROBE_ONLY);
+ else
+ pci_clear_flags(PCI_PROBE_ONLY);
+ }
+
+#ifdef CONFIG_PPC32 /* Will be made generic soon */
+ prop = of_get_property(of_chosen,
+ "linux,pci-assign-all-buses", NULL);
+ if (prop && *prop)
+ pci_add_flags(PCI_REASSIGN_ALL_BUS);
+#endif /* CONFIG_PPC32 */
+ }
+}
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
new file mode 100644
index 00000000..1045ff49
--- /dev/null
+++ b/arch/powerpc/kernel/rtasd.c
@@ -0,0 +1,555 @@
+/*
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Communication to userspace based on kernel/printk.c
+ */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/poll.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <linux/spinlock.h>
+#include <linux/cpu.h>
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/rtas.h>
+#include <asm/prom.h>
+#include <asm/nvram.h>
+#include <linux/atomic.h>
+#include <asm/machdep.h>
+
+
+static DEFINE_SPINLOCK(rtasd_log_lock);
+
+static DECLARE_WAIT_QUEUE_HEAD(rtas_log_wait);
+
+static char *rtas_log_buf;
+static unsigned long rtas_log_start;
+static unsigned long rtas_log_size;
+
+static int surveillance_timeout = -1;
+
+static unsigned int rtas_error_log_max;
+static unsigned int rtas_error_log_buffer_max;
+
+/* RTAS service tokens */
+static unsigned int event_scan;
+static unsigned int rtas_event_scan_rate;
+
+static int full_rtas_msgs = 0;
+
+/* Stop logging to nvram after first fatal error */
+static int logging_enabled; /* Until we initialize everything,
+ * make sure we don't try logging
+ * anything */
+static int error_log_cnt;
+
+/*
+ * Since we use 32 bit RTAS, the physical address of this must be below
+ * 4G or else bad things happen. Allocate this in the kernel data and
+ * make it big enough.
+ */
+static unsigned char logdata[RTAS_ERROR_LOG_MAX];
+
+static char *rtas_type[] = {
+ "Unknown", "Retry", "TCE Error", "Internal Device Failure",
+ "Timeout", "Data Parity", "Address Parity", "Cache Parity",
+ "Address Invalid", "ECC Uncorrected", "ECC Corrupted",
+};
+
+static char *rtas_event_type(int type)
+{
+ if ((type > 0) && (type < 11))
+ return rtas_type[type];
+
+ switch (type) {
+ case RTAS_TYPE_EPOW:
+ return "EPOW";
+ case RTAS_TYPE_PLATFORM:
+ return "Platform Error";
+ case RTAS_TYPE_IO:
+ return "I/O Event";
+ case RTAS_TYPE_INFO:
+ return "Platform Information Event";
+ case RTAS_TYPE_DEALLOC:
+ return "Resource Deallocation Event";
+ case RTAS_TYPE_DUMP:
+ return "Dump Notification Event";
+ }
+
+ return rtas_type[0];
+}
+
+/* To see this info, grep RTAS /var/log/messages and each entry
+ * will be collected together with obvious begin/end.
+ * There will be a unique identifier on the begin and end lines.
+ * This will persist across reboots.
+ *
+ * format of error logs returned from RTAS:
+ * bytes (size) : contents
+ * --------------------------------------------------------
+ * 0-7 (8) : rtas_error_log
+ * 8-47 (40) : extended info
+ * 48-51 (4) : vendor id
+ * 52-1023 (vendor specific) : location code and debug data
+ */
+static void printk_log_rtas(char *buf, int len)
+{
+
+ int i,j,n = 0;
+ int perline = 16;
+ char buffer[64];
+ char * str = "RTAS event";
+
+ if (full_rtas_msgs) {
+ printk(RTAS_DEBUG "%d -------- %s begin --------\n",
+ error_log_cnt, str);
+
+ /*
+ * Print perline bytes on each line, each line will start
+ * with RTAS and a changing number, so syslogd will
+ * print lines that are otherwise the same. Separate every
+ * 4 bytes with a space.
+ */
+ for (i = 0; i < len; i++) {
+ j = i % perline;
+ if (j == 0) {
+ memset(buffer, 0, sizeof(buffer));
+ n = sprintf(buffer, "RTAS %d:", i/perline);
+ }
+
+ if ((i % 4) == 0)
+ n += sprintf(buffer+n, " ");
+
+ n += sprintf(buffer+n, "%02x", (unsigned char)buf[i]);
+
+ if (j == (perline-1))
+ printk(KERN_DEBUG "%s\n", buffer);
+ }
+ if ((i % perline) != 0)
+ printk(KERN_DEBUG "%s\n", buffer);
+
+ printk(RTAS_DEBUG "%d -------- %s end ----------\n",
+ error_log_cnt, str);
+ } else {
+ struct rtas_error_log *errlog = (struct rtas_error_log *)buf;
+
+ printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n",
+ error_log_cnt, rtas_event_type(errlog->type),
+ errlog->severity);
+ }
+}
+
+static int log_rtas_len(char * buf)
+{
+ int len;
+ struct rtas_error_log *err;
+
+ /* rtas fixed header */
+ len = 8;
+ err = (struct rtas_error_log *)buf;
+ if (err->extended && err->extended_log_length) {
+
+ /* extended header */
+ len += err->extended_log_length;
+ }
+
+ if (rtas_error_log_max == 0)
+ rtas_error_log_max = rtas_get_error_log_max();
+
+ if (len > rtas_error_log_max)
+ len = rtas_error_log_max;
+
+ return len;
+}
+
+/*
+ * First write to nvram, if fatal error, that is the only
+ * place we log the info. The error will be picked up
+ * on the next reboot by rtasd. If not fatal, run the
+ * method for the type of error. Currently, only RTAS
+ * errors have methods implemented, but in the future
+ * there might be a need to store data in nvram before a
+ * call to panic().
+ *
+ * XXX We write to nvram periodically, to indicate error has
+ * been written and sync'd, but there is a possibility
+ * that if we don't shutdown correctly, a duplicate error
+ * record will be created on next reboot.
+ */
+void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
+{
+ unsigned long offset;
+ unsigned long s;
+ int len = 0;
+
+ pr_debug("rtasd: logging event\n");
+ if (buf == NULL)
+ return;
+
+ spin_lock_irqsave(&rtasd_log_lock, s);
+
+ /* get length and increase count */
+ switch (err_type & ERR_TYPE_MASK) {
+ case ERR_TYPE_RTAS_LOG:
+ len = log_rtas_len(buf);
+ if (!(err_type & ERR_FLAG_BOOT))
+ error_log_cnt++;
+ break;
+ case ERR_TYPE_KERNEL_PANIC:
+ default:
+ WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
+ spin_unlock_irqrestore(&rtasd_log_lock, s);
+ return;
+ }
+
+#ifdef CONFIG_PPC64
+ /* Write error to NVRAM */
+ if (logging_enabled && !(err_type & ERR_FLAG_BOOT))
+ nvram_write_error_log(buf, len, err_type, error_log_cnt);
+#endif /* CONFIG_PPC64 */
+
+ /*
+ * rtas errors can occur during boot, and we do want to capture
+ * those somewhere, even if nvram isn't ready (why not?), and even
+ * if rtasd isn't ready. Put them into the boot log, at least.
+ */
+ if ((err_type & ERR_TYPE_MASK) == ERR_TYPE_RTAS_LOG)
+ printk_log_rtas(buf, len);
+
+ /* Check to see if we need to or have stopped logging */
+ if (fatal || !logging_enabled) {
+ logging_enabled = 0;
+ WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
+ spin_unlock_irqrestore(&rtasd_log_lock, s);
+ return;
+ }
+
+ /* call type specific method for error */
+ switch (err_type & ERR_TYPE_MASK) {
+ case ERR_TYPE_RTAS_LOG:
+ offset = rtas_error_log_buffer_max *
+ ((rtas_log_start+rtas_log_size) & LOG_NUMBER_MASK);
+
+ /* First copy over sequence number */
+ memcpy(&rtas_log_buf[offset], (void *) &error_log_cnt, sizeof(int));
+
+ /* Second copy over error log data */
+ offset += sizeof(int);
+ memcpy(&rtas_log_buf[offset], buf, len);
+
+ if (rtas_log_size < LOG_NUMBER)
+ rtas_log_size += 1;
+ else
+ rtas_log_start += 1;
+
+ WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
+ spin_unlock_irqrestore(&rtasd_log_lock, s);
+ wake_up_interruptible(&rtas_log_wait);
+ break;
+ case ERR_TYPE_KERNEL_PANIC:
+ default:
+ WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
+ spin_unlock_irqrestore(&rtasd_log_lock, s);
+ return;
+ }
+
+}
+
+static int rtas_log_open(struct inode * inode, struct file * file)
+{
+ return 0;
+}
+
+static int rtas_log_release(struct inode * inode, struct file * file)
+{
+ return 0;
+}
+
+/* This will check if all events are logged, if they are then, we
+ * know that we can safely clear the events in NVRAM.
+ * Next we'll sit and wait for something else to log.
+ */
+static ssize_t rtas_log_read(struct file * file, char __user * buf,
+ size_t count, loff_t *ppos)
+{
+ int error;
+ char *tmp;
+ unsigned long s;
+ unsigned long offset;
+
+ if (!buf || count < rtas_error_log_buffer_max)
+ return -EINVAL;
+
+ count = rtas_error_log_buffer_max;
+
+ if (!access_ok(VERIFY_WRITE, buf, count))
+ return -EFAULT;
+
+ tmp = kmalloc(count, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&rtasd_log_lock, s);
+
+ /* if it's 0, then we know we got the last one (the one in NVRAM) */
+ while (rtas_log_size == 0) {
+ if (file->f_flags & O_NONBLOCK) {
+ spin_unlock_irqrestore(&rtasd_log_lock, s);
+ error = -EAGAIN;
+ goto out;
+ }
+
+ if (!logging_enabled) {
+ spin_unlock_irqrestore(&rtasd_log_lock, s);
+ error = -ENODATA;
+ goto out;
+ }
+#ifdef CONFIG_PPC64
+ nvram_clear_error_log();
+#endif /* CONFIG_PPC64 */
+
+ spin_unlock_irqrestore(&rtasd_log_lock, s);
+ error = wait_event_interruptible(rtas_log_wait, rtas_log_size);
+ if (error)
+ goto out;
+ spin_lock_irqsave(&rtasd_log_lock, s);
+ }
+
+ offset = rtas_error_log_buffer_max * (rtas_log_start & LOG_NUMBER_MASK);
+ memcpy(tmp, &rtas_log_buf[offset], count);
+
+ rtas_log_start += 1;
+ rtas_log_size -= 1;
+ spin_unlock_irqrestore(&rtasd_log_lock, s);
+
+ error = copy_to_user(buf, tmp, count) ? -EFAULT : count;
+out:
+ kfree(tmp);
+ return error;
+}
+
+static unsigned int rtas_log_poll(struct file *file, poll_table * wait)
+{
+ poll_wait(file, &rtas_log_wait, wait);
+ if (rtas_log_size)
+ return POLLIN | POLLRDNORM;
+ return 0;
+}
+
+static const struct file_operations proc_rtas_log_operations = {
+ .read = rtas_log_read,
+ .poll = rtas_log_poll,
+ .open = rtas_log_open,
+ .release = rtas_log_release,
+ .llseek = noop_llseek,
+};
+
+static int enable_surveillance(int timeout)
+{
+ int error;
+
+ error = rtas_set_indicator(SURVEILLANCE_TOKEN, 0, timeout);
+
+ if (error == 0)
+ return 0;
+
+ if (error == -EINVAL) {
+ printk(KERN_DEBUG "rtasd: surveillance not supported\n");
+ return 0;
+ }
+
+ printk(KERN_ERR "rtasd: could not update surveillance\n");
+ return -1;
+}
+
+static void do_event_scan(void)
+{
+ int error;
+ do {
+ memset(logdata, 0, rtas_error_log_max);
+ error = rtas_call(event_scan, 4, 1, NULL,
+ RTAS_EVENT_SCAN_ALL_EVENTS, 0,
+ __pa(logdata), rtas_error_log_max);
+ if (error == -1) {
+ printk(KERN_ERR "event-scan failed\n");
+ break;
+ }
+
+ if (error == 0)
+ pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, 0);
+
+ } while(error == 0);
+}
+
+static void rtas_event_scan(struct work_struct *w);
+DECLARE_DELAYED_WORK(event_scan_work, rtas_event_scan);
+
+/*
+ * Delay should be at least one second since some machines have problems if
+ * we call event-scan too quickly.
+ */
+static unsigned long event_scan_delay = 1*HZ;
+static int first_pass = 1;
+
+static void rtas_event_scan(struct work_struct *w)
+{
+ unsigned int cpu;
+
+ do_event_scan();
+
+ get_online_cpus();
+
+ /* raw_ OK because just using CPU as starting point. */
+ cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
+ if (cpu >= nr_cpu_ids) {
+ cpu = cpumask_first(cpu_online_mask);
+
+ if (first_pass) {
+ first_pass = 0;
+ event_scan_delay = 30*HZ/rtas_event_scan_rate;
+
+ if (surveillance_timeout != -1) {
+ pr_debug("rtasd: enabling surveillance\n");
+ enable_surveillance(surveillance_timeout);
+ pr_debug("rtasd: surveillance enabled\n");
+ }
+ }
+ }
+
+ schedule_delayed_work_on(cpu, &event_scan_work,
+ __round_jiffies_relative(event_scan_delay, cpu));
+
+ put_online_cpus();
+}
+
+#ifdef CONFIG_PPC64
+static void retreive_nvram_error_log(void)
+{
+ unsigned int err_type ;
+ int rc ;
+
+ /* See if we have any error stored in NVRAM */
+ memset(logdata, 0, rtas_error_log_max);
+ rc = nvram_read_error_log(logdata, rtas_error_log_max,
+ &err_type, &error_log_cnt);
+ /* We can use rtas_log_buf now */
+ logging_enabled = 1;
+ if (!rc) {
+ if (err_type != ERR_FLAG_ALREADY_LOGGED) {
+ pSeries_log_error(logdata, err_type | ERR_FLAG_BOOT, 0);
+ }
+ }
+}
+#else /* CONFIG_PPC64 */
+static void retreive_nvram_error_log(void)
+{
+}
+#endif /* CONFIG_PPC64 */
+
+static void start_event_scan(void)
+{
+ printk(KERN_DEBUG "RTAS daemon started\n");
+ pr_debug("rtasd: will sleep for %d milliseconds\n",
+ (30000 / rtas_event_scan_rate));
+
+ /* Retrieve errors from nvram if any */
+ retreive_nvram_error_log();
+
+ schedule_delayed_work_on(cpumask_first(cpu_online_mask),
+ &event_scan_work, event_scan_delay);
+}
+
+/* Cancel the rtas event scan work */
+void rtas_cancel_event_scan(void)
+{
+ cancel_delayed_work_sync(&event_scan_work);
+}
+EXPORT_SYMBOL_GPL(rtas_cancel_event_scan);
+
+static int __init rtas_init(void)
+{
+ struct proc_dir_entry *entry;
+
+ if (!machine_is(pseries) && !machine_is(chrp))
+ return 0;
+
+ /* No RTAS */
+ event_scan = rtas_token("event-scan");
+ if (event_scan == RTAS_UNKNOWN_SERVICE) {
+ printk(KERN_INFO "rtasd: No event-scan on system\n");
+ return -ENODEV;
+ }
+
+ rtas_event_scan_rate = rtas_token("rtas-event-scan-rate");
+ if (rtas_event_scan_rate == RTAS_UNKNOWN_SERVICE) {
+ printk(KERN_ERR "rtasd: no rtas-event-scan-rate on system\n");
+ return -ENODEV;
+ }
+
+ if (!rtas_event_scan_rate) {
+ /* Broken firmware: take a rate of zero to mean don't scan */
+ printk(KERN_DEBUG "rtasd: scan rate is 0, not scanning\n");
+ return 0;
+ }
+
+ /* Make room for the sequence number */
+ rtas_error_log_max = rtas_get_error_log_max();
+ rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int);
+
+ rtas_log_buf = vmalloc(rtas_error_log_buffer_max*LOG_NUMBER);
+ if (!rtas_log_buf) {
+ printk(KERN_ERR "rtasd: no memory\n");
+ return -ENOMEM;
+ }
+
+ entry = proc_create("powerpc/rtas/error_log", S_IRUSR, NULL,
+ &proc_rtas_log_operations);
+ if (!entry)
+ printk(KERN_ERR "Failed to create error_log proc entry\n");
+
+ start_event_scan();
+
+ return 0;
+}
+__initcall(rtas_init);
+
+static int __init surveillance_setup(char *str)
+{
+ int i;
+
+ /* We only do surveillance on pseries */
+ if (!machine_is(pseries))
+ return 0;
+
+ if (get_option(&str,&i)) {
+ if (i >= 0 && i <= 255)
+ surveillance_timeout = i;
+ }
+
+ return 1;
+}
+__setup("surveillance=", surveillance_setup);
+
+static int __init rtasmsgs_setup(char *str)
+{
+ if (strcmp(str, "on") == 0)
+ full_rtas_msgs = 1;
+ else if (strcmp(str, "off") == 0)
+ full_rtas_msgs = 0;
+
+ return 1;
+}
+__setup("rtasmsgs=", rtasmsgs_setup);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
new file mode 100644
index 00000000..afd4f051
--- /dev/null
+++ b/arch/powerpc/kernel/setup-common.c
@@ -0,0 +1,733 @@
+/*
+ * Common boot and setup code for both 32-bit and 64-bit.
+ * Extracted from arch/powerpc/kernel/setup_64.c.
+ *
+ * Copyright (C) 2001 PPC64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/delay.h>
+#include <linux/initrd.h>
+#include <linux/platform_device.h>
+#include <linux/seq_file.h>
+#include <linux/ioport.h>
+#include <linux/console.h>
+#include <linux/screen_info.h>
+#include <linux/root_dev.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/unistd.h>
+#include <linux/serial.h>
+#include <linux/serial_8250.h>
+#include <linux/debugfs.h>
+#include <linux/percpu.h>
+#include <linux/memblock.h>
+#include <linux/of_platform.h>
+#include <asm/io.h>
+#include <asm/paca.h>
+#include <asm/prom.h>
+#include <asm/processor.h>
+#include <asm/vdso_datapage.h>
+#include <asm/pgtable.h>
+#include <asm/smp.h>
+#include <asm/elf.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/cputable.h>
+#include <asm/sections.h>
+#include <asm/firmware.h>
+#include <asm/btext.h>
+#include <asm/nvram.h>
+#include <asm/setup.h>
+#include <asm/rtas.h>
+#include <asm/iommu.h>
+#include <asm/serial.h>
+#include <asm/cache.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/xmon.h>
+#include <asm/cputhreads.h>
+#include <mm/mmu_decl.h>
+#include <asm/fadump.h>
+
+#include "setup.h"
+
+#ifdef DEBUG
+#include <asm/udbg.h>
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/* The main machine-dep calls structure
+ */
+struct machdep_calls ppc_md;
+EXPORT_SYMBOL(ppc_md);
+struct machdep_calls *machine_id;
+EXPORT_SYMBOL(machine_id);
+
+unsigned long klimit = (unsigned long) _end;
+
+char cmd_line[COMMAND_LINE_SIZE];
+
+/*
+ * This still seems to be needed... -- paulus
+ */
+struct screen_info screen_info = {
+ .orig_x = 0,
+ .orig_y = 25,
+ .orig_video_cols = 80,
+ .orig_video_lines = 25,
+ .orig_video_isVGA = 1,
+ .orig_video_points = 16
+};
+
+/* Variables required to store legacy IO irq routing */
+int of_i8042_kbd_irq;
+EXPORT_SYMBOL_GPL(of_i8042_kbd_irq);
+int of_i8042_aux_irq;
+EXPORT_SYMBOL_GPL(of_i8042_aux_irq);
+
+#ifdef __DO_IRQ_CANON
+/* XXX should go elsewhere eventually */
+int ppc_do_canonicalize_irqs;
+EXPORT_SYMBOL(ppc_do_canonicalize_irqs);
+#endif
+
+/* also used by kexec */
+void machine_shutdown(void)
+{
+#ifdef CONFIG_FA_DUMP
+ /*
+ * if fadump is active, cleanup the fadump registration before we
+ * shutdown.
+ */
+ fadump_cleanup();
+#endif
+
+ if (ppc_md.machine_shutdown)
+ ppc_md.machine_shutdown();
+}
+
+void machine_restart(char *cmd)
+{
+ machine_shutdown();
+ if (ppc_md.restart)
+ ppc_md.restart(cmd);
+#ifdef CONFIG_SMP
+ smp_send_stop();
+#endif
+ printk(KERN_EMERG "System Halted, OK to turn off power\n");
+ local_irq_disable();
+ while (1) ;
+}
+
+void machine_power_off(void)
+{
+ machine_shutdown();
+ if (ppc_md.power_off)
+ ppc_md.power_off();
+#ifdef CONFIG_SMP
+ smp_send_stop();
+#endif
+ printk(KERN_EMERG "System Halted, OK to turn off power\n");
+ local_irq_disable();
+ while (1) ;
+}
+/* Used by the G5 thermal driver */
+EXPORT_SYMBOL_GPL(machine_power_off);
+
+void (*pm_power_off)(void) = machine_power_off;
+EXPORT_SYMBOL_GPL(pm_power_off);
+
+void machine_halt(void)
+{
+ machine_shutdown();
+ if (ppc_md.halt)
+ ppc_md.halt();
+#ifdef CONFIG_SMP
+ smp_send_stop();
+#endif
+ printk(KERN_EMERG "System Halted, OK to turn off power\n");
+ local_irq_disable();
+ while (1) ;
+}
+
+
+#ifdef CONFIG_TAU
+extern u32 cpu_temp(unsigned long cpu);
+extern u32 cpu_temp_both(unsigned long cpu);
+#endif /* CONFIG_TAU */
+
+#ifdef CONFIG_SMP
+DEFINE_PER_CPU(unsigned int, cpu_pvr);
+#endif
+
+static void show_cpuinfo_summary(struct seq_file *m)
+{
+ struct device_node *root;
+ const char *model = NULL;
+#if defined(CONFIG_SMP) && defined(CONFIG_PPC32)
+ unsigned long bogosum = 0;
+ int i;
+ for_each_online_cpu(i)
+ bogosum += loops_per_jiffy;
+ seq_printf(m, "total bogomips\t: %lu.%02lu\n",
+ bogosum/(500000/HZ), bogosum/(5000/HZ) % 100);
+#endif /* CONFIG_SMP && CONFIG_PPC32 */
+ seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq);
+ if (ppc_md.name)
+ seq_printf(m, "platform\t: %s\n", ppc_md.name);
+ root = of_find_node_by_path("/");
+ if (root)
+ model = of_get_property(root, "model", NULL);
+ if (model)
+ seq_printf(m, "model\t\t: %s\n", model);
+ of_node_put(root);
+
+ if (ppc_md.show_cpuinfo != NULL)
+ ppc_md.show_cpuinfo(m);
+
+#ifdef CONFIG_PPC32
+ /* Display the amount of memory */
+ seq_printf(m, "Memory\t\t: %d MB\n",
+ (unsigned int)(total_memory / (1024 * 1024)));
+#endif
+}
+
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+ unsigned long cpu_id = (unsigned long)v - 1;
+ unsigned int pvr;
+ unsigned short maj;
+ unsigned short min;
+
+ /* We only show online cpus: disable preempt (overzealous, I
+ * knew) to prevent cpu going down. */
+ preempt_disable();
+ if (!cpu_online(cpu_id)) {
+ preempt_enable();
+ return 0;
+ }
+
+#ifdef CONFIG_SMP
+ pvr = per_cpu(cpu_pvr, cpu_id);
+#else
+ pvr = mfspr(SPRN_PVR);
+#endif
+ maj = (pvr >> 8) & 0xFF;
+ min = pvr & 0xFF;
+
+ seq_printf(m, "processor\t: %lu\n", cpu_id);
+ seq_printf(m, "cpu\t\t: ");
+
+ if (cur_cpu_spec->pvr_mask)
+ seq_printf(m, "%s", cur_cpu_spec->cpu_name);
+ else
+ seq_printf(m, "unknown (%08x)", pvr);
+
+#ifdef CONFIG_ALTIVEC
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ seq_printf(m, ", altivec supported");
+#endif /* CONFIG_ALTIVEC */
+
+ seq_printf(m, "\n");
+
+#ifdef CONFIG_TAU
+ if (cur_cpu_spec->cpu_features & CPU_FTR_TAU) {
+#ifdef CONFIG_TAU_AVERAGE
+ /* more straightforward, but potentially misleading */
+ seq_printf(m, "temperature \t: %u C (uncalibrated)\n",
+ cpu_temp(cpu_id));
+#else
+ /* show the actual temp sensor range */
+ u32 temp;
+ temp = cpu_temp_both(cpu_id);
+ seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n",
+ temp & 0xff, temp >> 16);
+#endif
+ }
+#endif /* CONFIG_TAU */
+
+ /*
+ * Assume here that all clock rates are the same in a
+ * smp system. -- Cort
+ */
+ if (ppc_proc_freq)
+ seq_printf(m, "clock\t\t: %lu.%06luMHz\n",
+ ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
+
+ if (ppc_md.show_percpuinfo != NULL)
+ ppc_md.show_percpuinfo(m, cpu_id);
+
+ /* If we are a Freescale core do a simple check so
+ * we dont have to keep adding cases in the future */
+ if (PVR_VER(pvr) & 0x8000) {
+ switch (PVR_VER(pvr)) {
+ case 0x8000: /* 7441/7450/7451, Voyager */
+ case 0x8001: /* 7445/7455, Apollo 6 */
+ case 0x8002: /* 7447/7457, Apollo 7 */
+ case 0x8003: /* 7447A, Apollo 7 PM */
+ case 0x8004: /* 7448, Apollo 8 */
+ case 0x800c: /* 7410, Nitro */
+ maj = ((pvr >> 8) & 0xF);
+ min = PVR_MIN(pvr);
+ break;
+ default: /* e500/book-e */
+ maj = PVR_MAJ(pvr);
+ min = PVR_MIN(pvr);
+ break;
+ }
+ } else {
+ switch (PVR_VER(pvr)) {
+ case 0x0020: /* 403 family */
+ maj = PVR_MAJ(pvr) + 1;
+ min = PVR_MIN(pvr);
+ break;
+ case 0x1008: /* 740P/750P ?? */
+ maj = ((pvr >> 8) & 0xFF) - 1;
+ min = pvr & 0xFF;
+ break;
+ default:
+ maj = (pvr >> 8) & 0xFF;
+ min = pvr & 0xFF;
+ break;
+ }
+ }
+
+ seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n",
+ maj, min, PVR_VER(pvr), PVR_REV(pvr));
+
+#ifdef CONFIG_PPC32
+ seq_printf(m, "bogomips\t: %lu.%02lu\n",
+ loops_per_jiffy / (500000/HZ),
+ (loops_per_jiffy / (5000/HZ)) % 100);
+#endif
+
+#ifdef CONFIG_SMP
+ seq_printf(m, "\n");
+#endif
+
+ preempt_enable();
+
+ /* If this is the last cpu, print the summary */
+ if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids)
+ show_cpuinfo_summary(m);
+
+ return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+ if (*pos == 0) /* just in case, cpu 0 is not the first */
+ *pos = cpumask_first(cpu_online_mask);
+ else
+ *pos = cpumask_next(*pos - 1, cpu_online_mask);
+ if ((*pos) < nr_cpu_ids)
+ return (void *)(unsigned long)(*pos + 1);
+ return NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return c_start(m, pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+const struct seq_operations cpuinfo_op = {
+ .start =c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = show_cpuinfo,
+};
+
+void __init check_for_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ DBG(" -> check_for_initrd() initrd_start=0x%lx initrd_end=0x%lx\n",
+ initrd_start, initrd_end);
+
+ /* If we were passed an initrd, set the ROOT_DEV properly if the values
+ * look sensible. If not, clear initrd reference.
+ */
+ if (is_kernel_addr(initrd_start) && is_kernel_addr(initrd_end) &&
+ initrd_end > initrd_start)
+ ROOT_DEV = Root_RAM0;
+ else
+ initrd_start = initrd_end = 0;
+
+ if (initrd_start)
+ printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end);
+
+ DBG(" <- check_for_initrd()\n");
+#endif /* CONFIG_BLK_DEV_INITRD */
+}
+
+#ifdef CONFIG_SMP
+
+int threads_per_core, threads_shift;
+cpumask_t threads_core_mask;
+EXPORT_SYMBOL_GPL(threads_per_core);
+EXPORT_SYMBOL_GPL(threads_shift);
+EXPORT_SYMBOL_GPL(threads_core_mask);
+
+static void __init cpu_init_thread_core_maps(int tpc)
+{
+ int i;
+
+ threads_per_core = tpc;
+ cpumask_clear(&threads_core_mask);
+
+ /* This implementation only supports power of 2 number of threads
+ * for simplicity and performance
+ */
+ threads_shift = ilog2(tpc);
+ BUG_ON(tpc != (1 << threads_shift));
+
+ for (i = 0; i < tpc; i++)
+ cpumask_set_cpu(i, &threads_core_mask);
+
+ printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n",
+ tpc, tpc > 1 ? "s" : "");
+ printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift);
+}
+
+
+/**
+ * setup_cpu_maps - initialize the following cpu maps:
+ * cpu_possible_mask
+ * cpu_present_mask
+ *
+ * Having the possible map set up early allows us to restrict allocations
+ * of things like irqstacks to nr_cpu_ids rather than NR_CPUS.
+ *
+ * We do not initialize the online map here; cpus set their own bits in
+ * cpu_online_mask as they come up.
+ *
+ * This function is valid only for Open Firmware systems. finish_device_tree
+ * must be called before using this.
+ *
+ * While we're here, we may as well set the "physical" cpu ids in the paca.
+ *
+ * NOTE: This must match the parsing done in early_init_dt_scan_cpus.
+ */
+void __init smp_setup_cpu_maps(void)
+{
+ struct device_node *dn = NULL;
+ int cpu = 0;
+ int nthreads = 1;
+
+ DBG("smp_setup_cpu_maps()\n");
+
+ while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) {
+ const int *intserv;
+ int j, len;
+
+ DBG(" * %s...\n", dn->full_name);
+
+ intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
+ &len);
+ if (intserv) {
+ nthreads = len / sizeof(int);
+ DBG(" ibm,ppc-interrupt-server#s -> %d threads\n",
+ nthreads);
+ } else {
+ DBG(" no ibm,ppc-interrupt-server#s -> 1 thread\n");
+ intserv = of_get_property(dn, "reg", NULL);
+ if (!intserv)
+ intserv = &cpu; /* assume logical == phys */
+ }
+
+ for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
+ DBG(" thread %d -> cpu %d (hard id %d)\n",
+ j, cpu, intserv[j]);
+ set_cpu_present(cpu, true);
+ set_hard_smp_processor_id(cpu, intserv[j]);
+ set_cpu_possible(cpu, true);
+ cpu++;
+ }
+ }
+
+ /* If no SMT supported, nthreads is forced to 1 */
+ if (!cpu_has_feature(CPU_FTR_SMT)) {
+ DBG(" SMT disabled ! nthreads forced to 1\n");
+ nthreads = 1;
+ }
+
+#ifdef CONFIG_PPC64
+ /*
+ * On pSeries LPAR, we need to know how many cpus
+ * could possibly be added to this partition.
+ */
+ if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR) &&
+ (dn = of_find_node_by_path("/rtas"))) {
+ int num_addr_cell, num_size_cell, maxcpus;
+ const unsigned int *ireg;
+
+ num_addr_cell = of_n_addr_cells(dn);
+ num_size_cell = of_n_size_cells(dn);
+
+ ireg = of_get_property(dn, "ibm,lrdr-capacity", NULL);
+
+ if (!ireg)
+ goto out;
+
+ maxcpus = ireg[num_addr_cell + num_size_cell];
+
+ /* Double maxcpus for processors which have SMT capability */
+ if (cpu_has_feature(CPU_FTR_SMT))
+ maxcpus *= nthreads;
+
+ if (maxcpus > nr_cpu_ids) {
+ printk(KERN_WARNING
+ "Partition configured for %d cpus, "
+ "operating system maximum is %d.\n",
+ maxcpus, nr_cpu_ids);
+ maxcpus = nr_cpu_ids;
+ } else
+ printk(KERN_INFO "Partition configured for %d cpus.\n",
+ maxcpus);
+
+ for (cpu = 0; cpu < maxcpus; cpu++)
+ set_cpu_possible(cpu, true);
+ out:
+ of_node_put(dn);
+ }
+ vdso_data->processorCount = num_present_cpus();
+#endif /* CONFIG_PPC64 */
+
+ /* Initialize CPU <=> thread mapping/
+ *
+ * WARNING: We assume that the number of threads is the same for
+ * every CPU in the system. If that is not the case, then some code
+ * here will have to be reworked
+ */
+ cpu_init_thread_core_maps(nthreads);
+
+ /* Now that possible cpus are set, set nr_cpu_ids for later use */
+ setup_nr_cpu_ids();
+
+ free_unused_pacas();
+}
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_PCSPKR_PLATFORM
+static __init int add_pcspkr(void)
+{
+ struct device_node *np;
+ struct platform_device *pd;
+ int ret;
+
+ np = of_find_compatible_node(NULL, NULL, "pnpPNP,100");
+ of_node_put(np);
+ if (!np)
+ return -ENODEV;
+
+ pd = platform_device_alloc("pcspkr", -1);
+ if (!pd)
+ return -ENOMEM;
+
+ ret = platform_device_add(pd);
+ if (ret)
+ platform_device_put(pd);
+
+ return ret;
+}
+device_initcall(add_pcspkr);
+#endif /* CONFIG_PCSPKR_PLATFORM */
+
+void probe_machine(void)
+{
+ extern struct machdep_calls __machine_desc_start;
+ extern struct machdep_calls __machine_desc_end;
+
+ /*
+ * Iterate all ppc_md structures until we find the proper
+ * one for the current machine type
+ */
+ DBG("Probing machine type ...\n");
+
+ for (machine_id = &__machine_desc_start;
+ machine_id < &__machine_desc_end;
+ machine_id++) {
+ DBG(" %s ...", machine_id->name);
+ memcpy(&ppc_md, machine_id, sizeof(struct machdep_calls));
+ if (ppc_md.probe()) {
+ DBG(" match !\n");
+ break;
+ }
+ DBG("\n");
+ }
+ /* What can we do if we didn't find ? */
+ if (machine_id >= &__machine_desc_end) {
+ DBG("No suitable machine found !\n");
+ for (;;);
+ }
+
+ printk(KERN_INFO "Using %s machine description\n", ppc_md.name);
+}
+
+/* Match a class of boards, not a specific device configuration. */
+int check_legacy_ioport(unsigned long base_port)
+{
+ struct device_node *parent, *np = NULL;
+ int ret = -ENODEV;
+
+ switch(base_port) {
+ case I8042_DATA_REG:
+ if (!(np = of_find_compatible_node(NULL, NULL, "pnpPNP,303")))
+ np = of_find_compatible_node(NULL, NULL, "pnpPNP,f03");
+ if (np) {
+ parent = of_get_parent(np);
+
+ of_i8042_kbd_irq = irq_of_parse_and_map(parent, 0);
+ if (!of_i8042_kbd_irq)
+ of_i8042_kbd_irq = 1;
+
+ of_i8042_aux_irq = irq_of_parse_and_map(parent, 1);
+ if (!of_i8042_aux_irq)
+ of_i8042_aux_irq = 12;
+
+ of_node_put(np);
+ np = parent;
+ break;
+ }
+ np = of_find_node_by_type(NULL, "8042");
+ /* Pegasos has no device_type on its 8042 node, look for the
+ * name instead */
+ if (!np)
+ np = of_find_node_by_name(NULL, "8042");
+ if (np) {
+ of_i8042_kbd_irq = 1;
+ of_i8042_aux_irq = 12;
+ }
+ break;
+ case FDC_BASE: /* FDC1 */
+ np = of_find_node_by_type(NULL, "fdc");
+ break;
+#ifdef CONFIG_PPC_PREP
+ case _PIDXR:
+ case _PNPWRP:
+ case PNPBIOS_BASE:
+ /* implement me */
+#endif
+ default:
+ /* ipmi is supposed to fail here */
+ break;
+ }
+ if (!np)
+ return ret;
+ parent = of_get_parent(np);
+ if (parent) {
+ if (strcmp(parent->type, "isa") == 0)
+ ret = 0;
+ of_node_put(parent);
+ }
+ of_node_put(np);
+ return ret;
+}
+EXPORT_SYMBOL(check_legacy_ioport);
+
+static int ppc_panic_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ /*
+ * If firmware-assisted dump has been registered then trigger
+ * firmware-assisted dump and let firmware handle everything else.
+ */
+ crash_fadump(NULL, ptr);
+ ppc_md.panic(ptr); /* May not return */
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ppc_panic_block = {
+ .notifier_call = ppc_panic_event,
+ .priority = INT_MIN /* may not return; must be done last */
+};
+
+void __init setup_panic(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
+}
+
+#ifdef CONFIG_CHECK_CACHE_COHERENCY
+/*
+ * For platforms that have configurable cache-coherency. This function
+ * checks that the cache coherency setting of the kernel matches the setting
+ * left by the firmware, as indicated in the device tree. Since a mismatch
+ * will eventually result in DMA failures, we print * and error and call
+ * BUG() in that case.
+ */
+
+#ifdef CONFIG_NOT_COHERENT_CACHE
+#define KERNEL_COHERENCY 0
+#else
+#define KERNEL_COHERENCY 1
+#endif
+
+static int __init check_cache_coherency(void)
+{
+ struct device_node *np;
+ const void *prop;
+ int devtree_coherency;
+
+ np = of_find_node_by_path("/");
+ prop = of_get_property(np, "coherency-off", NULL);
+ of_node_put(np);
+
+ devtree_coherency = prop ? 0 : 1;
+
+ if (devtree_coherency != KERNEL_COHERENCY) {
+ printk(KERN_ERR
+ "kernel coherency:%s != device tree_coherency:%s\n",
+ KERNEL_COHERENCY ? "on" : "off",
+ devtree_coherency ? "on" : "off");
+ BUG();
+ }
+
+ return 0;
+}
+
+late_initcall(check_cache_coherency);
+#endif /* CONFIG_CHECK_CACHE_COHERENCY */
+
+#ifdef CONFIG_DEBUG_FS
+struct dentry *powerpc_debugfs_root;
+EXPORT_SYMBOL(powerpc_debugfs_root);
+
+static int powerpc_debugfs_init(void)
+{
+ powerpc_debugfs_root = debugfs_create_dir("powerpc", NULL);
+
+ return powerpc_debugfs_root == NULL;
+}
+arch_initcall(powerpc_debugfs_init);
+#endif
+
+void ppc_printk_progress(char *s, unsigned short hex)
+{
+ pr_info("%s\n", s);
+}
+
+void arch_setup_pdev_archdata(struct platform_device *pdev)
+{
+ pdev->archdata.dma_mask = DMA_BIT_MASK(32);
+ pdev->dev.dma_mask = &pdev->archdata.dma_mask;
+ set_dma_ops(&pdev->dev, &dma_direct_ops);
+}
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
new file mode 100644
index 00000000..4c67ad7f
--- /dev/null
+++ b/arch/powerpc/kernel/setup.h
@@ -0,0 +1,9 @@
+#ifndef _POWERPC_KERNEL_SETUP_H
+#define _POWERPC_KERNEL_SETUP_H
+
+void check_for_initrd(void);
+void do_init_bootmem(void);
+void setup_panic(void);
+extern int do_early_xmon;
+
+#endif /* _POWERPC_KERNEL_SETUP_H */
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
new file mode 100644
index 00000000..ec8a53fa
--- /dev/null
+++ b/arch/powerpc/kernel/setup_32.c
@@ -0,0 +1,354 @@
+/*
+ * Common prep/pmac/chrp boot and setup code.
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/delay.h>
+#include <linux/initrd.h>
+#include <linux/tty.h>
+#include <linux/bootmem.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/cpu.h>
+#include <linux/console.h>
+#include <linux/memblock.h>
+
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/processor.h>
+#include <asm/pgtable.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/elf.h>
+#include <asm/cputable.h>
+#include <asm/bootx.h>
+#include <asm/btext.h>
+#include <asm/machdep.h>
+#include <asm/uaccess.h>
+#include <asm/pmac_feature.h>
+#include <asm/sections.h>
+#include <asm/nvram.h>
+#include <asm/xmon.h>
+#include <asm/time.h>
+#include <asm/serial.h>
+#include <asm/udbg.h>
+#include <asm/mmu_context.h>
+
+#include "setup.h"
+
+#define DBG(fmt...)
+
+extern void bootx_init(unsigned long r4, unsigned long phys);
+
+int boot_cpuid = -1;
+EXPORT_SYMBOL_GPL(boot_cpuid);
+int boot_cpuid_phys;
+EXPORT_SYMBOL_GPL(boot_cpuid_phys);
+
+int smp_hw_index[NR_CPUS];
+
+unsigned long ISA_DMA_THRESHOLD;
+unsigned int DMA_MODE_READ;
+unsigned int DMA_MODE_WRITE;
+
+#ifdef CONFIG_VGA_CONSOLE
+unsigned long vgacon_remap_base;
+EXPORT_SYMBOL(vgacon_remap_base);
+#endif
+
+/*
+ * These are used in binfmt_elf.c to put aux entries on the stack
+ * for each elf executable being started.
+ */
+int dcache_bsize;
+int icache_bsize;
+int ucache_bsize;
+
+/*
+ * We're called here very early in the boot. We determine the machine
+ * type and call the appropriate low-level setup functions.
+ * -- Cort <cort@fsmlabs.com>
+ *
+ * Note that the kernel may be running at an address which is different
+ * from the address that it was linked at, so we must use RELOC/PTRRELOC
+ * to access static data (including strings). -- paulus
+ */
+notrace unsigned long __init early_init(unsigned long dt_ptr)
+{
+ unsigned long offset = reloc_offset();
+ struct cpu_spec *spec;
+
+ /* First zero the BSS -- use memset_io, some platforms don't have
+ * caches on yet */
+ memset_io((void __iomem *)PTRRELOC(&__bss_start), 0,
+ __bss_stop - __bss_start);
+
+ /*
+ * Identify the CPU type and fix up code sections
+ * that depend on which cpu we have.
+ */
+ spec = identify_cpu(offset, mfspr(SPRN_PVR));
+
+ do_feature_fixups(spec->cpu_features,
+ PTRRELOC(&__start___ftr_fixup),
+ PTRRELOC(&__stop___ftr_fixup));
+
+ do_feature_fixups(spec->mmu_features,
+ PTRRELOC(&__start___mmu_ftr_fixup),
+ PTRRELOC(&__stop___mmu_ftr_fixup));
+
+ do_lwsync_fixups(spec->cpu_features,
+ PTRRELOC(&__start___lwsync_fixup),
+ PTRRELOC(&__stop___lwsync_fixup));
+
+ do_final_fixups();
+
+ return KERNELBASE + offset;
+}
+
+
+/*
+ * Find out what kind of machine we're on and save any data we need
+ * from the early boot process (devtree is copied on pmac by prom_init()).
+ * This is called very early on the boot process, after a minimal
+ * MMU environment has been set up but before MMU_init is called.
+ */
+notrace void __init machine_init(u64 dt_ptr)
+{
+ lockdep_init();
+
+ /* Enable early debugging if any specified (see udbg.h) */
+ udbg_early_init();
+
+ /* Do some early initialization based on the flat device tree */
+ early_init_devtree(__va(dt_ptr));
+
+ early_init_mmu();
+
+ probe_machine();
+
+ setup_kdump_trampoline();
+
+#ifdef CONFIG_6xx
+ if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
+ cpu_has_feature(CPU_FTR_CAN_NAP))
+ ppc_md.power_save = ppc6xx_idle;
+#endif
+
+#ifdef CONFIG_E500
+ if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
+ cpu_has_feature(CPU_FTR_CAN_NAP))
+ ppc_md.power_save = e500_idle;
+#endif
+ if (ppc_md.progress)
+ ppc_md.progress("id mach(): done", 0x200);
+}
+
+#ifdef CONFIG_BOOKE_WDT
+extern u32 booke_wdt_enabled;
+extern u32 booke_wdt_period;
+
+/* Checks wdt=x and wdt_period=xx command-line option */
+notrace int __init early_parse_wdt(char *p)
+{
+ if (p && strncmp(p, "0", 1) != 0)
+ booke_wdt_enabled = 1;
+
+ return 0;
+}
+early_param("wdt", early_parse_wdt);
+
+int __init early_parse_wdt_period (char *p)
+{
+ if (p)
+ booke_wdt_period = simple_strtoul(p, NULL, 0);
+
+ return 0;
+}
+early_param("wdt_period", early_parse_wdt_period);
+#endif /* CONFIG_BOOKE_WDT */
+
+/* Checks "l2cr=xxxx" command-line option */
+int __init ppc_setup_l2cr(char *str)
+{
+ if (cpu_has_feature(CPU_FTR_L2CR)) {
+ unsigned long val = simple_strtoul(str, NULL, 0);
+ printk(KERN_INFO "l2cr set to %lx\n", val);
+ _set_L2CR(0); /* force invalidate by disable cache */
+ _set_L2CR(val); /* and enable it */
+ }
+ return 1;
+}
+__setup("l2cr=", ppc_setup_l2cr);
+
+/* Checks "l3cr=xxxx" command-line option */
+int __init ppc_setup_l3cr(char *str)
+{
+ if (cpu_has_feature(CPU_FTR_L3CR)) {
+ unsigned long val = simple_strtoul(str, NULL, 0);
+ printk(KERN_INFO "l3cr set to %lx\n", val);
+ _set_L3CR(val); /* and enable it */
+ }
+ return 1;
+}
+__setup("l3cr=", ppc_setup_l3cr);
+
+#ifdef CONFIG_GENERIC_NVRAM
+
+/* Generic nvram hooks used by drivers/char/gen_nvram.c */
+unsigned char nvram_read_byte(int addr)
+{
+ if (ppc_md.nvram_read_val)
+ return ppc_md.nvram_read_val(addr);
+ return 0xff;
+}
+EXPORT_SYMBOL(nvram_read_byte);
+
+void nvram_write_byte(unsigned char val, int addr)
+{
+ if (ppc_md.nvram_write_val)
+ ppc_md.nvram_write_val(addr, val);
+}
+EXPORT_SYMBOL(nvram_write_byte);
+
+ssize_t nvram_get_size(void)
+{
+ if (ppc_md.nvram_size)
+ return ppc_md.nvram_size();
+ return -1;
+}
+EXPORT_SYMBOL(nvram_get_size);
+
+void nvram_sync(void)
+{
+ if (ppc_md.nvram_sync)
+ ppc_md.nvram_sync();
+}
+EXPORT_SYMBOL(nvram_sync);
+
+#endif /* CONFIG_NVRAM */
+
+int __init ppc_init(void)
+{
+ /* clear the progress line */
+ if (ppc_md.progress)
+ ppc_md.progress(" ", 0xffff);
+
+ /* call platform init */
+ if (ppc_md.init != NULL) {
+ ppc_md.init();
+ }
+ return 0;
+}
+
+arch_initcall(ppc_init);
+
+static void __init irqstack_early_init(void)
+{
+ unsigned int i;
+
+ /* interrupt stacks must be in lowmem, we get that for free on ppc32
+ * as the memblock is limited to lowmem by default */
+ for_each_possible_cpu(i) {
+ softirq_ctx[i] = (struct thread_info *)
+ __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+ hardirq_ctx[i] = (struct thread_info *)
+ __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+ }
+}
+
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+static void __init exc_lvl_early_init(void)
+{
+ unsigned int i, hw_cpu;
+
+ /* interrupt stacks must be in lowmem, we get that for free on ppc32
+ * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */
+ for_each_possible_cpu(i) {
+ hw_cpu = get_hard_smp_processor_id(i);
+ critirq_ctx[hw_cpu] = (struct thread_info *)
+ __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+#ifdef CONFIG_BOOKE
+ dbgirq_ctx[hw_cpu] = (struct thread_info *)
+ __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+ mcheckirq_ctx[hw_cpu] = (struct thread_info *)
+ __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+#endif
+ }
+}
+#else
+#define exc_lvl_early_init()
+#endif
+
+/* Warning, IO base is not yet inited */
+void __init setup_arch(char **cmdline_p)
+{
+ *cmdline_p = cmd_line;
+
+ /* so udelay does something sensible, assume <= 1000 bogomips */
+ loops_per_jiffy = 500000000 / HZ;
+
+ unflatten_device_tree();
+ check_for_initrd();
+
+ if (ppc_md.init_early)
+ ppc_md.init_early();
+
+ find_legacy_serial_ports();
+
+ smp_setup_cpu_maps();
+
+ /* Register early console */
+ register_early_udbg_console();
+
+ xmon_setup();
+
+ /*
+ * Set cache line size based on type of cpu as a default.
+ * Systems with OF can look in the properties on the cpu node(s)
+ * for a possibly more accurate value.
+ */
+ dcache_bsize = cur_cpu_spec->dcache_bsize;
+ icache_bsize = cur_cpu_spec->icache_bsize;
+ ucache_bsize = 0;
+ if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE))
+ ucache_bsize = icache_bsize = dcache_bsize;
+
+ /* reboot on panic */
+ panic_timeout = 180;
+
+ if (ppc_md.panic)
+ setup_panic();
+
+ init_mm.start_code = (unsigned long)_stext;
+ init_mm.end_code = (unsigned long) _etext;
+ init_mm.end_data = (unsigned long) _edata;
+ init_mm.brk = klimit;
+
+ exc_lvl_early_init();
+
+ irqstack_early_init();
+
+ /* set up the bootmem stuff with available memory */
+ do_init_bootmem();
+ if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab);
+
+#ifdef CONFIG_DUMMY_CONSOLE
+ conswitchp = &dummy_con;
+#endif
+
+ if (ppc_md.setup_arch)
+ ppc_md.setup_arch();
+ if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab);
+
+ paging_init();
+
+ /* Initialize the MMU context management stuff */
+ mmu_context_init();
+
+}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
new file mode 100644
index 00000000..389bd4f0
--- /dev/null
+++ b/arch/powerpc/kernel/setup_64.c
@@ -0,0 +1,691 @@
+/*
+ *
+ * Common boot and setup code.
+ *
+ * Copyright (C) 2001 PPC64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/delay.h>
+#include <linux/initrd.h>
+#include <linux/seq_file.h>
+#include <linux/ioport.h>
+#include <linux/console.h>
+#include <linux/utsname.h>
+#include <linux/tty.h>
+#include <linux/root_dev.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/unistd.h>
+#include <linux/serial.h>
+#include <linux/serial_8250.h>
+#include <linux/bootmem.h>
+#include <linux/pci.h>
+#include <linux/lockdep.h>
+#include <linux/memblock.h>
+#include <linux/hugetlb.h>
+
+#include <asm/io.h>
+#include <asm/kdump.h>
+#include <asm/prom.h>
+#include <asm/processor.h>
+#include <asm/pgtable.h>
+#include <asm/smp.h>
+#include <asm/elf.h>
+#include <asm/machdep.h>
+#include <asm/paca.h>
+#include <asm/time.h>
+#include <asm/cputable.h>
+#include <asm/sections.h>
+#include <asm/btext.h>
+#include <asm/nvram.h>
+#include <asm/setup.h>
+#include <asm/rtas.h>
+#include <asm/iommu.h>
+#include <asm/serial.h>
+#include <asm/cache.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/firmware.h>
+#include <asm/xmon.h>
+#include <asm/udbg.h>
+#include <asm/kexec.h>
+#include <asm/mmu_context.h>
+#include <asm/code-patching.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hugetlb.h>
+
+#include "setup.h"
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+int boot_cpuid = 0;
+int __initdata spinning_secondaries;
+u64 ppc64_pft_size;
+
+/* Pick defaults since we might want to patch instructions
+ * before we've read this from the device tree.
+ */
+struct ppc64_caches ppc64_caches = {
+ .dline_size = 0x40,
+ .log_dline_size = 6,
+ .iline_size = 0x40,
+ .log_iline_size = 6
+};
+EXPORT_SYMBOL_GPL(ppc64_caches);
+
+/*
+ * These are used in binfmt_elf.c to put aux entries on the stack
+ * for each elf executable being started.
+ */
+int dcache_bsize;
+int icache_bsize;
+int ucache_bsize;
+
+#ifdef CONFIG_SMP
+
+static char *smt_enabled_cmdline;
+
+/* Look for ibm,smt-enabled OF option */
+static void check_smt_enabled(void)
+{
+ struct device_node *dn;
+ const char *smt_option;
+
+ /* Default to enabling all threads */
+ smt_enabled_at_boot = threads_per_core;
+
+ /* Allow the command line to overrule the OF option */
+ if (smt_enabled_cmdline) {
+ if (!strcmp(smt_enabled_cmdline, "on"))
+ smt_enabled_at_boot = threads_per_core;
+ else if (!strcmp(smt_enabled_cmdline, "off"))
+ smt_enabled_at_boot = 0;
+ else {
+ long smt;
+ int rc;
+
+ rc = strict_strtol(smt_enabled_cmdline, 10, &smt);
+ if (!rc)
+ smt_enabled_at_boot =
+ min(threads_per_core, (int)smt);
+ }
+ } else {
+ dn = of_find_node_by_path("/options");
+ if (dn) {
+ smt_option = of_get_property(dn, "ibm,smt-enabled",
+ NULL);
+
+ if (smt_option) {
+ if (!strcmp(smt_option, "on"))
+ smt_enabled_at_boot = threads_per_core;
+ else if (!strcmp(smt_option, "off"))
+ smt_enabled_at_boot = 0;
+ }
+
+ of_node_put(dn);
+ }
+ }
+}
+
+/* Look for smt-enabled= cmdline option */
+static int __init early_smt_enabled(char *p)
+{
+ smt_enabled_cmdline = p;
+ return 0;
+}
+early_param("smt-enabled", early_smt_enabled);
+
+#else
+#define check_smt_enabled()
+#endif /* CONFIG_SMP */
+
+/*
+ * Early initialization entry point. This is called by head.S
+ * with MMU translation disabled. We rely on the "feature" of
+ * the CPU that ignores the top 2 bits of the address in real
+ * mode so we can access kernel globals normally provided we
+ * only toy with things in the RMO region. From here, we do
+ * some early parsing of the device-tree to setup out MEMBLOCK
+ * data structures, and allocate & initialize the hash table
+ * and segment tables so we can start running with translation
+ * enabled.
+ *
+ * It is this function which will call the probe() callback of
+ * the various platform types and copy the matching one to the
+ * global ppc_md structure. Your platform can eventually do
+ * some very early initializations from the probe() routine, but
+ * this is not recommended, be very careful as, for example, the
+ * device-tree is not accessible via normal means at this point.
+ */
+
+void __init early_setup(unsigned long dt_ptr)
+{
+ /* -------- printk is _NOT_ safe to use here ! ------- */
+
+ /* Identify CPU type */
+ identify_cpu(0, mfspr(SPRN_PVR));
+
+ /* Assume we're on cpu 0 for now. Don't write to the paca yet! */
+ initialise_paca(&boot_paca, 0);
+ setup_paca(&boot_paca);
+
+ /* Initialize lockdep early or else spinlocks will blow */
+ lockdep_init();
+
+ /* -------- printk is now safe to use ------- */
+
+ /* Enable early debugging if any specified (see udbg.h) */
+ udbg_early_init();
+
+ DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr);
+
+ /*
+ * Do early initialization using the flattened device
+ * tree, such as retrieving the physical memory map or
+ * calculating/retrieving the hash table size.
+ */
+ early_init_devtree(__va(dt_ptr));
+
+ /* Now we know the logical id of our boot cpu, setup the paca. */
+ setup_paca(&paca[boot_cpuid]);
+
+ /* Fix up paca fields required for the boot cpu */
+ get_paca()->cpu_start = 1;
+
+ /* Probe the machine type */
+ probe_machine();
+
+ setup_kdump_trampoline();
+
+ DBG("Found, Initializing memory management...\n");
+
+ /* Initialize the hash table or TLB handling */
+ early_init_mmu();
+
+ /*
+ * Reserve any gigantic pages requested on the command line.
+ * memblock needs to have been initialized by the time this is
+ * called since this will reserve memory.
+ */
+ reserve_hugetlb_gpages();
+
+ DBG(" <- early_setup()\n");
+}
+
+#ifdef CONFIG_SMP
+void early_setup_secondary(void)
+{
+ /* Mark interrupts enabled in PACA */
+ get_paca()->soft_enabled = 0;
+
+ /* Initialize the hash table or TLB handling */
+ early_init_mmu_secondary();
+}
+
+#endif /* CONFIG_SMP */
+
+#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
+void smp_release_cpus(void)
+{
+ unsigned long *ptr;
+ int i;
+
+ DBG(" -> smp_release_cpus()\n");
+
+ /* All secondary cpus are spinning on a common spinloop, release them
+ * all now so they can start to spin on their individual paca
+ * spinloops. For non SMP kernels, the secondary cpus never get out
+ * of the common spinloop.
+ */
+
+ ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
+ - PHYSICAL_START);
+ *ptr = __pa(generic_secondary_smp_init);
+
+ /* And wait a bit for them to catch up */
+ for (i = 0; i < 100000; i++) {
+ mb();
+ HMT_low();
+ if (spinning_secondaries == 0)
+ break;
+ udelay(1);
+ }
+ DBG("spinning_secondaries = %d\n", spinning_secondaries);
+
+ DBG(" <- smp_release_cpus()\n");
+}
+#endif /* CONFIG_SMP || CONFIG_KEXEC */
+
+/*
+ * Initialize some remaining members of the ppc64_caches and systemcfg
+ * structures
+ * (at least until we get rid of them completely). This is mostly some
+ * cache informations about the CPU that will be used by cache flush
+ * routines and/or provided to userland
+ */
+static void __init initialize_cache_info(void)
+{
+ struct device_node *np;
+ unsigned long num_cpus = 0;
+
+ DBG(" -> initialize_cache_info()\n");
+
+ for_each_node_by_type(np, "cpu") {
+ num_cpus += 1;
+
+ /*
+ * We're assuming *all* of the CPUs have the same
+ * d-cache and i-cache sizes... -Peter
+ */
+ if (num_cpus == 1) {
+ const u32 *sizep, *lsizep;
+ u32 size, lsize;
+
+ size = 0;
+ lsize = cur_cpu_spec->dcache_bsize;
+ sizep = of_get_property(np, "d-cache-size", NULL);
+ if (sizep != NULL)
+ size = *sizep;
+ lsizep = of_get_property(np, "d-cache-block-size",
+ NULL);
+ /* fallback if block size missing */
+ if (lsizep == NULL)
+ lsizep = of_get_property(np,
+ "d-cache-line-size",
+ NULL);
+ if (lsizep != NULL)
+ lsize = *lsizep;
+ if (sizep == 0 || lsizep == 0)
+ DBG("Argh, can't find dcache properties ! "
+ "sizep: %p, lsizep: %p\n", sizep, lsizep);
+
+ ppc64_caches.dsize = size;
+ ppc64_caches.dline_size = lsize;
+ ppc64_caches.log_dline_size = __ilog2(lsize);
+ ppc64_caches.dlines_per_page = PAGE_SIZE / lsize;
+
+ size = 0;
+ lsize = cur_cpu_spec->icache_bsize;
+ sizep = of_get_property(np, "i-cache-size", NULL);
+ if (sizep != NULL)
+ size = *sizep;
+ lsizep = of_get_property(np, "i-cache-block-size",
+ NULL);
+ if (lsizep == NULL)
+ lsizep = of_get_property(np,
+ "i-cache-line-size",
+ NULL);
+ if (lsizep != NULL)
+ lsize = *lsizep;
+ if (sizep == 0 || lsizep == 0)
+ DBG("Argh, can't find icache properties ! "
+ "sizep: %p, lsizep: %p\n", sizep, lsizep);
+
+ ppc64_caches.isize = size;
+ ppc64_caches.iline_size = lsize;
+ ppc64_caches.log_iline_size = __ilog2(lsize);
+ ppc64_caches.ilines_per_page = PAGE_SIZE / lsize;
+ }
+ }
+
+ DBG(" <- initialize_cache_info()\n");
+}
+
+
+/*
+ * Do some initial setup of the system. The parameters are those which
+ * were passed in from the bootloader.
+ */
+void __init setup_system(void)
+{
+ DBG(" -> setup_system()\n");
+
+ /* Apply the CPUs-specific and firmware specific fixups to kernel
+ * text (nop out sections not relevant to this CPU or this firmware)
+ */
+ do_feature_fixups(cur_cpu_spec->cpu_features,
+ &__start___ftr_fixup, &__stop___ftr_fixup);
+ do_feature_fixups(cur_cpu_spec->mmu_features,
+ &__start___mmu_ftr_fixup, &__stop___mmu_ftr_fixup);
+ do_feature_fixups(powerpc_firmware_features,
+ &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
+ do_lwsync_fixups(cur_cpu_spec->cpu_features,
+ &__start___lwsync_fixup, &__stop___lwsync_fixup);
+ do_final_fixups();
+
+ /*
+ * Unflatten the device-tree passed by prom_init or kexec
+ */
+ unflatten_device_tree();
+
+ /*
+ * Fill the ppc64_caches & systemcfg structures with informations
+ * retrieved from the device-tree.
+ */
+ initialize_cache_info();
+
+#ifdef CONFIG_PPC_RTAS
+ /*
+ * Initialize RTAS if available
+ */
+ rtas_initialize();
+#endif /* CONFIG_PPC_RTAS */
+
+ /*
+ * Check if we have an initrd provided via the device-tree
+ */
+ check_for_initrd();
+
+ /*
+ * Do some platform specific early initializations, that includes
+ * setting up the hash table pointers. It also sets up some interrupt-mapping
+ * related options that will be used by finish_device_tree()
+ */
+ if (ppc_md.init_early)
+ ppc_md.init_early();
+
+ /*
+ * We can discover serial ports now since the above did setup the
+ * hash table management for us, thus ioremap works. We do that early
+ * so that further code can be debugged
+ */
+ find_legacy_serial_ports();
+
+ /*
+ * Register early console
+ */
+ register_early_udbg_console();
+
+ /*
+ * Initialize xmon
+ */
+ xmon_setup();
+
+ smp_setup_cpu_maps();
+ check_smt_enabled();
+
+#ifdef CONFIG_SMP
+ /* Release secondary cpus out of their spinloops at 0x60 now that
+ * we can map physical -> logical CPU ids
+ */
+ smp_release_cpus();
+#endif
+
+ printk("Starting Linux PPC64 %s\n", init_utsname()->version);
+
+ printk("-----------------------------------------------------\n");
+ printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
+ printk("physicalMemorySize = 0x%llx\n", memblock_phys_mem_size());
+ if (ppc64_caches.dline_size != 0x80)
+ printk("ppc64_caches.dcache_line_size = 0x%x\n",
+ ppc64_caches.dline_size);
+ if (ppc64_caches.iline_size != 0x80)
+ printk("ppc64_caches.icache_line_size = 0x%x\n",
+ ppc64_caches.iline_size);
+#ifdef CONFIG_PPC_STD_MMU_64
+ if (htab_address)
+ printk("htab_address = 0x%p\n", htab_address);
+ printk("htab_hash_mask = 0x%lx\n", htab_hash_mask);
+#endif /* CONFIG_PPC_STD_MMU_64 */
+ if (PHYSICAL_START > 0)
+ printk("physical_start = 0x%llx\n",
+ (unsigned long long)PHYSICAL_START);
+ printk("-----------------------------------------------------\n");
+
+ DBG(" <- setup_system()\n");
+}
+
+/* This returns the limit below which memory accesses to the linear
+ * mapping are guarnateed not to cause a TLB or SLB miss. This is
+ * used to allocate interrupt or emergency stacks for which our
+ * exception entry path doesn't deal with being interrupted.
+ */
+static u64 safe_stack_limit(void)
+{
+#ifdef CONFIG_PPC_BOOK3E
+ /* Freescale BookE bolts the entire linear mapping */
+ if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+ return linear_map_top;
+ /* Other BookE, we assume the first GB is bolted */
+ return 1ul << 30;
+#else
+ /* BookS, the first segment is bolted */
+ if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+ return 1UL << SID_SHIFT_1T;
+ return 1UL << SID_SHIFT;
+#endif
+}
+
+static void __init irqstack_early_init(void)
+{
+ u64 limit = safe_stack_limit();
+ unsigned int i;
+
+ /*
+ * Interrupt stacks must be in the first segment since we
+ * cannot afford to take SLB misses on them.
+ */
+ for_each_possible_cpu(i) {
+ softirq_ctx[i] = (struct thread_info *)
+ __va(memblock_alloc_base(THREAD_SIZE,
+ THREAD_SIZE, limit));
+ hardirq_ctx[i] = (struct thread_info *)
+ __va(memblock_alloc_base(THREAD_SIZE,
+ THREAD_SIZE, limit));
+ }
+}
+
+#ifdef CONFIG_PPC_BOOK3E
+static void __init exc_lvl_early_init(void)
+{
+ extern unsigned int interrupt_base_book3e;
+ extern unsigned int exc_debug_debug_book3e;
+
+ unsigned int i;
+
+ for_each_possible_cpu(i) {
+ critirq_ctx[i] = (struct thread_info *)
+ __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+ dbgirq_ctx[i] = (struct thread_info *)
+ __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+ mcheckirq_ctx[i] = (struct thread_info *)
+ __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+ }
+
+ if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
+ patch_branch(&interrupt_base_book3e + (0x040 / 4) + 1,
+ (unsigned long)&exc_debug_debug_book3e, 0);
+}
+#else
+#define exc_lvl_early_init()
+#endif
+
+/*
+ * Stack space used when we detect a bad kernel stack pointer, and
+ * early in SMP boots before relocation is enabled.
+ */
+static void __init emergency_stack_init(void)
+{
+ u64 limit;
+ unsigned int i;
+
+ /*
+ * Emergency stacks must be under 256MB, we cannot afford to take
+ * SLB misses on them. The ABI also requires them to be 128-byte
+ * aligned.
+ *
+ * Since we use these as temporary stacks during secondary CPU
+ * bringup, we need to get at them in real mode. This means they
+ * must also be within the RMO region.
+ */
+ limit = min(safe_stack_limit(), ppc64_rma_size);
+
+ for_each_possible_cpu(i) {
+ unsigned long sp;
+ sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
+ sp += THREAD_SIZE;
+ paca[i].emergency_sp = __va(sp);
+ }
+}
+
+/*
+ * Called into from start_kernel this initializes bootmem, which is used
+ * to manage page allocation until mem_init is called.
+ */
+void __init setup_arch(char **cmdline_p)
+{
+ ppc64_boot_msg(0x12, "Setup Arch");
+
+ *cmdline_p = cmd_line;
+
+ /*
+ * Set cache line size based on type of cpu as a default.
+ * Systems with OF can look in the properties on the cpu node(s)
+ * for a possibly more accurate value.
+ */
+ dcache_bsize = ppc64_caches.dline_size;
+ icache_bsize = ppc64_caches.iline_size;
+
+ /* reboot on panic */
+ panic_timeout = 180;
+
+ if (ppc_md.panic)
+ setup_panic();
+
+ init_mm.start_code = (unsigned long)_stext;
+ init_mm.end_code = (unsigned long) _etext;
+ init_mm.end_data = (unsigned long) _edata;
+ init_mm.brk = klimit;
+
+ irqstack_early_init();
+ exc_lvl_early_init();
+ emergency_stack_init();
+
+#ifdef CONFIG_PPC_STD_MMU_64
+ stabs_alloc();
+#endif
+ /* set up the bootmem stuff with available memory */
+ do_init_bootmem();
+ sparse_init();
+
+#ifdef CONFIG_DUMMY_CONSOLE
+ conswitchp = &dummy_con;
+#endif
+
+ if (ppc_md.setup_arch)
+ ppc_md.setup_arch();
+
+ paging_init();
+
+ /* Initialize the MMU context management stuff */
+ mmu_context_init();
+
+ kvm_linear_init();
+
+ ppc64_boot_msg(0x15, "Setup Done");
+}
+
+
+/* ToDo: do something useful if ppc_md is not yet setup. */
+#define PPC64_LINUX_FUNCTION 0x0f000000
+#define PPC64_IPL_MESSAGE 0xc0000000
+#define PPC64_TERM_MESSAGE 0xb0000000
+
+static void ppc64_do_msg(unsigned int src, const char *msg)
+{
+ if (ppc_md.progress) {
+ char buf[128];
+
+ sprintf(buf, "%08X\n", src);
+ ppc_md.progress(buf, 0);
+ snprintf(buf, 128, "%s", msg);
+ ppc_md.progress(buf, 0);
+ }
+}
+
+/* Print a boot progress message. */
+void ppc64_boot_msg(unsigned int src, const char *msg)
+{
+ ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_IPL_MESSAGE|src, msg);
+ printk("[boot]%04x %s\n", src, msg);
+}
+
+#ifdef CONFIG_SMP
+#define PCPU_DYN_SIZE ()
+
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
+{
+ return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align,
+ __pa(MAX_DMA_ADDRESS));
+}
+
+static void __init pcpu_fc_free(void *ptr, size_t size)
+{
+ free_bootmem(__pa(ptr), size);
+}
+
+static int pcpu_cpu_distance(unsigned int from, unsigned int to)
+{
+ if (cpu_to_node(from) == cpu_to_node(to))
+ return LOCAL_DISTANCE;
+ else
+ return REMOTE_DISTANCE;
+}
+
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
+
+void __init setup_per_cpu_areas(void)
+{
+ const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
+ size_t atom_size;
+ unsigned long delta;
+ unsigned int cpu;
+ int rc;
+
+ /*
+ * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
+ * to group units. For larger mappings, use 1M atom which
+ * should be large enough to contain a number of units.
+ */
+ if (mmu_linear_psize == MMU_PAGE_4K)
+ atom_size = PAGE_SIZE;
+ else
+ atom_size = 1 << 20;
+
+ rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
+ pcpu_fc_alloc, pcpu_fc_free);
+ if (rc < 0)
+ panic("cannot initialize percpu area (err=%d)", rc);
+
+ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+ for_each_possible_cpu(cpu) {
+ __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
+ paca[cpu].data_offset = __per_cpu_offset[cpu];
+ }
+}
+#endif
+
+
+#ifdef CONFIG_PPC_INDIRECT_IO
+struct ppc_pci_io ppc_pci_io;
+EXPORT_SYMBOL(ppc_pci_io);
+#endif /* CONFIG_PPC_INDIRECT_IO */
+
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
new file mode 100644
index 00000000..651c5963
--- /dev/null
+++ b/arch/powerpc/kernel/signal.c
@@ -0,0 +1,206 @@
+/*
+ * Common signal handling code for both 32 and 64 bits
+ *
+ * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration
+ * Extracted from signal_32.c and signal_64.c
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file README.legal in the main directory of
+ * this archive for more details.
+ */
+
+#include <linux/tracehook.h>
+#include <linux/signal.h>
+#include <linux/key.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <asm/debug.h>
+
+#include "signal.h"
+
+/* Log an error when sending an unhandled signal to a process. Controlled
+ * through debug.exception-trace sysctl.
+ */
+
+int show_unhandled_signals = 0;
+
+/*
+ * Allocate space for the signal frame
+ */
+void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
+ size_t frame_size, int is_32)
+{
+ unsigned long oldsp, newsp;
+
+ /* Default to using normal stack */
+ oldsp = get_clean_sp(regs, is_32);
+
+ /* Check for alt stack */
+ if ((ka->sa.sa_flags & SA_ONSTACK) &&
+ current->sas_ss_size && !on_sig_stack(oldsp))
+ oldsp = (current->sas_ss_sp + current->sas_ss_size);
+
+ /* Get aligned frame */
+ newsp = (oldsp - frame_size) & ~0xFUL;
+
+ /* Check access */
+ if (!access_ok(VERIFY_WRITE, (void __user *)newsp, oldsp - newsp))
+ return NULL;
+
+ return (void __user *)newsp;
+}
+
+
+/*
+ * Restore the user process's signal mask
+ */
+void restore_sigmask(sigset_t *set)
+{
+ sigdelsetmask(set, ~_BLOCKABLE);
+ set_current_blocked(set);
+}
+
+static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
+ int has_handler)
+{
+ unsigned long ret = regs->gpr[3];
+ int restart = 1;
+
+ /* syscall ? */
+ if (TRAP(regs) != 0x0C00)
+ return;
+
+ /* error signalled ? */
+ if (!(regs->ccr & 0x10000000))
+ return;
+
+ switch (ret) {
+ case ERESTART_RESTARTBLOCK:
+ case ERESTARTNOHAND:
+ /* ERESTARTNOHAND means that the syscall should only be
+ * restarted if there was no handler for the signal, and since
+ * we only get here if there is a handler, we dont restart.
+ */
+ restart = !has_handler;
+ break;
+ case ERESTARTSYS:
+ /* ERESTARTSYS means to restart the syscall if there is no
+ * handler or the handler was registered with SA_RESTART
+ */
+ restart = !has_handler || (ka->sa.sa_flags & SA_RESTART) != 0;
+ break;
+ case ERESTARTNOINTR:
+ /* ERESTARTNOINTR means that the syscall should be
+ * called again after the signal handler returns.
+ */
+ break;
+ default:
+ return;
+ }
+ if (restart) {
+ if (ret == ERESTART_RESTARTBLOCK)
+ regs->gpr[0] = __NR_restart_syscall;
+ else
+ regs->gpr[3] = regs->orig_gpr3;
+ regs->nip -= 4;
+ regs->result = 0;
+ } else {
+ regs->result = -EINTR;
+ regs->gpr[3] = EINTR;
+ regs->ccr |= 0x10000000;
+ }
+}
+
+static int do_signal(struct pt_regs *regs)
+{
+ sigset_t *oldset;
+ siginfo_t info;
+ int signr;
+ struct k_sigaction ka;
+ int ret;
+ int is32 = is_32bit_task();
+
+ if (current_thread_info()->local_flags & _TLF_RESTORE_SIGMASK)
+ oldset = &current->saved_sigmask;
+ else
+ oldset = &current->blocked;
+
+ signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+
+ /* Is there any syscall restart business here ? */
+ check_syscall_restart(regs, &ka, signr > 0);
+
+ if (signr <= 0) {
+ struct thread_info *ti = current_thread_info();
+ /* No signal to deliver -- put the saved sigmask back */
+ if (ti->local_flags & _TLF_RESTORE_SIGMASK) {
+ ti->local_flags &= ~_TLF_RESTORE_SIGMASK;
+ sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+ }
+ regs->trap = 0;
+ return 0; /* no signals delivered */
+ }
+
+#ifndef CONFIG_PPC_ADV_DEBUG_REGS
+ /*
+ * Reenable the DABR before delivering the signal to
+ * user space. The DABR will have been cleared if it
+ * triggered inside the kernel.
+ */
+ if (current->thread.dabr)
+ set_dabr(current->thread.dabr);
+#endif
+ /* Re-enable the breakpoints for the signal stack */
+ thread_change_pc(current, regs);
+
+ if (is32) {
+ if (ka.sa.sa_flags & SA_SIGINFO)
+ ret = handle_rt_signal32(signr, &ka, &info, oldset,
+ regs);
+ else
+ ret = handle_signal32(signr, &ka, &info, oldset,
+ regs);
+ } else {
+ ret = handle_rt_signal64(signr, &ka, &info, oldset, regs);
+ }
+
+ regs->trap = 0;
+ if (ret) {
+ block_sigmask(&ka, signr);
+
+ /*
+ * A signal was successfully delivered; the saved sigmask is in
+ * its frame, and we can clear the TLF_RESTORE_SIGMASK flag.
+ */
+ current_thread_info()->local_flags &= ~_TLF_RESTORE_SIGMASK;
+
+ /*
+ * Let tracing know that we've done the handler setup.
+ */
+ tracehook_signal_handler(signr, &info, &ka, regs,
+ test_thread_flag(TIF_SINGLESTEP));
+ }
+
+ return ret;
+}
+
+void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
+{
+ if (thread_info_flags & _TIF_SIGPENDING)
+ do_signal(regs);
+
+ if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
+ }
+}
+
+long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+ unsigned long r5, unsigned long r6, unsigned long r7,
+ unsigned long r8, struct pt_regs *regs)
+{
+ return do_sigaltstack(uss, uoss, regs->gpr[1]);
+}
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
new file mode 100644
index 00000000..8dde973a
--- /dev/null
+++ b/arch/powerpc/kernel/signal.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration
+ * Extracted from signal_32.c and signal_64.c
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file README.legal in the main directory of
+ * this archive for more details.
+ */
+
+#ifndef _POWERPC_ARCH_SIGNAL_H
+#define _POWERPC_ARCH_SIGNAL_H
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
+
+extern void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
+ size_t frame_size, int is_32);
+extern void restore_sigmask(sigset_t *set);
+
+extern int handle_signal32(unsigned long sig, struct k_sigaction *ka,
+ siginfo_t *info, sigset_t *oldset,
+ struct pt_regs *regs);
+
+extern int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
+ siginfo_t *info, sigset_t *oldset,
+ struct pt_regs *regs);
+
+extern unsigned long copy_fpr_to_user(void __user *to,
+ struct task_struct *task);
+extern unsigned long copy_fpr_from_user(struct task_struct *task,
+ void __user *from);
+#ifdef CONFIG_VSX
+extern unsigned long copy_vsx_to_user(void __user *to,
+ struct task_struct *task);
+extern unsigned long copy_vsx_from_user(struct task_struct *task,
+ void __user *from);
+#endif
+
+#ifdef CONFIG_PPC64
+
+extern int handle_rt_signal64(int signr, struct k_sigaction *ka,
+ siginfo_t *info, sigset_t *set,
+ struct pt_regs *regs);
+
+#else /* CONFIG_PPC64 */
+
+static inline int handle_rt_signal64(int signr, struct k_sigaction *ka,
+ siginfo_t *info, sigset_t *set,
+ struct pt_regs *regs)
+{
+ return -EFAULT;
+}
+
+#endif /* !defined(CONFIG_PPC64) */
+
+#endif /* _POWERPC_ARCH_SIGNAL_H */
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
new file mode 100644
index 00000000..45eb9985
--- /dev/null
+++ b/arch/powerpc/kernel/signal_32.c
@@ -0,0 +1,1306 @@
+/*
+ * Signal handling for 32bit PPC and 32bit tasks on 64bit PPC
+ *
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ * Copyright (C) 2001 IBM
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ *
+ * Derived from "arch/i386/kernel/signal.c"
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/elf.h>
+#include <linux/ptrace.h>
+#include <linux/ratelimit.h>
+#ifdef CONFIG_PPC64
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#else
+#include <linux/wait.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/tty.h>
+#include <linux/binfmts.h>
+#include <linux/freezer.h>
+#endif
+
+#include <asm/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/syscalls.h>
+#include <asm/sigcontext.h>
+#include <asm/vdso.h>
+#include <asm/switch_to.h>
+#ifdef CONFIG_PPC64
+#include "ppc32.h"
+#include <asm/unistd.h>
+#else
+#include <asm/ucontext.h>
+#include <asm/pgtable.h>
+#endif
+
+#include "signal.h"
+
+#undef DEBUG_SIG
+
+#ifdef CONFIG_PPC64
+#define sys_sigsuspend compat_sys_sigsuspend
+#define sys_rt_sigsuspend compat_sys_rt_sigsuspend
+#define sys_rt_sigreturn compat_sys_rt_sigreturn
+#define sys_sigaction compat_sys_sigaction
+#define sys_swapcontext compat_sys_swapcontext
+#define sys_sigreturn compat_sys_sigreturn
+
+#define old_sigaction old_sigaction32
+#define sigcontext sigcontext32
+#define mcontext mcontext32
+#define ucontext ucontext32
+
+/*
+ * Userspace code may pass a ucontext which doesn't include VSX added
+ * at the end. We need to check for this case.
+ */
+#define UCONTEXTSIZEWITHOUTVSX \
+ (sizeof(struct ucontext) - sizeof(elf_vsrreghalf_t32))
+
+/*
+ * Returning 0 means we return to userspace via
+ * ret_from_except and thus restore all user
+ * registers from *regs. This is what we need
+ * to do when a signal has been delivered.
+ */
+
+#define GP_REGS_SIZE min(sizeof(elf_gregset_t32), sizeof(struct pt_regs32))
+#undef __SIGNAL_FRAMESIZE
+#define __SIGNAL_FRAMESIZE __SIGNAL_FRAMESIZE32
+#undef ELF_NVRREG
+#define ELF_NVRREG ELF_NVRREG32
+
+/*
+ * Functions for flipping sigsets (thanks to brain dead generic
+ * implementation that makes things simple for little endian only)
+ */
+static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
+{
+ compat_sigset_t cset;
+
+ switch (_NSIG_WORDS) {
+ case 4: cset.sig[6] = set->sig[3] & 0xffffffffull;
+ cset.sig[7] = set->sig[3] >> 32;
+ case 3: cset.sig[4] = set->sig[2] & 0xffffffffull;
+ cset.sig[5] = set->sig[2] >> 32;
+ case 2: cset.sig[2] = set->sig[1] & 0xffffffffull;
+ cset.sig[3] = set->sig[1] >> 32;
+ case 1: cset.sig[0] = set->sig[0] & 0xffffffffull;
+ cset.sig[1] = set->sig[0] >> 32;
+ }
+ return copy_to_user(uset, &cset, sizeof(*uset));
+}
+
+static inline int get_sigset_t(sigset_t *set,
+ const compat_sigset_t __user *uset)
+{
+ compat_sigset_t s32;
+
+ if (copy_from_user(&s32, uset, sizeof(*uset)))
+ return -EFAULT;
+
+ /*
+ * Swap the 2 words of the 64-bit sigset_t (they are stored
+ * in the "wrong" endian in 32-bit user storage).
+ */
+ switch (_NSIG_WORDS) {
+ case 4: set->sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
+ case 3: set->sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32);
+ case 2: set->sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32);
+ case 1: set->sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
+ }
+ return 0;
+}
+
+static inline int get_old_sigaction(struct k_sigaction *new_ka,
+ struct old_sigaction __user *act)
+{
+ compat_old_sigset_t mask;
+ compat_uptr_t handler, restorer;
+
+ if (get_user(handler, &act->sa_handler) ||
+ __get_user(restorer, &act->sa_restorer) ||
+ __get_user(new_ka->sa.sa_flags, &act->sa_flags) ||
+ __get_user(mask, &act->sa_mask))
+ return -EFAULT;
+ new_ka->sa.sa_handler = compat_ptr(handler);
+ new_ka->sa.sa_restorer = compat_ptr(restorer);
+ siginitset(&new_ka->sa.sa_mask, mask);
+ return 0;
+}
+
+#define to_user_ptr(p) ptr_to_compat(p)
+#define from_user_ptr(p) compat_ptr(p)
+
+static inline int save_general_regs(struct pt_regs *regs,
+ struct mcontext __user *frame)
+{
+ elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
+ int i;
+
+ WARN_ON(!FULL_REGS(regs));
+
+ for (i = 0; i <= PT_RESULT; i ++) {
+ if (i == 14 && !FULL_REGS(regs))
+ i = 32;
+ if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i]))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+static inline int restore_general_regs(struct pt_regs *regs,
+ struct mcontext __user *sr)
+{
+ elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
+ int i;
+
+ for (i = 0; i <= PT_RESULT; i++) {
+ if ((i == PT_MSR) || (i == PT_SOFTE))
+ continue;
+ if (__get_user(gregs[i], &sr->mc_gregs[i]))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+#else /* CONFIG_PPC64 */
+
+#define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs))
+
+static inline int put_sigset_t(sigset_t __user *uset, sigset_t *set)
+{
+ return copy_to_user(uset, set, sizeof(*uset));
+}
+
+static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset)
+{
+ return copy_from_user(set, uset, sizeof(*uset));
+}
+
+static inline int get_old_sigaction(struct k_sigaction *new_ka,
+ struct old_sigaction __user *act)
+{
+ old_sigset_t mask;
+
+ if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+ __get_user(new_ka->sa.sa_handler, &act->sa_handler) ||
+ __get_user(new_ka->sa.sa_restorer, &act->sa_restorer))
+ return -EFAULT;
+ __get_user(new_ka->sa.sa_flags, &act->sa_flags);
+ __get_user(mask, &act->sa_mask);
+ siginitset(&new_ka->sa.sa_mask, mask);
+ return 0;
+}
+
+#define to_user_ptr(p) ((unsigned long)(p))
+#define from_user_ptr(p) ((void __user *)(p))
+
+static inline int save_general_regs(struct pt_regs *regs,
+ struct mcontext __user *frame)
+{
+ WARN_ON(!FULL_REGS(regs));
+ return __copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE);
+}
+
+static inline int restore_general_regs(struct pt_regs *regs,
+ struct mcontext __user *sr)
+{
+ /* copy up to but not including MSR */
+ if (__copy_from_user(regs, &sr->mc_gregs,
+ PT_MSR * sizeof(elf_greg_t)))
+ return -EFAULT;
+ /* copy from orig_r3 (the word after the MSR) up to the end */
+ if (__copy_from_user(&regs->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3],
+ GP_REGS_SIZE - PT_ORIG_R3 * sizeof(elf_greg_t)))
+ return -EFAULT;
+ return 0;
+}
+
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Atomically swap in the new signal mask, and wait for a signal.
+ */
+long sys_sigsuspend(old_sigset_t mask)
+{
+ sigset_t blocked;
+
+ current->saved_sigmask = current->blocked;
+
+ mask &= _BLOCKABLE;
+ siginitset(&blocked, mask);
+ set_current_blocked(&blocked);
+
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ set_restore_sigmask();
+ return -ERESTARTNOHAND;
+}
+
+long sys_sigaction(int sig, struct old_sigaction __user *act,
+ struct old_sigaction __user *oact)
+{
+ struct k_sigaction new_ka, old_ka;
+ int ret;
+
+#ifdef CONFIG_PPC64
+ if (sig < 0)
+ sig = -sig;
+#endif
+
+ if (act) {
+ if (get_old_sigaction(&new_ka, act))
+ return -EFAULT;
+ }
+
+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+ if (!ret && oact) {
+ if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+ __put_user(to_user_ptr(old_ka.sa.sa_handler),
+ &oact->sa_handler) ||
+ __put_user(to_user_ptr(old_ka.sa.sa_restorer),
+ &oact->sa_restorer) ||
+ __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
+ __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
+ return -EFAULT;
+ }
+
+ return ret;
+}
+
+/*
+ * When we have signals to deliver, we set up on the
+ * user stack, going down from the original stack pointer:
+ * an ABI gap of 56 words
+ * an mcontext struct
+ * a sigcontext struct
+ * a gap of __SIGNAL_FRAMESIZE bytes
+ *
+ * Each of these things must be a multiple of 16 bytes in size. The following
+ * structure represent all of this except the __SIGNAL_FRAMESIZE gap
+ *
+ */
+struct sigframe {
+ struct sigcontext sctx; /* the sigcontext */
+ struct mcontext mctx; /* all the register values */
+ /*
+ * Programs using the rs6000/xcoff abi can save up to 19 gp
+ * regs and 18 fp regs below sp before decrementing it.
+ */
+ int abigap[56];
+};
+
+/* We use the mc_pad field for the signal return trampoline. */
+#define tramp mc_pad
+
+/*
+ * When we have rt signals to deliver, we set up on the
+ * user stack, going down from the original stack pointer:
+ * one rt_sigframe struct (siginfo + ucontext + ABI gap)
+ * a gap of __SIGNAL_FRAMESIZE+16 bytes
+ * (the +16 is to get the siginfo and ucontext in the same
+ * positions as in older kernels).
+ *
+ * Each of these things must be a multiple of 16 bytes in size.
+ *
+ */
+struct rt_sigframe {
+#ifdef CONFIG_PPC64
+ compat_siginfo_t info;
+#else
+ struct siginfo info;
+#endif
+ struct ucontext uc;
+ /*
+ * Programs using the rs6000/xcoff abi can save up to 19 gp
+ * regs and 18 fp regs below sp before decrementing it.
+ */
+ int abigap[56];
+};
+
+#ifdef CONFIG_VSX
+unsigned long copy_fpr_to_user(void __user *to,
+ struct task_struct *task)
+{
+ double buf[ELF_NFPREG];
+ int i;
+
+ /* save FPR copy to local buffer then write to the thread_struct */
+ for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+ buf[i] = task->thread.TS_FPR(i);
+ memcpy(&buf[i], &task->thread.fpscr, sizeof(double));
+ return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
+}
+
+unsigned long copy_fpr_from_user(struct task_struct *task,
+ void __user *from)
+{
+ double buf[ELF_NFPREG];
+ int i;
+
+ if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
+ return 1;
+ for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+ task->thread.TS_FPR(i) = buf[i];
+ memcpy(&task->thread.fpscr, &buf[i], sizeof(double));
+
+ return 0;
+}
+
+unsigned long copy_vsx_to_user(void __user *to,
+ struct task_struct *task)
+{
+ double buf[ELF_NVSRHALFREG];
+ int i;
+
+ /* save FPR copy to local buffer then write to the thread_struct */
+ for (i = 0; i < ELF_NVSRHALFREG; i++)
+ buf[i] = task->thread.fpr[i][TS_VSRLOWOFFSET];
+ return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
+}
+
+unsigned long copy_vsx_from_user(struct task_struct *task,
+ void __user *from)
+{
+ double buf[ELF_NVSRHALFREG];
+ int i;
+
+ if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
+ return 1;
+ for (i = 0; i < ELF_NVSRHALFREG ; i++)
+ task->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+ return 0;
+}
+#else
+inline unsigned long copy_fpr_to_user(void __user *to,
+ struct task_struct *task)
+{
+ return __copy_to_user(to, task->thread.fpr,
+ ELF_NFPREG * sizeof(double));
+}
+
+inline unsigned long copy_fpr_from_user(struct task_struct *task,
+ void __user *from)
+{
+ return __copy_from_user(task->thread.fpr, from,
+ ELF_NFPREG * sizeof(double));
+}
+#endif
+
+/*
+ * Save the current user registers on the user stack.
+ * We only save the altivec/spe registers if the process has used
+ * altivec/spe instructions at some point.
+ */
+static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
+ int sigret, int ctx_has_vsx_region)
+{
+ unsigned long msr = regs->msr;
+
+ /* Make sure floating point registers are stored in regs */
+ flush_fp_to_thread(current);
+
+ /* save general registers */
+ if (save_general_regs(regs, frame))
+ return 1;
+
+#ifdef CONFIG_ALTIVEC
+ /* save altivec registers */
+ if (current->thread.used_vr) {
+ flush_altivec_to_thread(current);
+ if (__copy_to_user(&frame->mc_vregs, current->thread.vr,
+ ELF_NVRREG * sizeof(vector128)))
+ return 1;
+ /* set MSR_VEC in the saved MSR value to indicate that
+ frame->mc_vregs contains valid data */
+ msr |= MSR_VEC;
+ }
+ /* else assert((regs->msr & MSR_VEC) == 0) */
+
+ /* We always copy to/from vrsave, it's 0 if we don't have or don't
+ * use altivec. Since VSCR only contains 32 bits saved in the least
+ * significant bits of a vector, we "cheat" and stuff VRSAVE in the
+ * most significant bits of that same vector. --BenH
+ */
+ if (__put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32]))
+ return 1;
+#endif /* CONFIG_ALTIVEC */
+ if (copy_fpr_to_user(&frame->mc_fregs, current))
+ return 1;
+#ifdef CONFIG_VSX
+ /*
+ * Copy VSR 0-31 upper half from thread_struct to local
+ * buffer, then write that to userspace. Also set MSR_VSX in
+ * the saved MSR value to indicate that frame->mc_vregs
+ * contains valid data
+ */
+ if (current->thread.used_vsr && ctx_has_vsx_region) {
+ __giveup_vsx(current);
+ if (copy_vsx_to_user(&frame->mc_vsregs, current))
+ return 1;
+ msr |= MSR_VSX;
+ }
+#endif /* CONFIG_VSX */
+#ifdef CONFIG_SPE
+ /* save spe registers */
+ if (current->thread.used_spe) {
+ flush_spe_to_thread(current);
+ if (__copy_to_user(&frame->mc_vregs, current->thread.evr,
+ ELF_NEVRREG * sizeof(u32)))
+ return 1;
+ /* set MSR_SPE in the saved MSR value to indicate that
+ frame->mc_vregs contains valid data */
+ msr |= MSR_SPE;
+ }
+ /* else assert((regs->msr & MSR_SPE) == 0) */
+
+ /* We always copy to/from spefscr */
+ if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG))
+ return 1;
+#endif /* CONFIG_SPE */
+
+ if (__put_user(msr, &frame->mc_gregs[PT_MSR]))
+ return 1;
+ if (sigret) {
+ /* Set up the sigreturn trampoline: li r0,sigret; sc */
+ if (__put_user(0x38000000UL + sigret, &frame->tramp[0])
+ || __put_user(0x44000002UL, &frame->tramp[1]))
+ return 1;
+ flush_icache_range((unsigned long) &frame->tramp[0],
+ (unsigned long) &frame->tramp[2]);
+ }
+
+ return 0;
+}
+
+/*
+ * Restore the current user register values from the user stack,
+ * (except for MSR).
+ */
+static long restore_user_regs(struct pt_regs *regs,
+ struct mcontext __user *sr, int sig)
+{
+ long err;
+ unsigned int save_r2 = 0;
+ unsigned long msr;
+#ifdef CONFIG_VSX
+ int i;
+#endif
+
+ /*
+ * restore general registers but not including MSR or SOFTE. Also
+ * take care of keeping r2 (TLS) intact if not a signal
+ */
+ if (!sig)
+ save_r2 = (unsigned int)regs->gpr[2];
+ err = restore_general_regs(regs, sr);
+ regs->trap = 0;
+ err |= __get_user(msr, &sr->mc_gregs[PT_MSR]);
+ if (!sig)
+ regs->gpr[2] = (unsigned long) save_r2;
+ if (err)
+ return 1;
+
+ /* if doing signal return, restore the previous little-endian mode */
+ if (sig)
+ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
+
+ /*
+ * Do this before updating the thread state in
+ * current->thread.fpr/vr/evr. That way, if we get preempted
+ * and another task grabs the FPU/Altivec/SPE, it won't be
+ * tempted to save the current CPU state into the thread_struct
+ * and corrupt what we are writing there.
+ */
+ discard_lazy_cpu_state();
+
+#ifdef CONFIG_ALTIVEC
+ /*
+ * Force the process to reload the altivec registers from
+ * current->thread when it next does altivec instructions
+ */
+ regs->msr &= ~MSR_VEC;
+ if (msr & MSR_VEC) {
+ /* restore altivec registers from the stack */
+ if (__copy_from_user(current->thread.vr, &sr->mc_vregs,
+ sizeof(sr->mc_vregs)))
+ return 1;
+ } else if (current->thread.used_vr)
+ memset(current->thread.vr, 0, ELF_NVRREG * sizeof(vector128));
+
+ /* Always get VRSAVE back */
+ if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32]))
+ return 1;
+#endif /* CONFIG_ALTIVEC */
+ if (copy_fpr_from_user(current, &sr->mc_fregs))
+ return 1;
+
+#ifdef CONFIG_VSX
+ /*
+ * Force the process to reload the VSX registers from
+ * current->thread when it next does VSX instruction.
+ */
+ regs->msr &= ~MSR_VSX;
+ if (msr & MSR_VSX) {
+ /*
+ * Restore altivec registers from the stack to a local
+ * buffer, then write this out to the thread_struct
+ */
+ if (copy_vsx_from_user(current, &sr->mc_vsregs))
+ return 1;
+ } else if (current->thread.used_vsr)
+ for (i = 0; i < 32 ; i++)
+ current->thread.fpr[i][TS_VSRLOWOFFSET] = 0;
+#endif /* CONFIG_VSX */
+ /*
+ * force the process to reload the FP registers from
+ * current->thread when it next does FP instructions
+ */
+ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
+
+#ifdef CONFIG_SPE
+ /* force the process to reload the spe registers from
+ current->thread when it next does spe instructions */
+ regs->msr &= ~MSR_SPE;
+ if (msr & MSR_SPE) {
+ /* restore spe registers from the stack */
+ if (__copy_from_user(current->thread.evr, &sr->mc_vregs,
+ ELF_NEVRREG * sizeof(u32)))
+ return 1;
+ } else if (current->thread.used_spe)
+ memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
+
+ /* Always get SPEFSCR back */
+ if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG))
+ return 1;
+#endif /* CONFIG_SPE */
+
+ return 0;
+}
+
+#ifdef CONFIG_PPC64
+long compat_sys_rt_sigaction(int sig, const struct sigaction32 __user *act,
+ struct sigaction32 __user *oact, size_t sigsetsize)
+{
+ struct k_sigaction new_ka, old_ka;
+ int ret;
+
+ /* XXX: Don't preclude handling different sized sigset_t's. */
+ if (sigsetsize != sizeof(compat_sigset_t))
+ return -EINVAL;
+
+ if (act) {
+ compat_uptr_t handler;
+
+ ret = get_user(handler, &act->sa_handler);
+ new_ka.sa.sa_handler = compat_ptr(handler);
+ ret |= get_sigset_t(&new_ka.sa.sa_mask, &act->sa_mask);
+ ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags);
+ if (ret)
+ return -EFAULT;
+ }
+
+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+ if (!ret && oact) {
+ ret = put_user(to_user_ptr(old_ka.sa.sa_handler), &oact->sa_handler);
+ ret |= put_sigset_t(&oact->sa_mask, &old_ka.sa.sa_mask);
+ ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+ }
+ return ret;
+}
+
+/*
+ * Note: it is necessary to treat how as an unsigned int, with the
+ * corresponding cast to a signed int to insure that the proper
+ * conversion (sign extension) between the register representation
+ * of a signed int (msr in 32-bit mode) and the register representation
+ * of a signed int (msr in 64-bit mode) is performed.
+ */
+long compat_sys_rt_sigprocmask(u32 how, compat_sigset_t __user *set,
+ compat_sigset_t __user *oset, size_t sigsetsize)
+{
+ sigset_t s;
+ sigset_t __user *up;
+ int ret;
+ mm_segment_t old_fs = get_fs();
+
+ if (set) {
+ if (get_sigset_t(&s, set))
+ return -EFAULT;
+ }
+
+ set_fs(KERNEL_DS);
+ /* This is valid because of the set_fs() */
+ up = (sigset_t __user *) &s;
+ ret = sys_rt_sigprocmask((int)how, set ? up : NULL, oset ? up : NULL,
+ sigsetsize);
+ set_fs(old_fs);
+ if (ret)
+ return ret;
+ if (oset) {
+ if (put_sigset_t(oset, &s))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+long compat_sys_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize)
+{
+ sigset_t s;
+ int ret;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs(KERNEL_DS);
+ /* The __user pointer cast is valid because of the set_fs() */
+ ret = sys_rt_sigpending((sigset_t __user *) &s, sigsetsize);
+ set_fs(old_fs);
+ if (!ret) {
+ if (put_sigset_t(set, &s))
+ return -EFAULT;
+ }
+ return ret;
+}
+
+
+int copy_siginfo_to_user32(struct compat_siginfo __user *d, siginfo_t *s)
+{
+ int err;
+
+ if (!access_ok (VERIFY_WRITE, d, sizeof(*d)))
+ return -EFAULT;
+
+ /* If you change siginfo_t structure, please be sure
+ * this code is fixed accordingly.
+ * It should never copy any pad contained in the structure
+ * to avoid security leaks, but must copy the generic
+ * 3 ints plus the relevant union member.
+ * This routine must convert siginfo from 64bit to 32bit as well
+ * at the same time.
+ */
+ err = __put_user(s->si_signo, &d->si_signo);
+ err |= __put_user(s->si_errno, &d->si_errno);
+ err |= __put_user((short)s->si_code, &d->si_code);
+ if (s->si_code < 0)
+ err |= __copy_to_user(&d->_sifields._pad, &s->_sifields._pad,
+ SI_PAD_SIZE32);
+ else switch(s->si_code >> 16) {
+ case __SI_CHLD >> 16:
+ err |= __put_user(s->si_pid, &d->si_pid);
+ err |= __put_user(s->si_uid, &d->si_uid);
+ err |= __put_user(s->si_utime, &d->si_utime);
+ err |= __put_user(s->si_stime, &d->si_stime);
+ err |= __put_user(s->si_status, &d->si_status);
+ break;
+ case __SI_FAULT >> 16:
+ err |= __put_user((unsigned int)(unsigned long)s->si_addr,
+ &d->si_addr);
+ break;
+ case __SI_POLL >> 16:
+ err |= __put_user(s->si_band, &d->si_band);
+ err |= __put_user(s->si_fd, &d->si_fd);
+ break;
+ case __SI_TIMER >> 16:
+ err |= __put_user(s->si_tid, &d->si_tid);
+ err |= __put_user(s->si_overrun, &d->si_overrun);
+ err |= __put_user(s->si_int, &d->si_int);
+ break;
+ case __SI_RT >> 16: /* This is not generated by the kernel as of now. */
+ case __SI_MESGQ >> 16:
+ err |= __put_user(s->si_int, &d->si_int);
+ /* fallthrough */
+ case __SI_KILL >> 16:
+ default:
+ err |= __put_user(s->si_pid, &d->si_pid);
+ err |= __put_user(s->si_uid, &d->si_uid);
+ break;
+ }
+ return err;
+}
+
+#define copy_siginfo_to_user copy_siginfo_to_user32
+
+int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from)
+{
+ memset(to, 0, sizeof *to);
+
+ if (copy_from_user(to, from, 3*sizeof(int)) ||
+ copy_from_user(to->_sifields._pad,
+ from->_sifields._pad, SI_PAD_SIZE32))
+ return -EFAULT;
+
+ return 0;
+}
+
+/*
+ * Note: it is necessary to treat pid and sig as unsigned ints, with the
+ * corresponding cast to a signed int to insure that the proper conversion
+ * (sign extension) between the register representation of a signed int
+ * (msr in 32-bit mode) and the register representation of a signed int
+ * (msr in 64-bit mode) is performed.
+ */
+long compat_sys_rt_sigqueueinfo(u32 pid, u32 sig, compat_siginfo_t __user *uinfo)
+{
+ siginfo_t info;
+ int ret;
+ mm_segment_t old_fs = get_fs();
+
+ ret = copy_siginfo_from_user32(&info, uinfo);
+ if (unlikely(ret))
+ return ret;
+
+ set_fs (KERNEL_DS);
+ /* The __user pointer cast is valid becasuse of the set_fs() */
+ ret = sys_rt_sigqueueinfo((int)pid, (int)sig, (siginfo_t __user *) &info);
+ set_fs (old_fs);
+ return ret;
+}
+/*
+ * Start Alternate signal stack support
+ *
+ * System Calls
+ * sigaltatck compat_sys_sigaltstack
+ */
+
+int compat_sys_sigaltstack(u32 __new, u32 __old, int r5,
+ int r6, int r7, int r8, struct pt_regs *regs)
+{
+ stack_32_t __user * newstack = compat_ptr(__new);
+ stack_32_t __user * oldstack = compat_ptr(__old);
+ stack_t uss, uoss;
+ int ret;
+ mm_segment_t old_fs;
+ unsigned long sp;
+ compat_uptr_t ss_sp;
+
+ /*
+ * set sp to the user stack on entry to the system call
+ * the system call router sets R9 to the saved registers
+ */
+ sp = regs->gpr[1];
+
+ /* Put new stack info in local 64 bit stack struct */
+ if (newstack) {
+ if (get_user(ss_sp, &newstack->ss_sp) ||
+ __get_user(uss.ss_flags, &newstack->ss_flags) ||
+ __get_user(uss.ss_size, &newstack->ss_size))
+ return -EFAULT;
+ uss.ss_sp = compat_ptr(ss_sp);
+ }
+
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ /* The __user pointer casts are valid because of the set_fs() */
+ ret = do_sigaltstack(
+ newstack ? (stack_t __user *) &uss : NULL,
+ oldstack ? (stack_t __user *) &uoss : NULL,
+ sp);
+ set_fs(old_fs);
+ /* Copy the stack information to the user output buffer */
+ if (!ret && oldstack &&
+ (put_user(ptr_to_compat(uoss.ss_sp), &oldstack->ss_sp) ||
+ __put_user(uoss.ss_flags, &oldstack->ss_flags) ||
+ __put_user(uoss.ss_size, &oldstack->ss_size)))
+ return -EFAULT;
+ return ret;
+}
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Set up a signal frame for a "real-time" signal handler
+ * (one which gets siginfo).
+ */
+int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
+ siginfo_t *info, sigset_t *oldset,
+ struct pt_regs *regs)
+{
+ struct rt_sigframe __user *rt_sf;
+ struct mcontext __user *frame;
+ void __user *addr;
+ unsigned long newsp = 0;
+
+ /* Set up Signal Frame */
+ /* Put a Real Time Context onto stack */
+ rt_sf = get_sigframe(ka, regs, sizeof(*rt_sf), 1);
+ addr = rt_sf;
+ if (unlikely(rt_sf == NULL))
+ goto badframe;
+
+ /* Put the siginfo & fill in most of the ucontext */
+ if (copy_siginfo_to_user(&rt_sf->info, info)
+ || __put_user(0, &rt_sf->uc.uc_flags)
+ || __put_user(0, &rt_sf->uc.uc_link)
+ || __put_user(current->sas_ss_sp, &rt_sf->uc.uc_stack.ss_sp)
+ || __put_user(sas_ss_flags(regs->gpr[1]),
+ &rt_sf->uc.uc_stack.ss_flags)
+ || __put_user(current->sas_ss_size, &rt_sf->uc.uc_stack.ss_size)
+ || __put_user(to_user_ptr(&rt_sf->uc.uc_mcontext),
+ &rt_sf->uc.uc_regs)
+ || put_sigset_t(&rt_sf->uc.uc_sigmask, oldset))
+ goto badframe;
+
+ /* Save user registers on the stack */
+ frame = &rt_sf->uc.uc_mcontext;
+ addr = frame;
+ if (vdso32_rt_sigtramp && current->mm->context.vdso_base) {
+ if (save_user_regs(regs, frame, 0, 1))
+ goto badframe;
+ regs->link = current->mm->context.vdso_base + vdso32_rt_sigtramp;
+ } else {
+ if (save_user_regs(regs, frame, __NR_rt_sigreturn, 1))
+ goto badframe;
+ regs->link = (unsigned long) frame->tramp;
+ }
+
+ current->thread.fpscr.val = 0; /* turn off all fp exceptions */
+
+ /* create a stack frame for the caller of the handler */
+ newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16);
+ addr = (void __user *)regs->gpr[1];
+ if (put_user(regs->gpr[1], (u32 __user *)newsp))
+ goto badframe;
+
+ /* Fill registers for signal handler */
+ regs->gpr[1] = newsp;
+ regs->gpr[3] = sig;
+ regs->gpr[4] = (unsigned long) &rt_sf->info;
+ regs->gpr[5] = (unsigned long) &rt_sf->uc;
+ regs->gpr[6] = (unsigned long) rt_sf;
+ regs->nip = (unsigned long) ka->sa.sa_handler;
+ /* enter the signal handler in big-endian mode */
+ regs->msr &= ~MSR_LE;
+ return 1;
+
+badframe:
+#ifdef DEBUG_SIG
+ printk("badframe in handle_rt_signal, regs=%p frame=%p newsp=%lx\n",
+ regs, frame, newsp);
+#endif
+ if (show_unhandled_signals)
+ printk_ratelimited(KERN_INFO
+ "%s[%d]: bad frame in handle_rt_signal32: "
+ "%p nip %08lx lr %08lx\n",
+ current->comm, current->pid,
+ addr, regs->nip, regs->link);
+
+ force_sigsegv(sig, current);
+ return 0;
+}
+
+static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int sig)
+{
+ sigset_t set;
+ struct mcontext __user *mcp;
+
+ if (get_sigset_t(&set, &ucp->uc_sigmask))
+ return -EFAULT;
+#ifdef CONFIG_PPC64
+ {
+ u32 cmcp;
+
+ if (__get_user(cmcp, &ucp->uc_regs))
+ return -EFAULT;
+ mcp = (struct mcontext __user *)(u64)cmcp;
+ /* no need to check access_ok(mcp), since mcp < 4GB */
+ }
+#else
+ if (__get_user(mcp, &ucp->uc_regs))
+ return -EFAULT;
+ if (!access_ok(VERIFY_READ, mcp, sizeof(*mcp)))
+ return -EFAULT;
+#endif
+ restore_sigmask(&set);
+ if (restore_user_regs(regs, mcp, sig))
+ return -EFAULT;
+
+ return 0;
+}
+
+long sys_swapcontext(struct ucontext __user *old_ctx,
+ struct ucontext __user *new_ctx,
+ int ctx_size, int r6, int r7, int r8, struct pt_regs *regs)
+{
+ unsigned char tmp;
+ int ctx_has_vsx_region = 0;
+
+#ifdef CONFIG_PPC64
+ unsigned long new_msr = 0;
+
+ if (new_ctx) {
+ struct mcontext __user *mcp;
+ u32 cmcp;
+
+ /*
+ * Get pointer to the real mcontext. No need for
+ * access_ok since we are dealing with compat
+ * pointers.
+ */
+ if (__get_user(cmcp, &new_ctx->uc_regs))
+ return -EFAULT;
+ mcp = (struct mcontext __user *)(u64)cmcp;
+ if (__get_user(new_msr, &mcp->mc_gregs[PT_MSR]))
+ return -EFAULT;
+ }
+ /*
+ * Check that the context is not smaller than the original
+ * size (with VMX but without VSX)
+ */
+ if (ctx_size < UCONTEXTSIZEWITHOUTVSX)
+ return -EINVAL;
+ /*
+ * If the new context state sets the MSR VSX bits but
+ * it doesn't provide VSX state.
+ */
+ if ((ctx_size < sizeof(struct ucontext)) &&
+ (new_msr & MSR_VSX))
+ return -EINVAL;
+ /* Does the context have enough room to store VSX data? */
+ if (ctx_size >= sizeof(struct ucontext))
+ ctx_has_vsx_region = 1;
+#else
+ /* Context size is for future use. Right now, we only make sure
+ * we are passed something we understand
+ */
+ if (ctx_size < sizeof(struct ucontext))
+ return -EINVAL;
+#endif
+ if (old_ctx != NULL) {
+ struct mcontext __user *mctx;
+
+ /*
+ * old_ctx might not be 16-byte aligned, in which
+ * case old_ctx->uc_mcontext won't be either.
+ * Because we have the old_ctx->uc_pad2 field
+ * before old_ctx->uc_mcontext, we need to round down
+ * from &old_ctx->uc_mcontext to a 16-byte boundary.
+ */
+ mctx = (struct mcontext __user *)
+ ((unsigned long) &old_ctx->uc_mcontext & ~0xfUL);
+ if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size)
+ || save_user_regs(regs, mctx, 0, ctx_has_vsx_region)
+ || put_sigset_t(&old_ctx->uc_sigmask, &current->blocked)
+ || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs))
+ return -EFAULT;
+ }
+ if (new_ctx == NULL)
+ return 0;
+ if (!access_ok(VERIFY_READ, new_ctx, ctx_size)
+ || __get_user(tmp, (u8 __user *) new_ctx)
+ || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1))
+ return -EFAULT;
+
+ /*
+ * If we get a fault copying the context into the kernel's
+ * image of the user's registers, we can't just return -EFAULT
+ * because the user's registers will be corrupted. For instance
+ * the NIP value may have been updated but not some of the
+ * other registers. Given that we have done the access_ok
+ * and successfully read the first and last bytes of the region
+ * above, this should only happen in an out-of-memory situation
+ * or if another thread unmaps the region containing the context.
+ * We kill the task with a SIGSEGV in this situation.
+ */
+ if (do_setcontext(new_ctx, regs, 0))
+ do_exit(SIGSEGV);
+
+ set_thread_flag(TIF_RESTOREALL);
+ return 0;
+}
+
+long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
+ struct pt_regs *regs)
+{
+ struct rt_sigframe __user *rt_sf;
+
+ /* Always make any pending restarted system calls return -EINTR */
+ current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+ rt_sf = (struct rt_sigframe __user *)
+ (regs->gpr[1] + __SIGNAL_FRAMESIZE + 16);
+ if (!access_ok(VERIFY_READ, rt_sf, sizeof(*rt_sf)))
+ goto bad;
+ if (do_setcontext(&rt_sf->uc, regs, 1))
+ goto bad;
+
+ /*
+ * It's not clear whether or why it is desirable to save the
+ * sigaltstack setting on signal delivery and restore it on
+ * signal return. But other architectures do this and we have
+ * always done it up until now so it is probably better not to
+ * change it. -- paulus
+ */
+#ifdef CONFIG_PPC64
+ /*
+ * We use the compat_sys_ version that does the 32/64 bits conversion
+ * and takes userland pointer directly. What about error checking ?
+ * nobody does any...
+ */
+ compat_sys_sigaltstack((u32)(u64)&rt_sf->uc.uc_stack, 0, 0, 0, 0, 0, regs);
+#else
+ do_sigaltstack(&rt_sf->uc.uc_stack, NULL, regs->gpr[1]);
+#endif
+ set_thread_flag(TIF_RESTOREALL);
+ return 0;
+
+ bad:
+ if (show_unhandled_signals)
+ printk_ratelimited(KERN_INFO
+ "%s[%d]: bad frame in sys_rt_sigreturn: "
+ "%p nip %08lx lr %08lx\n",
+ current->comm, current->pid,
+ rt_sf, regs->nip, regs->link);
+
+ force_sig(SIGSEGV, current);
+ return 0;
+}
+
+#ifdef CONFIG_PPC32
+int sys_debug_setcontext(struct ucontext __user *ctx,
+ int ndbg, struct sig_dbg_op __user *dbg,
+ int r6, int r7, int r8,
+ struct pt_regs *regs)
+{
+ struct sig_dbg_op op;
+ int i;
+ unsigned char tmp;
+ unsigned long new_msr = regs->msr;
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ unsigned long new_dbcr0 = current->thread.dbcr0;
+#endif
+
+ for (i=0; i<ndbg; i++) {
+ if (copy_from_user(&op, dbg + i, sizeof(op)))
+ return -EFAULT;
+ switch (op.dbg_type) {
+ case SIG_DBG_SINGLE_STEPPING:
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ if (op.dbg_value) {
+ new_msr |= MSR_DE;
+ new_dbcr0 |= (DBCR0_IDM | DBCR0_IC);
+ } else {
+ new_dbcr0 &= ~DBCR0_IC;
+ if (!DBCR_ACTIVE_EVENTS(new_dbcr0,
+ current->thread.dbcr1)) {
+ new_msr &= ~MSR_DE;
+ new_dbcr0 &= ~DBCR0_IDM;
+ }
+ }
+#else
+ if (op.dbg_value)
+ new_msr |= MSR_SE;
+ else
+ new_msr &= ~MSR_SE;
+#endif
+ break;
+ case SIG_DBG_BRANCH_TRACING:
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ return -EINVAL;
+#else
+ if (op.dbg_value)
+ new_msr |= MSR_BE;
+ else
+ new_msr &= ~MSR_BE;
+#endif
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ }
+
+ /* We wait until here to actually install the values in the
+ registers so if we fail in the above loop, it will not
+ affect the contents of these registers. After this point,
+ failure is a problem, anyway, and it's very unlikely unless
+ the user is really doing something wrong. */
+ regs->msr = new_msr;
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ current->thread.dbcr0 = new_dbcr0;
+#endif
+
+ if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx))
+ || __get_user(tmp, (u8 __user *) ctx)
+ || __get_user(tmp, (u8 __user *) (ctx + 1) - 1))
+ return -EFAULT;
+
+ /*
+ * If we get a fault copying the context into the kernel's
+ * image of the user's registers, we can't just return -EFAULT
+ * because the user's registers will be corrupted. For instance
+ * the NIP value may have been updated but not some of the
+ * other registers. Given that we have done the access_ok
+ * and successfully read the first and last bytes of the region
+ * above, this should only happen in an out-of-memory situation
+ * or if another thread unmaps the region containing the context.
+ * We kill the task with a SIGSEGV in this situation.
+ */
+ if (do_setcontext(ctx, regs, 1)) {
+ if (show_unhandled_signals)
+ printk_ratelimited(KERN_INFO "%s[%d]: bad frame in "
+ "sys_debug_setcontext: %p nip %08lx "
+ "lr %08lx\n",
+ current->comm, current->pid,
+ ctx, regs->nip, regs->link);
+
+ force_sig(SIGSEGV, current);
+ goto out;
+ }
+
+ /*
+ * It's not clear whether or why it is desirable to save the
+ * sigaltstack setting on signal delivery and restore it on
+ * signal return. But other architectures do this and we have
+ * always done it up until now so it is probably better not to
+ * change it. -- paulus
+ */
+ do_sigaltstack(&ctx->uc_stack, NULL, regs->gpr[1]);
+
+ set_thread_flag(TIF_RESTOREALL);
+ out:
+ return 0;
+}
+#endif
+
+/*
+ * OK, we're invoking a handler
+ */
+int handle_signal32(unsigned long sig, struct k_sigaction *ka,
+ siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
+{
+ struct sigcontext __user *sc;
+ struct sigframe __user *frame;
+ unsigned long newsp = 0;
+
+ /* Set up Signal Frame */
+ frame = get_sigframe(ka, regs, sizeof(*frame), 1);
+ if (unlikely(frame == NULL))
+ goto badframe;
+ sc = (struct sigcontext __user *) &frame->sctx;
+
+#if _NSIG != 64
+#error "Please adjust handle_signal()"
+#endif
+ if (__put_user(to_user_ptr(ka->sa.sa_handler), &sc->handler)
+ || __put_user(oldset->sig[0], &sc->oldmask)
+#ifdef CONFIG_PPC64
+ || __put_user((oldset->sig[0] >> 32), &sc->_unused[3])
+#else
+ || __put_user(oldset->sig[1], &sc->_unused[3])
+#endif
+ || __put_user(to_user_ptr(&frame->mctx), &sc->regs)
+ || __put_user(sig, &sc->signal))
+ goto badframe;
+
+ if (vdso32_sigtramp && current->mm->context.vdso_base) {
+ if (save_user_regs(regs, &frame->mctx, 0, 1))
+ goto badframe;
+ regs->link = current->mm->context.vdso_base + vdso32_sigtramp;
+ } else {
+ if (save_user_regs(regs, &frame->mctx, __NR_sigreturn, 1))
+ goto badframe;
+ regs->link = (unsigned long) frame->mctx.tramp;
+ }
+
+ current->thread.fpscr.val = 0; /* turn off all fp exceptions */
+
+ /* create a stack frame for the caller of the handler */
+ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
+ if (put_user(regs->gpr[1], (u32 __user *)newsp))
+ goto badframe;
+
+ regs->gpr[1] = newsp;
+ regs->gpr[3] = sig;
+ regs->gpr[4] = (unsigned long) sc;
+ regs->nip = (unsigned long) ka->sa.sa_handler;
+ /* enter the signal handler in big-endian mode */
+ regs->msr &= ~MSR_LE;
+
+ return 1;
+
+badframe:
+#ifdef DEBUG_SIG
+ printk("badframe in handle_signal, regs=%p frame=%p newsp=%lx\n",
+ regs, frame, newsp);
+#endif
+ if (show_unhandled_signals)
+ printk_ratelimited(KERN_INFO
+ "%s[%d]: bad frame in handle_signal32: "
+ "%p nip %08lx lr %08lx\n",
+ current->comm, current->pid,
+ frame, regs->nip, regs->link);
+
+ force_sigsegv(sig, current);
+ return 0;
+}
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
+ struct pt_regs *regs)
+{
+ struct sigcontext __user *sc;
+ struct sigcontext sigctx;
+ struct mcontext __user *sr;
+ void __user *addr;
+ sigset_t set;
+
+ /* Always make any pending restarted system calls return -EINTR */
+ current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+ sc = (struct sigcontext __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE);
+ addr = sc;
+ if (copy_from_user(&sigctx, sc, sizeof(sigctx)))
+ goto badframe;
+
+#ifdef CONFIG_PPC64
+ /*
+ * Note that PPC32 puts the upper 32 bits of the sigmask in the
+ * unused part of the signal stackframe
+ */
+ set.sig[0] = sigctx.oldmask + ((long)(sigctx._unused[3]) << 32);
+#else
+ set.sig[0] = sigctx.oldmask;
+ set.sig[1] = sigctx._unused[3];
+#endif
+ restore_sigmask(&set);
+
+ sr = (struct mcontext __user *)from_user_ptr(sigctx.regs);
+ addr = sr;
+ if (!access_ok(VERIFY_READ, sr, sizeof(*sr))
+ || restore_user_regs(regs, sr, 1))
+ goto badframe;
+
+ set_thread_flag(TIF_RESTOREALL);
+ return 0;
+
+badframe:
+ if (show_unhandled_signals)
+ printk_ratelimited(KERN_INFO
+ "%s[%d]: bad frame in sys_sigreturn: "
+ "%p nip %08lx lr %08lx\n",
+ current->comm, current->pid,
+ addr, regs->nip, regs->link);
+
+ force_sig(SIGSEGV, current);
+ return 0;
+}
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
new file mode 100644
index 00000000..2692efdb
--- /dev/null
+++ b/arch/powerpc/kernel/signal_64.c
@@ -0,0 +1,479 @@
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Derived from "arch/i386/kernel/signal.c"
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/elf.h>
+#include <linux/ptrace.h>
+#include <linux/ratelimit.h>
+
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/unistd.h>
+#include <asm/cacheflush.h>
+#include <asm/syscalls.h>
+#include <asm/vdso.h>
+#include <asm/switch_to.h>
+
+#include "signal.h"
+
+#define DEBUG_SIG 0
+
+#define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs))
+#define FP_REGS_SIZE sizeof(elf_fpregset_t)
+
+#define TRAMP_TRACEBACK 3
+#define TRAMP_SIZE 6
+
+/*
+ * When we have signals to deliver, we set up on the user stack,
+ * going down from the original stack pointer:
+ * 1) a rt_sigframe struct which contains the ucontext
+ * 2) a gap of __SIGNAL_FRAMESIZE bytes which acts as a dummy caller
+ * frame for the signal handler.
+ */
+
+struct rt_sigframe {
+ /* sys_rt_sigreturn requires the ucontext be the first field */
+ struct ucontext uc;
+ unsigned long _unused[2];
+ unsigned int tramp[TRAMP_SIZE];
+ struct siginfo __user *pinfo;
+ void __user *puc;
+ struct siginfo info;
+ /* 64 bit ABI allows for 288 bytes below sp before decrementing it. */
+ char abigap[288];
+} __attribute__ ((aligned (16)));
+
+static const char fmt32[] = KERN_INFO \
+ "%s[%d]: bad frame in %s: %08lx nip %08lx lr %08lx\n";
+static const char fmt64[] = KERN_INFO \
+ "%s[%d]: bad frame in %s: %016lx nip %016lx lr %016lx\n";
+
+/*
+ * Set up the sigcontext for the signal frame.
+ */
+
+static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
+ int signr, sigset_t *set, unsigned long handler,
+ int ctx_has_vsx_region)
+{
+ /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
+ * process never used altivec yet (MSR_VEC is zero in pt_regs of
+ * the context). This is very important because we must ensure we
+ * don't lose the VRSAVE content that may have been set prior to
+ * the process doing its first vector operation
+ * Userland shall check AT_HWCAP to know wether it can rely on the
+ * v_regs pointer or not
+ */
+#ifdef CONFIG_ALTIVEC
+ elf_vrreg_t __user *v_regs = (elf_vrreg_t __user *)(((unsigned long)sc->vmx_reserve + 15) & ~0xful);
+#endif
+ unsigned long msr = regs->msr;
+ long err = 0;
+
+ flush_fp_to_thread(current);
+
+#ifdef CONFIG_ALTIVEC
+ err |= __put_user(v_regs, &sc->v_regs);
+
+ /* save altivec registers */
+ if (current->thread.used_vr) {
+ flush_altivec_to_thread(current);
+ /* Copy 33 vec registers (vr0..31 and vscr) to the stack */
+ err |= __copy_to_user(v_regs, current->thread.vr, 33 * sizeof(vector128));
+ /* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg)
+ * contains valid data.
+ */
+ msr |= MSR_VEC;
+ }
+ /* We always copy to/from vrsave, it's 0 if we don't have or don't
+ * use altivec.
+ */
+ err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]);
+#else /* CONFIG_ALTIVEC */
+ err |= __put_user(0, &sc->v_regs);
+#endif /* CONFIG_ALTIVEC */
+ flush_fp_to_thread(current);
+ /* copy fpr regs and fpscr */
+ err |= copy_fpr_to_user(&sc->fp_regs, current);
+#ifdef CONFIG_VSX
+ /*
+ * Copy VSX low doubleword to local buffer for formatting,
+ * then out to userspace. Update v_regs to point after the
+ * VMX data.
+ */
+ if (current->thread.used_vsr && ctx_has_vsx_region) {
+ __giveup_vsx(current);
+ v_regs += ELF_NVRREG;
+ err |= copy_vsx_to_user(v_regs, current);
+ /* set MSR_VSX in the MSR value in the frame to
+ * indicate that sc->vs_reg) contains valid data.
+ */
+ msr |= MSR_VSX;
+ }
+#endif /* CONFIG_VSX */
+ err |= __put_user(&sc->gp_regs, &sc->regs);
+ WARN_ON(!FULL_REGS(regs));
+ err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
+ err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
+ err |= __put_user(signr, &sc->signal);
+ err |= __put_user(handler, &sc->handler);
+ if (set != NULL)
+ err |= __put_user(set->sig[0], &sc->oldmask);
+
+ return err;
+}
+
+/*
+ * Restore the sigcontext from the signal frame.
+ */
+
+static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
+ struct sigcontext __user *sc)
+{
+#ifdef CONFIG_ALTIVEC
+ elf_vrreg_t __user *v_regs;
+#endif
+ unsigned long err = 0;
+ unsigned long save_r13 = 0;
+ unsigned long msr;
+#ifdef CONFIG_VSX
+ int i;
+#endif
+
+ /* If this is not a signal return, we preserve the TLS in r13 */
+ if (!sig)
+ save_r13 = regs->gpr[13];
+
+ /* copy the GPRs */
+ err |= __copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr));
+ err |= __get_user(regs->nip, &sc->gp_regs[PT_NIP]);
+ /* get MSR separately, transfer the LE bit if doing signal return */
+ err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
+ if (sig)
+ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
+ err |= __get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3]);
+ err |= __get_user(regs->ctr, &sc->gp_regs[PT_CTR]);
+ err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]);
+ err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]);
+ err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]);
+ /* skip SOFTE */
+ regs->trap = 0;
+ err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]);
+ err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
+ err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]);
+
+ if (!sig)
+ regs->gpr[13] = save_r13;
+ if (set != NULL)
+ err |= __get_user(set->sig[0], &sc->oldmask);
+
+ /*
+ * Do this before updating the thread state in
+ * current->thread.fpr/vr. That way, if we get preempted
+ * and another task grabs the FPU/Altivec, it won't be
+ * tempted to save the current CPU state into the thread_struct
+ * and corrupt what we are writing there.
+ */
+ discard_lazy_cpu_state();
+
+ /*
+ * Force reload of FP/VEC.
+ * This has to be done before copying stuff into current->thread.fpr/vr
+ * for the reasons explained in the previous comment.
+ */
+ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
+
+#ifdef CONFIG_ALTIVEC
+ err |= __get_user(v_regs, &sc->v_regs);
+ if (err)
+ return err;
+ if (v_regs && !access_ok(VERIFY_READ, v_regs, 34 * sizeof(vector128)))
+ return -EFAULT;
+ /* Copy 33 vec registers (vr0..31 and vscr) from the stack */
+ if (v_regs != 0 && (msr & MSR_VEC) != 0)
+ err |= __copy_from_user(current->thread.vr, v_regs,
+ 33 * sizeof(vector128));
+ else if (current->thread.used_vr)
+ memset(current->thread.vr, 0, 33 * sizeof(vector128));
+ /* Always get VRSAVE back */
+ if (v_regs != 0)
+ err |= __get_user(current->thread.vrsave, (u32 __user *)&v_regs[33]);
+ else
+ current->thread.vrsave = 0;
+#endif /* CONFIG_ALTIVEC */
+ /* restore floating point */
+ err |= copy_fpr_from_user(current, &sc->fp_regs);
+#ifdef CONFIG_VSX
+ /*
+ * Get additional VSX data. Update v_regs to point after the
+ * VMX data. Copy VSX low doubleword from userspace to local
+ * buffer for formatting, then into the taskstruct.
+ */
+ v_regs += ELF_NVRREG;
+ if ((msr & MSR_VSX) != 0)
+ err |= copy_vsx_from_user(current, v_regs);
+ else
+ for (i = 0; i < 32 ; i++)
+ current->thread.fpr[i][TS_VSRLOWOFFSET] = 0;
+#endif
+ return err;
+}
+
+/*
+ * Setup the trampoline code on the stack
+ */
+static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
+{
+ int i;
+ long err = 0;
+
+ /* addi r1, r1, __SIGNAL_FRAMESIZE # Pop the dummy stackframe */
+ err |= __put_user(0x38210000UL | (__SIGNAL_FRAMESIZE & 0xffff), &tramp[0]);
+ /* li r0, __NR_[rt_]sigreturn| */
+ err |= __put_user(0x38000000UL | (syscall & 0xffff), &tramp[1]);
+ /* sc */
+ err |= __put_user(0x44000002UL, &tramp[2]);
+
+ /* Minimal traceback info */
+ for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++)
+ err |= __put_user(0, &tramp[i]);
+
+ if (!err)
+ flush_icache_range((unsigned long) &tramp[0],
+ (unsigned long) &tramp[TRAMP_SIZE]);
+
+ return err;
+}
+
+/*
+ * Userspace code may pass a ucontext which doesn't include VSX added
+ * at the end. We need to check for this case.
+ */
+#define UCONTEXTSIZEWITHOUTVSX \
+ (sizeof(struct ucontext) - 32*sizeof(long))
+
+/*
+ * Handle {get,set,swap}_context operations
+ */
+int sys_swapcontext(struct ucontext __user *old_ctx,
+ struct ucontext __user *new_ctx,
+ long ctx_size, long r6, long r7, long r8, struct pt_regs *regs)
+{
+ unsigned char tmp;
+ sigset_t set;
+ unsigned long new_msr = 0;
+ int ctx_has_vsx_region = 0;
+
+ if (new_ctx &&
+ get_user(new_msr, &new_ctx->uc_mcontext.gp_regs[PT_MSR]))
+ return -EFAULT;
+ /*
+ * Check that the context is not smaller than the original
+ * size (with VMX but without VSX)
+ */
+ if (ctx_size < UCONTEXTSIZEWITHOUTVSX)
+ return -EINVAL;
+ /*
+ * If the new context state sets the MSR VSX bits but
+ * it doesn't provide VSX state.
+ */
+ if ((ctx_size < sizeof(struct ucontext)) &&
+ (new_msr & MSR_VSX))
+ return -EINVAL;
+ /* Does the context have enough room to store VSX data? */
+ if (ctx_size >= sizeof(struct ucontext))
+ ctx_has_vsx_region = 1;
+
+ if (old_ctx != NULL) {
+ if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size)
+ || setup_sigcontext(&old_ctx->uc_mcontext, regs, 0, NULL, 0,
+ ctx_has_vsx_region)
+ || __copy_to_user(&old_ctx->uc_sigmask,
+ &current->blocked, sizeof(sigset_t)))
+ return -EFAULT;
+ }
+ if (new_ctx == NULL)
+ return 0;
+ if (!access_ok(VERIFY_READ, new_ctx, ctx_size)
+ || __get_user(tmp, (u8 __user *) new_ctx)
+ || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1))
+ return -EFAULT;
+
+ /*
+ * If we get a fault copying the context into the kernel's
+ * image of the user's registers, we can't just return -EFAULT
+ * because the user's registers will be corrupted. For instance
+ * the NIP value may have been updated but not some of the
+ * other registers. Given that we have done the access_ok
+ * and successfully read the first and last bytes of the region
+ * above, this should only happen in an out-of-memory situation
+ * or if another thread unmaps the region containing the context.
+ * We kill the task with a SIGSEGV in this situation.
+ */
+
+ if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
+ do_exit(SIGSEGV);
+ restore_sigmask(&set);
+ if (restore_sigcontext(regs, NULL, 0, &new_ctx->uc_mcontext))
+ do_exit(SIGSEGV);
+
+ /* This returns like rt_sigreturn */
+ set_thread_flag(TIF_RESTOREALL);
+ return 0;
+}
+
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+
+int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
+ unsigned long r6, unsigned long r7, unsigned long r8,
+ struct pt_regs *regs)
+{
+ struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1];
+ sigset_t set;
+
+ /* Always make any pending restarted system calls return -EINTR */
+ current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+ if (!access_ok(VERIFY_READ, uc, sizeof(*uc)))
+ goto badframe;
+
+ if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
+ goto badframe;
+ restore_sigmask(&set);
+ if (restore_sigcontext(regs, NULL, 1, &uc->uc_mcontext))
+ goto badframe;
+
+ /* do_sigaltstack expects a __user pointer and won't modify
+ * what's in there anyway
+ */
+ do_sigaltstack(&uc->uc_stack, NULL, regs->gpr[1]);
+
+ set_thread_flag(TIF_RESTOREALL);
+ return 0;
+
+badframe:
+#if DEBUG_SIG
+ printk("badframe in sys_rt_sigreturn, regs=%p uc=%p &uc->uc_mcontext=%p\n",
+ regs, uc, &uc->uc_mcontext);
+#endif
+ if (show_unhandled_signals)
+ printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32,
+ current->comm, current->pid, "rt_sigreturn",
+ (long)uc, regs->nip, regs->link);
+
+ force_sig(SIGSEGV, current);
+ return 0;
+}
+
+int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
+{
+ /* Handler is *really* a pointer to the function descriptor for
+ * the signal routine. The first entry in the function
+ * descriptor is the entry address of signal and the second
+ * entry is the TOC value we need to use.
+ */
+ func_descr_t __user *funct_desc_ptr;
+ struct rt_sigframe __user *frame;
+ unsigned long newsp = 0;
+ long err = 0;
+
+ frame = get_sigframe(ka, regs, sizeof(*frame), 0);
+ if (unlikely(frame == NULL))
+ goto badframe;
+
+ err |= __put_user(&frame->info, &frame->pinfo);
+ err |= __put_user(&frame->uc, &frame->puc);
+ err |= copy_siginfo_to_user(&frame->info, info);
+ if (err)
+ goto badframe;
+
+ /* Create the ucontext. */
+ err |= __put_user(0, &frame->uc.uc_flags);
+ err |= __put_user(0, &frame->uc.uc_link);
+ err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+ err |= __put_user(sas_ss_flags(regs->gpr[1]),
+ &frame->uc.uc_stack.ss_flags);
+ err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, signr, NULL,
+ (unsigned long)ka->sa.sa_handler, 1);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+ if (err)
+ goto badframe;
+
+ /* Make sure signal handler doesn't get spurious FP exceptions */
+ current->thread.fpscr.val = 0;
+
+ /* Set up to return from userspace. */
+ if (vdso64_rt_sigtramp && current->mm->context.vdso_base) {
+ regs->link = current->mm->context.vdso_base + vdso64_rt_sigtramp;
+ } else {
+ err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
+ if (err)
+ goto badframe;
+ regs->link = (unsigned long) &frame->tramp[0];
+ }
+ funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler;
+
+ /* Allocate a dummy caller frame for the signal handler. */
+ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
+ err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
+
+ /* Set up "regs" so we "return" to the signal handler. */
+ err |= get_user(regs->nip, &funct_desc_ptr->entry);
+ /* enter the signal handler in big-endian mode */
+ regs->msr &= ~MSR_LE;
+ regs->gpr[1] = newsp;
+ err |= get_user(regs->gpr[2], &funct_desc_ptr->toc);
+ regs->gpr[3] = signr;
+ regs->result = 0;
+ if (ka->sa.sa_flags & SA_SIGINFO) {
+ err |= get_user(regs->gpr[4], (unsigned long __user *)&frame->pinfo);
+ err |= get_user(regs->gpr[5], (unsigned long __user *)&frame->puc);
+ regs->gpr[6] = (unsigned long) frame;
+ } else {
+ regs->gpr[4] = (unsigned long)&frame->uc.uc_mcontext;
+ }
+ if (err)
+ goto badframe;
+
+ return 1;
+
+badframe:
+#if DEBUG_SIG
+ printk("badframe in setup_rt_frame, regs=%p frame=%p newsp=%lx\n",
+ regs, frame, newsp);
+#endif
+ if (show_unhandled_signals)
+ printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32,
+ current->comm, current->pid, "setup_rt_frame",
+ (long)frame, regs->nip, regs->link);
+
+ force_sigsegv(signr, current);
+ return 0;
+}
diff --git a/arch/powerpc/kernel/smp-tbsync.c b/arch/powerpc/kernel/smp-tbsync.c
new file mode 100644
index 00000000..640de836
--- /dev/null
+++ b/arch/powerpc/kernel/smp-tbsync.c
@@ -0,0 +1,171 @@
+/*
+ * Smp timebase synchronization for ppc.
+ *
+ * Copyright (C) 2003 Samuel Rydh (samuel@ibrium.se)
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/unistd.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <asm/smp.h>
+#include <asm/time.h>
+
+#define NUM_ITER 300
+
+enum {
+ kExit=0, kSetAndTest, kTest
+};
+
+static struct {
+ volatile u64 tb;
+ volatile u64 mark;
+ volatile int cmd;
+ volatile int handshake;
+ int filler[2];
+
+ volatile int ack;
+ int filler2[7];
+
+ volatile int race_result;
+} *tbsync;
+
+static volatile int running;
+
+static void __devinit enter_contest(u64 mark, long add)
+{
+ while (get_tb() < mark)
+ tbsync->race_result = add;
+}
+
+void __devinit smp_generic_take_timebase(void)
+{
+ int cmd;
+ u64 tb;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ while (!running)
+ barrier();
+ rmb();
+
+ for (;;) {
+ tbsync->ack = 1;
+ while (!tbsync->handshake)
+ barrier();
+ rmb();
+
+ cmd = tbsync->cmd;
+ tb = tbsync->tb;
+ mb();
+ tbsync->ack = 0;
+ if (cmd == kExit)
+ break;
+
+ while (tbsync->handshake)
+ barrier();
+ if (cmd == kSetAndTest)
+ set_tb(tb >> 32, tb & 0xfffffffful);
+ enter_contest(tbsync->mark, -1);
+ }
+ local_irq_restore(flags);
+}
+
+static int __devinit start_contest(int cmd, long offset, int num)
+{
+ int i, score=0;
+ u64 tb;
+ u64 mark;
+
+ tbsync->cmd = cmd;
+
+ local_irq_disable();
+ for (i = -3; i < num; ) {
+ tb = get_tb() + 400;
+ tbsync->tb = tb + offset;
+ tbsync->mark = mark = tb + 400;
+
+ wmb();
+
+ tbsync->handshake = 1;
+ while (tbsync->ack)
+ barrier();
+
+ while (get_tb() <= tb)
+ barrier();
+ tbsync->handshake = 0;
+ enter_contest(mark, 1);
+
+ while (!tbsync->ack)
+ barrier();
+
+ if (i++ > 0)
+ score += tbsync->race_result;
+ }
+ local_irq_enable();
+ return score;
+}
+
+void __devinit smp_generic_give_timebase(void)
+{
+ int i, score, score2, old, min=0, max=5000, offset=1000;
+
+ pr_debug("Software timebase sync\n");
+
+ /* if this fails then this kernel won't work anyway... */
+ tbsync = kzalloc( sizeof(*tbsync), GFP_KERNEL );
+ mb();
+ running = 1;
+
+ while (!tbsync->ack)
+ barrier();
+
+ pr_debug("Got ack\n");
+
+ /* binary search */
+ for (old = -1; old != offset ; offset = (min+max) / 2) {
+ score = start_contest(kSetAndTest, offset, NUM_ITER);
+
+ pr_debug("score %d, offset %d\n", score, offset );
+
+ if( score > 0 )
+ max = offset;
+ else
+ min = offset;
+ old = offset;
+ }
+ score = start_contest(kSetAndTest, min, NUM_ITER);
+ score2 = start_contest(kSetAndTest, max, NUM_ITER);
+
+ pr_debug("Min %d (score %d), Max %d (score %d)\n",
+ min, score, max, score2);
+ score = abs(score);
+ score2 = abs(score2);
+ offset = (score < score2) ? min : max;
+
+ /* guard against inaccurate mttb */
+ for (i = 0; i < 10; i++) {
+ start_contest(kSetAndTest, offset, NUM_ITER/10);
+
+ if ((score2 = start_contest(kTest, offset, NUM_ITER)) < 0)
+ score2 = -score2;
+ if (score2 <= score || score2 < 20)
+ break;
+ }
+ pr_debug("Final offset: %d (%d/%d)\n", offset, score2, NUM_ITER );
+
+ /* exiting */
+ tbsync->cmd = kExit;
+ wmb();
+ tbsync->handshake = 1;
+ while (tbsync->ack)
+ barrier();
+ tbsync->handshake = 0;
+ kfree(tbsync);
+ tbsync = NULL;
+ running = 0;
+}
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
new file mode 100644
index 00000000..d9f94410
--- /dev/null
+++ b/arch/powerpc/kernel/smp.c
@@ -0,0 +1,785 @@
+/*
+ * SMP support for ppc.
+ *
+ * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
+ * deal of code from the sparc and intel versions.
+ *
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ *
+ * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
+ * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+#include <linux/topology.h>
+
+#include <asm/ptrace.h>
+#include <linux/atomic.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/cputhreads.h>
+#include <asm/cputable.h>
+#include <asm/mpic.h>
+#include <asm/vdso_datapage.h>
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+#endif
+#include <asm/debug.h>
+
+#ifdef DEBUG
+#include <asm/udbg.h>
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+
+/* Store all idle threads, this can be reused instead of creating
+* a new thread. Also avoids complicated thread destroy functionality
+* for idle threads.
+*/
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is
+ * removed after init for !CONFIG_HOTPLUG_CPU.
+ */
+static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
+#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
+#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
+
+/* State of each CPU during hotplug phases */
+static DEFINE_PER_CPU(int, cpu_state) = { 0 };
+
+#else
+static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
+#define get_idle_for_cpu(x) (idle_thread_array[(x)])
+#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p))
+#endif
+
+struct thread_info *secondary_ti;
+
+DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
+
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+
+/* SMP operations for this machine */
+struct smp_ops_t *smp_ops;
+
+/* Can't be static due to PowerMac hackery */
+volatile unsigned int cpu_callin_map[NR_CPUS];
+
+int smt_enabled_at_boot = 1;
+
+static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL;
+
+#ifdef CONFIG_PPC64
+int __devinit smp_generic_kick_cpu(int nr)
+{
+ BUG_ON(nr < 0 || nr >= NR_CPUS);
+
+ /*
+ * The processor is currently spinning, waiting for the
+ * cpu_start field to become non-zero After we set cpu_start,
+ * the processor will continue on to secondary_start
+ */
+ if (!paca[nr].cpu_start) {
+ paca[nr].cpu_start = 1;
+ smp_mb();
+ return 0;
+ }
+
+#ifdef CONFIG_HOTPLUG_CPU
+ /*
+ * Ok it's not there, so it might be soft-unplugged, let's
+ * try to bring it back
+ */
+ per_cpu(cpu_state, nr) = CPU_UP_PREPARE;
+ smp_wmb();
+ smp_send_reschedule(nr);
+#endif /* CONFIG_HOTPLUG_CPU */
+
+ return 0;
+}
+#endif /* CONFIG_PPC64 */
+
+static irqreturn_t call_function_action(int irq, void *data)
+{
+ generic_smp_call_function_interrupt();
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t reschedule_action(int irq, void *data)
+{
+ scheduler_ipi();
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t call_function_single_action(int irq, void *data)
+{
+ generic_smp_call_function_single_interrupt();
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t debug_ipi_action(int irq, void *data)
+{
+ if (crash_ipi_function_ptr) {
+ crash_ipi_function_ptr(get_irq_regs());
+ return IRQ_HANDLED;
+ }
+
+#ifdef CONFIG_DEBUGGER
+ debugger_ipi(get_irq_regs());
+#endif /* CONFIG_DEBUGGER */
+
+ return IRQ_HANDLED;
+}
+
+static irq_handler_t smp_ipi_action[] = {
+ [PPC_MSG_CALL_FUNCTION] = call_function_action,
+ [PPC_MSG_RESCHEDULE] = reschedule_action,
+ [PPC_MSG_CALL_FUNC_SINGLE] = call_function_single_action,
+ [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
+};
+
+const char *smp_ipi_name[] = {
+ [PPC_MSG_CALL_FUNCTION] = "ipi call function",
+ [PPC_MSG_RESCHEDULE] = "ipi reschedule",
+ [PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single",
+ [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
+};
+
+/* optional function to request ipi, for controllers with >= 4 ipis */
+int smp_request_message_ipi(int virq, int msg)
+{
+ int err;
+
+ if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) {
+ return -EINVAL;
+ }
+#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC)
+ if (msg == PPC_MSG_DEBUGGER_BREAK) {
+ return 1;
+ }
+#endif
+ err = request_irq(virq, smp_ipi_action[msg],
+ IRQF_PERCPU | IRQF_NO_THREAD,
+ smp_ipi_name[msg], 0);
+ WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n",
+ virq, smp_ipi_name[msg], err);
+
+ return err;
+}
+
+#ifdef CONFIG_PPC_SMP_MUXED_IPI
+struct cpu_messages {
+ int messages; /* current messages */
+ unsigned long data; /* data for cause ipi */
+};
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
+
+void smp_muxed_ipi_set_data(int cpu, unsigned long data)
+{
+ struct cpu_messages *info = &per_cpu(ipi_message, cpu);
+
+ info->data = data;
+}
+
+void smp_muxed_ipi_message_pass(int cpu, int msg)
+{
+ struct cpu_messages *info = &per_cpu(ipi_message, cpu);
+ char *message = (char *)&info->messages;
+
+ message[msg] = 1;
+ mb();
+ smp_ops->cause_ipi(cpu, info->data);
+}
+
+irqreturn_t smp_ipi_demux(void)
+{
+ struct cpu_messages *info = &__get_cpu_var(ipi_message);
+ unsigned int all;
+
+ mb(); /* order any irq clear */
+
+ do {
+ all = xchg_local(&info->messages, 0);
+
+#ifdef __BIG_ENDIAN
+ if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION)))
+ generic_smp_call_function_interrupt();
+ if (all & (1 << (24 - 8 * PPC_MSG_RESCHEDULE)))
+ scheduler_ipi();
+ if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNC_SINGLE)))
+ generic_smp_call_function_single_interrupt();
+ if (all & (1 << (24 - 8 * PPC_MSG_DEBUGGER_BREAK)))
+ debug_ipi_action(0, NULL);
+#else
+#error Unsupported ENDIAN
+#endif
+ } while (info->messages);
+
+ return IRQ_HANDLED;
+}
+#endif /* CONFIG_PPC_SMP_MUXED_IPI */
+
+static inline void do_message_pass(int cpu, int msg)
+{
+ if (smp_ops->message_pass)
+ smp_ops->message_pass(cpu, msg);
+#ifdef CONFIG_PPC_SMP_MUXED_IPI
+ else
+ smp_muxed_ipi_message_pass(cpu, msg);
+#endif
+}
+
+void smp_send_reschedule(int cpu)
+{
+ if (likely(smp_ops))
+ do_message_pass(cpu, PPC_MSG_RESCHEDULE);
+}
+EXPORT_SYMBOL_GPL(smp_send_reschedule);
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+ do_message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+ unsigned int cpu;
+
+ for_each_cpu(cpu, mask)
+ do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
+}
+
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+void smp_send_debugger_break(void)
+{
+ int cpu;
+ int me = raw_smp_processor_id();
+
+ if (unlikely(!smp_ops))
+ return;
+
+ for_each_online_cpu(cpu)
+ if (cpu != me)
+ do_message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
+}
+#endif
+
+#ifdef CONFIG_KEXEC
+void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
+{
+ crash_ipi_function_ptr = crash_ipi_callback;
+ if (crash_ipi_callback) {
+ mb();
+ smp_send_debugger_break();
+ }
+}
+#endif
+
+static void stop_this_cpu(void *dummy)
+{
+ /* Remove this CPU */
+ set_cpu_online(smp_processor_id(), false);
+
+ local_irq_disable();
+ while (1)
+ ;
+}
+
+void smp_send_stop(void)
+{
+ smp_call_function(stop_this_cpu, NULL, 0);
+}
+
+struct thread_info *current_set[NR_CPUS];
+
+static void __devinit smp_store_cpu_info(int id)
+{
+ per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
+#ifdef CONFIG_PPC_FSL_BOOK3E
+ per_cpu(next_tlbcam_idx, id)
+ = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
+#endif
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+ unsigned int cpu;
+
+ DBG("smp_prepare_cpus\n");
+
+ /*
+ * setup_cpu may need to be called on the boot cpu. We havent
+ * spun any cpus up but lets be paranoid.
+ */
+ BUG_ON(boot_cpuid != smp_processor_id());
+
+ /* Fixup boot cpu */
+ smp_store_cpu_info(boot_cpuid);
+ cpu_callin_map[boot_cpuid] = 1;
+
+ for_each_possible_cpu(cpu) {
+ zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
+ GFP_KERNEL, cpu_to_node(cpu));
+ zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
+ GFP_KERNEL, cpu_to_node(cpu));
+ }
+
+ cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
+ cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
+
+ if (smp_ops)
+ if (smp_ops->probe)
+ max_cpus = smp_ops->probe();
+ else
+ max_cpus = NR_CPUS;
+ else
+ max_cpus = 1;
+}
+
+void __devinit smp_prepare_boot_cpu(void)
+{
+ BUG_ON(smp_processor_id() != boot_cpuid);
+#ifdef CONFIG_PPC64
+ paca[boot_cpuid].__current = current;
+#endif
+ current_set[boot_cpuid] = task_thread_info(current);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+int generic_cpu_disable(void)
+{
+ unsigned int cpu = smp_processor_id();
+
+ if (cpu == boot_cpuid)
+ return -EBUSY;
+
+ set_cpu_online(cpu, false);
+#ifdef CONFIG_PPC64
+ vdso_data->processorCount--;
+#endif
+ migrate_irqs();
+ return 0;
+}
+
+void generic_cpu_die(unsigned int cpu)
+{
+ int i;
+
+ for (i = 0; i < 100; i++) {
+ smp_rmb();
+ if (per_cpu(cpu_state, cpu) == CPU_DEAD)
+ return;
+ msleep(100);
+ }
+ printk(KERN_ERR "CPU%d didn't die...\n", cpu);
+}
+
+void generic_mach_cpu_die(void)
+{
+ unsigned int cpu;
+
+ local_irq_disable();
+ idle_task_exit();
+ cpu = smp_processor_id();
+ printk(KERN_DEBUG "CPU%d offline\n", cpu);
+ __get_cpu_var(cpu_state) = CPU_DEAD;
+ smp_wmb();
+ while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
+ cpu_relax();
+}
+
+void generic_set_cpu_dead(unsigned int cpu)
+{
+ per_cpu(cpu_state, cpu) = CPU_DEAD;
+}
+
+int generic_check_cpu_restart(unsigned int cpu)
+{
+ return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
+}
+#endif
+
+struct create_idle {
+ struct work_struct work;
+ struct task_struct *idle;
+ struct completion done;
+ int cpu;
+};
+
+static void __cpuinit do_fork_idle(struct work_struct *work)
+{
+ struct create_idle *c_idle =
+ container_of(work, struct create_idle, work);
+
+ c_idle->idle = fork_idle(c_idle->cpu);
+ complete(&c_idle->done);
+}
+
+static int __cpuinit create_idle(unsigned int cpu)
+{
+ struct thread_info *ti;
+ struct create_idle c_idle = {
+ .cpu = cpu,
+ .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
+ };
+ INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
+
+ c_idle.idle = get_idle_for_cpu(cpu);
+
+ /* We can't use kernel_thread since we must avoid to
+ * reschedule the child. We use a workqueue because
+ * we want to fork from a kernel thread, not whatever
+ * userspace process happens to be trying to online us.
+ */
+ if (!c_idle.idle) {
+ schedule_work(&c_idle.work);
+ wait_for_completion(&c_idle.done);
+ } else
+ init_idle(c_idle.idle, cpu);
+ if (IS_ERR(c_idle.idle)) {
+ pr_err("Failed fork for CPU %u: %li", cpu, PTR_ERR(c_idle.idle));
+ return PTR_ERR(c_idle.idle);
+ }
+ ti = task_thread_info(c_idle.idle);
+
+#ifdef CONFIG_PPC64
+ paca[cpu].__current = c_idle.idle;
+ paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
+#endif
+ ti->cpu = cpu;
+ current_set[cpu] = ti;
+
+ return 0;
+}
+
+int __cpuinit __cpu_up(unsigned int cpu)
+{
+ int rc, c;
+
+ if (smp_ops == NULL ||
+ (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
+ return -EINVAL;
+
+ /* Make sure we have an idle thread */
+ rc = create_idle(cpu);
+ if (rc)
+ return rc;
+
+ secondary_ti = current_set[cpu];
+
+ /* Make sure callin-map entry is 0 (can be leftover a CPU
+ * hotplug
+ */
+ cpu_callin_map[cpu] = 0;
+
+ /* The information for processor bringup must
+ * be written out to main store before we release
+ * the processor.
+ */
+ smp_mb();
+
+ /* wake up cpus */
+ DBG("smp: kicking cpu %d\n", cpu);
+ rc = smp_ops->kick_cpu(cpu);
+ if (rc) {
+ pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc);
+ return rc;
+ }
+
+ /*
+ * wait to see if the cpu made a callin (is actually up).
+ * use this value that I found through experimentation.
+ * -- Cort
+ */
+ if (system_state < SYSTEM_RUNNING)
+ for (c = 50000; c && !cpu_callin_map[cpu]; c--)
+ udelay(100);
+#ifdef CONFIG_HOTPLUG_CPU
+ else
+ /*
+ * CPUs can take much longer to come up in the
+ * hotplug case. Wait five seconds.
+ */
+ for (c = 5000; c && !cpu_callin_map[cpu]; c--)
+ msleep(1);
+#endif
+
+ if (!cpu_callin_map[cpu]) {
+ printk(KERN_ERR "Processor %u is stuck.\n", cpu);
+ return -ENOENT;
+ }
+
+ DBG("Processor %u found.\n", cpu);
+
+ if (smp_ops->give_timebase)
+ smp_ops->give_timebase();
+
+ /* Wait until cpu puts itself in the online map */
+ while (!cpu_online(cpu))
+ cpu_relax();
+
+ return 0;
+}
+
+/* Return the value of the reg property corresponding to the given
+ * logical cpu.
+ */
+int cpu_to_core_id(int cpu)
+{
+ struct device_node *np;
+ const int *reg;
+ int id = -1;
+
+ np = of_get_cpu_node(cpu, NULL);
+ if (!np)
+ goto out;
+
+ reg = of_get_property(np, "reg", NULL);
+ if (!reg)
+ goto out;
+
+ id = *reg;
+out:
+ of_node_put(np);
+ return id;
+}
+
+/* Helper routines for cpu to core mapping */
+int cpu_core_index_of_thread(int cpu)
+{
+ return cpu >> threads_shift;
+}
+EXPORT_SYMBOL_GPL(cpu_core_index_of_thread);
+
+int cpu_first_thread_of_core(int core)
+{
+ return core << threads_shift;
+}
+EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
+
+/* Must be called when no change can occur to cpu_present_mask,
+ * i.e. during cpu online or offline.
+ */
+static struct device_node *cpu_to_l2cache(int cpu)
+{
+ struct device_node *np;
+ struct device_node *cache;
+
+ if (!cpu_present(cpu))
+ return NULL;
+
+ np = of_get_cpu_node(cpu, NULL);
+ if (np == NULL)
+ return NULL;
+
+ cache = of_find_next_cache_node(np);
+
+ of_node_put(np);
+
+ return cache;
+}
+
+/* Activate a secondary processor. */
+void __devinit start_secondary(void *unused)
+{
+ unsigned int cpu = smp_processor_id();
+ struct device_node *l2_cache;
+ int i, base;
+
+ atomic_inc(&init_mm.mm_count);
+ current->active_mm = &init_mm;
+
+ smp_store_cpu_info(cpu);
+ set_dec(tb_ticks_per_jiffy);
+ preempt_disable();
+ cpu_callin_map[cpu] = 1;
+
+ if (smp_ops->setup_cpu)
+ smp_ops->setup_cpu(cpu);
+ if (smp_ops->take_timebase)
+ smp_ops->take_timebase();
+
+ secondary_cpu_time_init();
+
+#ifdef CONFIG_PPC64
+ if (system_state == SYSTEM_RUNNING)
+ vdso_data->processorCount++;
+#endif
+ ipi_call_lock();
+ notify_cpu_starting(cpu);
+ set_cpu_online(cpu, true);
+ /* Update sibling maps */
+ base = cpu_first_thread_sibling(cpu);
+ for (i = 0; i < threads_per_core; i++) {
+ if (cpu_is_offline(base + i))
+ continue;
+ cpumask_set_cpu(cpu, cpu_sibling_mask(base + i));
+ cpumask_set_cpu(base + i, cpu_sibling_mask(cpu));
+
+ /* cpu_core_map should be a superset of
+ * cpu_sibling_map even if we don't have cache
+ * information, so update the former here, too.
+ */
+ cpumask_set_cpu(cpu, cpu_core_mask(base + i));
+ cpumask_set_cpu(base + i, cpu_core_mask(cpu));
+ }
+ l2_cache = cpu_to_l2cache(cpu);
+ for_each_online_cpu(i) {
+ struct device_node *np = cpu_to_l2cache(i);
+ if (!np)
+ continue;
+ if (np == l2_cache) {
+ cpumask_set_cpu(cpu, cpu_core_mask(i));
+ cpumask_set_cpu(i, cpu_core_mask(cpu));
+ }
+ of_node_put(np);
+ }
+ of_node_put(l2_cache);
+ ipi_call_unlock();
+
+ local_irq_enable();
+
+ cpu_idle();
+
+ BUG();
+}
+
+int setup_profiling_timer(unsigned int multiplier)
+{
+ return 0;
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+ cpumask_var_t old_mask;
+
+ /* We want the setup_cpu() here to be called from CPU 0, but our
+ * init thread may have been "borrowed" by another CPU in the meantime
+ * se we pin us down to CPU 0 for a short while
+ */
+ alloc_cpumask_var(&old_mask, GFP_NOWAIT);
+ cpumask_copy(old_mask, tsk_cpus_allowed(current));
+ set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid));
+
+ if (smp_ops && smp_ops->setup_cpu)
+ smp_ops->setup_cpu(boot_cpuid);
+
+ set_cpus_allowed_ptr(current, old_mask);
+
+ free_cpumask_var(old_mask);
+
+ if (smp_ops && smp_ops->bringup_done)
+ smp_ops->bringup_done();
+
+ dump_numa_cpu_topology();
+
+}
+
+int arch_sd_sibling_asym_packing(void)
+{
+ if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+ printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
+ return SD_ASYM_PACKING;
+ }
+ return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+int __cpu_disable(void)
+{
+ struct device_node *l2_cache;
+ int cpu = smp_processor_id();
+ int base, i;
+ int err;
+
+ if (!smp_ops->cpu_disable)
+ return -ENOSYS;
+
+ err = smp_ops->cpu_disable();
+ if (err)
+ return err;
+
+ /* Update sibling maps */
+ base = cpu_first_thread_sibling(cpu);
+ for (i = 0; i < threads_per_core; i++) {
+ cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
+ cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
+ cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
+ cpumask_clear_cpu(base + i, cpu_core_mask(cpu));
+ }
+
+ l2_cache = cpu_to_l2cache(cpu);
+ for_each_present_cpu(i) {
+ struct device_node *np = cpu_to_l2cache(i);
+ if (!np)
+ continue;
+ if (np == l2_cache) {
+ cpumask_clear_cpu(cpu, cpu_core_mask(i));
+ cpumask_clear_cpu(i, cpu_core_mask(cpu));
+ }
+ of_node_put(np);
+ }
+ of_node_put(l2_cache);
+
+
+ return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+ if (smp_ops->cpu_die)
+ smp_ops->cpu_die(cpu);
+}
+
+static DEFINE_MUTEX(powerpc_cpu_hotplug_driver_mutex);
+
+void cpu_hotplug_driver_lock()
+{
+ mutex_lock(&powerpc_cpu_hotplug_driver_mutex);
+}
+
+void cpu_hotplug_driver_unlock()
+{
+ mutex_unlock(&powerpc_cpu_hotplug_driver_mutex);
+}
+
+void cpu_die(void)
+{
+ if (ppc_md.cpu_die)
+ ppc_md.cpu_die();
+
+ /* If we return, we re-enter start_secondary */
+ start_secondary_resume();
+}
+
+#endif
diff --git a/arch/powerpc/kernel/softemu8xx.c b/arch/powerpc/kernel/softemu8xx.c
new file mode 100644
index 00000000..29b2f81d
--- /dev/null
+++ b/arch/powerpc/kernel/softemu8xx.c
@@ -0,0 +1,199 @@
+/*
+ * Software emulation of some PPC instructions for the 8xx core.
+ *
+ * Copyright (C) 1998 Dan Malek (dmalek@jlc.net)
+ *
+ * Software floating emuation for the MPC8xx processor. I did this mostly
+ * because it was easier than trying to get the libraries compiled for
+ * software floating point. The goal is still to get the libraries done,
+ * but I lost patience and needed some hacks to at least get init and
+ * shells running. The first problem is the setjmp/longjmp that save
+ * and restore the floating point registers.
+ *
+ * For this emulation, our working registers are found on the register
+ * save area.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/interrupt.h>
+
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+/* Eventually we may need a look-up table, but this works for now.
+*/
+#define LFS 48
+#define LFD 50
+#define LFDU 51
+#define STFD 54
+#define STFDU 55
+#define FMR 63
+
+void print_8xx_pte(struct mm_struct *mm, unsigned long addr)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ printk(" pte @ 0x%8lx: ", addr);
+ pgd = pgd_offset(mm, addr & PAGE_MASK);
+ if (pgd) {
+ pmd = pmd_offset(pud_offset(pgd, addr & PAGE_MASK),
+ addr & PAGE_MASK);
+ if (pmd && pmd_present(*pmd)) {
+ pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
+ if (pte) {
+ printk(" (0x%08lx)->(0x%08lx)->0x%08lx\n",
+ (long)pgd, (long)pte, (long)pte_val(*pte));
+#define pp ((long)pte_val(*pte))
+ printk(" RPN: %05lx PP: %lx SPS: %lx SH: %lx "
+ "CI: %lx v: %lx\n",
+ pp>>12, /* rpn */
+ (pp>>10)&3, /* pp */
+ (pp>>3)&1, /* small */
+ (pp>>2)&1, /* shared */
+ (pp>>1)&1, /* cache inhibit */
+ pp&1 /* valid */
+ );
+#undef pp
+ }
+ else {
+ printk("no pte\n");
+ }
+ }
+ else {
+ printk("no pmd\n");
+ }
+ }
+ else {
+ printk("no pgd\n");
+ }
+}
+
+int get_8xx_pte(struct mm_struct *mm, unsigned long addr)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+ int retval = 0;
+
+ pgd = pgd_offset(mm, addr & PAGE_MASK);
+ if (pgd) {
+ pmd = pmd_offset(pud_offset(pgd, addr & PAGE_MASK),
+ addr & PAGE_MASK);
+ if (pmd && pmd_present(*pmd)) {
+ pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
+ if (pte) {
+ retval = (int)pte_val(*pte);
+ }
+ }
+ }
+ return retval;
+}
+
+/*
+ * We return 0 on success, 1 on unimplemented instruction, and EFAULT
+ * if a load/store faulted.
+ */
+int Soft_emulate_8xx(struct pt_regs *regs)
+{
+ u32 inst, instword;
+ u32 flreg, idxreg, disp;
+ int retval;
+ s16 sdisp;
+ u32 *ea, *ip;
+
+ retval = 0;
+
+ instword = *((u32 *)regs->nip);
+ inst = instword >> 26;
+
+ flreg = (instword >> 21) & 0x1f;
+ idxreg = (instword >> 16) & 0x1f;
+ disp = instword & 0xffff;
+
+ ea = (u32 *)(regs->gpr[idxreg] + disp);
+ ip = (u32 *)&current->thread.TS_FPR(flreg);
+
+ switch ( inst )
+ {
+ case LFD:
+ /* this is a 16 bit quantity that is sign extended
+ * so use a signed short here -- Cort
+ */
+ sdisp = (instword & 0xffff);
+ ea = (u32 *)(regs->gpr[idxreg] + sdisp);
+ if (copy_from_user(ip, ea, sizeof(double)))
+ retval = -EFAULT;
+ break;
+
+ case LFDU:
+ if (copy_from_user(ip, ea, sizeof(double)))
+ retval = -EFAULT;
+ else
+ regs->gpr[idxreg] = (u32)ea;
+ break;
+ case LFS:
+ sdisp = (instword & 0xffff);
+ ea = (u32 *)(regs->gpr[idxreg] + sdisp);
+ if (copy_from_user(ip, ea, sizeof(float)))
+ retval = -EFAULT;
+ break;
+ case STFD:
+ /* this is a 16 bit quantity that is sign extended
+ * so use a signed short here -- Cort
+ */
+ sdisp = (instword & 0xffff);
+ ea = (u32 *)(regs->gpr[idxreg] + sdisp);
+ if (copy_to_user(ea, ip, sizeof(double)))
+ retval = -EFAULT;
+ break;
+
+ case STFDU:
+ if (copy_to_user(ea, ip, sizeof(double)))
+ retval = -EFAULT;
+ else
+ regs->gpr[idxreg] = (u32)ea;
+ break;
+ case FMR:
+ /* assume this is a fp move -- Cort */
+ memcpy(ip, &current->thread.TS_FPR((instword>>11)&0x1f),
+ sizeof(double));
+ break;
+ default:
+ retval = 1;
+ printk("Bad emulation %s/%d\n"
+ " NIP: %08lx instruction: %08x opcode: %x "
+ "A: %x B: %x C: %x code: %x rc: %x\n",
+ current->comm,current->pid,
+ regs->nip,
+ instword,inst,
+ (instword>>16)&0x1f,
+ (instword>>11)&0x1f,
+ (instword>>6)&0x1f,
+ (instword>>1)&0x3ff,
+ instword&1);
+ {
+ int pa;
+ print_8xx_pte(current->mm,regs->nip);
+ pa = get_8xx_pte(current->mm,regs->nip) & PAGE_MASK;
+ pa |= (regs->nip & ~PAGE_MASK);
+ pa = (unsigned long)__va(pa);
+ printk("Kernel VA for NIP %x ", pa);
+ print_8xx_pte(current->mm,pa);
+ }
+ }
+
+ if (retval == 0)
+ regs->nip += 4;
+
+ return retval;
+}
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
new file mode 100644
index 00000000..3d30ef10
--- /dev/null
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -0,0 +1,63 @@
+/*
+ * Stack trace utility
+ *
+ * Copyright 2008 Christoph Hellwig, IBM Corp.
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+#include <asm/ptrace.h>
+#include <asm/processor.h>
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer.
+ */
+static void save_context_stack(struct stack_trace *trace, unsigned long sp,
+ struct task_struct *tsk, int savesched)
+{
+ for (;;) {
+ unsigned long *stack = (unsigned long *) sp;
+ unsigned long newsp, ip;
+
+ if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
+ return;
+
+ newsp = stack[0];
+ ip = stack[STACK_FRAME_LR_SAVE];
+
+ if (savesched || !in_sched_functions(ip)) {
+ if (!trace->skip)
+ trace->entries[trace->nr_entries++] = ip;
+ else
+ trace->skip--;
+ }
+
+ if (trace->nr_entries >= trace->max_entries)
+ return;
+
+ sp = newsp;
+ }
+}
+
+void save_stack_trace(struct stack_trace *trace)
+{
+ unsigned long sp;
+
+ asm("mr %0,1" : "=r" (sp));
+
+ save_context_stack(trace, sp, current, 1);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+ save_context_stack(trace, tsk->thread.ksp, tsk, 0);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c
new file mode 100644
index 00000000..0167d53d
--- /dev/null
+++ b/arch/powerpc/kernel/suspend.c
@@ -0,0 +1,25 @@
+/*
+ * Suspend support specific for power.
+ *
+ * Distribute under GPLv2
+ *
+ * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz>
+ * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
+ */
+
+#include <linux/mm.h>
+#include <asm/page.h>
+
+/* References to section boundaries */
+extern const void __nosave_begin, __nosave_end;
+
+/*
+ * pfn_is_nosave - check if given pfn is in the 'nosave' section
+ */
+
+int pfn_is_nosave(unsigned long pfn)
+{
+ unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
+ unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
+ return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
+}
diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c
new file mode 100644
index 00000000..eae33e10
--- /dev/null
+++ b/arch/powerpc/kernel/swsusp.c
@@ -0,0 +1,38 @@
+/*
+ * Common powerpc suspend code for 32 and 64 bits
+ *
+ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <asm/current.h>
+#include <asm/mmu_context.h>
+#include <asm/switch_to.h>
+
+void save_processor_state(void)
+{
+ /*
+ * flush out all the special registers so we don't need
+ * to save them in the snapshot
+ */
+ flush_fp_to_thread(current);
+ flush_altivec_to_thread(current);
+ flush_spe_to_thread(current);
+
+#ifdef CONFIG_PPC64
+ hard_irq_disable();
+#endif
+
+}
+
+void restore_processor_state(void)
+{
+#ifdef CONFIG_PPC32
+ switch_mmu_context(current->active_mm, current->active_mm);
+#endif
+}
diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S
new file mode 100644
index 00000000..ba4dee3d
--- /dev/null
+++ b/arch/powerpc/kernel/swsusp_32.S
@@ -0,0 +1,350 @@
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/mmu.h>
+
+/*
+ * Structure for storing CPU registers on the save area.
+ */
+#define SL_SP 0
+#define SL_PC 4
+#define SL_MSR 8
+#define SL_SDR1 0xc
+#define SL_SPRG0 0x10 /* 4 sprg's */
+#define SL_DBAT0 0x20
+#define SL_IBAT0 0x28
+#define SL_DBAT1 0x30
+#define SL_IBAT1 0x38
+#define SL_DBAT2 0x40
+#define SL_IBAT2 0x48
+#define SL_DBAT3 0x50
+#define SL_IBAT3 0x58
+#define SL_TB 0x60
+#define SL_R2 0x68
+#define SL_CR 0x6c
+#define SL_LR 0x70
+#define SL_R12 0x74 /* r12 to r31 */
+#define SL_SIZE (SL_R12 + 80)
+
+ .section .data
+ .align 5
+
+_GLOBAL(swsusp_save_area)
+ .space SL_SIZE
+
+
+ .section .text
+ .align 5
+
+_GLOBAL(swsusp_arch_suspend)
+
+ lis r11,swsusp_save_area@h
+ ori r11,r11,swsusp_save_area@l
+
+ mflr r0
+ stw r0,SL_LR(r11)
+ mfcr r0
+ stw r0,SL_CR(r11)
+ stw r1,SL_SP(r11)
+ stw r2,SL_R2(r11)
+ stmw r12,SL_R12(r11)
+
+ /* Save MSR & SDR1 */
+ mfmsr r4
+ stw r4,SL_MSR(r11)
+ mfsdr1 r4
+ stw r4,SL_SDR1(r11)
+
+ /* Get a stable timebase and save it */
+1: mftbu r4
+ stw r4,SL_TB(r11)
+ mftb r5
+ stw r5,SL_TB+4(r11)
+ mftbu r3
+ cmpw r3,r4
+ bne 1b
+
+ /* Save SPRGs */
+ mfsprg r4,0
+ stw r4,SL_SPRG0(r11)
+ mfsprg r4,1
+ stw r4,SL_SPRG0+4(r11)
+ mfsprg r4,2
+ stw r4,SL_SPRG0+8(r11)
+ mfsprg r4,3
+ stw r4,SL_SPRG0+12(r11)
+
+ /* Save BATs */
+ mfdbatu r4,0
+ stw r4,SL_DBAT0(r11)
+ mfdbatl r4,0
+ stw r4,SL_DBAT0+4(r11)
+ mfdbatu r4,1
+ stw r4,SL_DBAT1(r11)
+ mfdbatl r4,1
+ stw r4,SL_DBAT1+4(r11)
+ mfdbatu r4,2
+ stw r4,SL_DBAT2(r11)
+ mfdbatl r4,2
+ stw r4,SL_DBAT2+4(r11)
+ mfdbatu r4,3
+ stw r4,SL_DBAT3(r11)
+ mfdbatl r4,3
+ stw r4,SL_DBAT3+4(r11)
+ mfibatu r4,0
+ stw r4,SL_IBAT0(r11)
+ mfibatl r4,0
+ stw r4,SL_IBAT0+4(r11)
+ mfibatu r4,1
+ stw r4,SL_IBAT1(r11)
+ mfibatl r4,1
+ stw r4,SL_IBAT1+4(r11)
+ mfibatu r4,2
+ stw r4,SL_IBAT2(r11)
+ mfibatl r4,2
+ stw r4,SL_IBAT2+4(r11)
+ mfibatu r4,3
+ stw r4,SL_IBAT3(r11)
+ mfibatl r4,3
+ stw r4,SL_IBAT3+4(r11)
+
+#if 0
+ /* Backup various CPU config stuffs */
+ bl __save_cpu_setup
+#endif
+ /* Call the low level suspend stuff (we should probably have made
+ * a stackframe...
+ */
+ bl swsusp_save
+
+ /* Restore LR from the save area */
+ lis r11,swsusp_save_area@h
+ ori r11,r11,swsusp_save_area@l
+ lwz r0,SL_LR(r11)
+ mtlr r0
+
+ blr
+
+
+/* Resume code */
+_GLOBAL(swsusp_arch_resume)
+
+#ifdef CONFIG_ALTIVEC
+ /* Stop pending alitvec streams and memory accesses */
+BEGIN_FTR_SECTION
+ DSSALL
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+ sync
+
+ /* Disable MSR:DR to make sure we don't take a TLB or
+ * hash miss during the copy, as our hash table will
+ * for a while be unusable. For .text, we assume we are
+ * covered by a BAT. This works only for non-G5 at this
+ * point. G5 will need a better approach, possibly using
+ * a small temporary hash table filled with large mappings,
+ * disabling the MMU completely isn't a good option for
+ * performance reasons.
+ * (Note that 750's may have the same performance issue as
+ * the G5 in this case, we should investigate using moving
+ * BATs for these CPUs)
+ */
+ mfmsr r0
+ sync
+ rlwinm r0,r0,0,28,26 /* clear MSR_DR */
+ mtmsr r0
+ sync
+ isync
+
+ /* Load ptr the list of pages to copy in r3 */
+ lis r11,(restore_pblist - KERNELBASE)@h
+ ori r11,r11,restore_pblist@l
+ lwz r10,0(r11)
+
+ /* Copy the pages. This is a very basic implementation, to
+ * be replaced by something more cache efficient */
+1:
+ tophys(r3,r10)
+ li r0,256
+ mtctr r0
+ lwz r11,pbe_address(r3) /* source */
+ tophys(r5,r11)
+ lwz r10,pbe_orig_address(r3) /* destination */
+ tophys(r6,r10)
+2:
+ lwz r8,0(r5)
+ lwz r9,4(r5)
+ lwz r10,8(r5)
+ lwz r11,12(r5)
+ addi r5,r5,16
+ stw r8,0(r6)
+ stw r9,4(r6)
+ stw r10,8(r6)
+ stw r11,12(r6)
+ addi r6,r6,16
+ bdnz 2b
+ lwz r10,pbe_next(r3)
+ cmpwi 0,r10,0
+ bne 1b
+
+ /* Do a very simple cache flush/inval of the L1 to ensure
+ * coherency of the icache
+ */
+ lis r3,0x0002
+ mtctr r3
+ li r3, 0
+1:
+ lwz r0,0(r3)
+ addi r3,r3,0x0020
+ bdnz 1b
+ isync
+ sync
+
+ /* Now flush those cache lines */
+ lis r3,0x0002
+ mtctr r3
+ li r3, 0
+1:
+ dcbf 0,r3
+ addi r3,r3,0x0020
+ bdnz 1b
+ sync
+
+ /* Ok, we are now running with the kernel data of the old
+ * kernel fully restored. We can get to the save area
+ * easily now. As for the rest of the code, it assumes the
+ * loader kernel and the booted one are exactly identical
+ */
+ lis r11,swsusp_save_area@h
+ ori r11,r11,swsusp_save_area@l
+ tophys(r11,r11)
+
+#if 0
+ /* Restore various CPU config stuffs */
+ bl __restore_cpu_setup
+#endif
+ /* Restore the BATs, and SDR1. Then we can turn on the MMU.
+ * This is a bit hairy as we are running out of those BATs,
+ * but first, our code is probably in the icache, and we are
+ * writing the same value to the BAT, so that should be fine,
+ * though a better solution will have to be found long-term
+ */
+ lwz r4,SL_SDR1(r11)
+ mtsdr1 r4
+ lwz r4,SL_SPRG0(r11)
+ mtsprg 0,r4
+ lwz r4,SL_SPRG0+4(r11)
+ mtsprg 1,r4
+ lwz r4,SL_SPRG0+8(r11)
+ mtsprg 2,r4
+ lwz r4,SL_SPRG0+12(r11)
+ mtsprg 3,r4
+
+#if 0
+ lwz r4,SL_DBAT0(r11)
+ mtdbatu 0,r4
+ lwz r4,SL_DBAT0+4(r11)
+ mtdbatl 0,r4
+ lwz r4,SL_DBAT1(r11)
+ mtdbatu 1,r4
+ lwz r4,SL_DBAT1+4(r11)
+ mtdbatl 1,r4
+ lwz r4,SL_DBAT2(r11)
+ mtdbatu 2,r4
+ lwz r4,SL_DBAT2+4(r11)
+ mtdbatl 2,r4
+ lwz r4,SL_DBAT3(r11)
+ mtdbatu 3,r4
+ lwz r4,SL_DBAT3+4(r11)
+ mtdbatl 3,r4
+ lwz r4,SL_IBAT0(r11)
+ mtibatu 0,r4
+ lwz r4,SL_IBAT0+4(r11)
+ mtibatl 0,r4
+ lwz r4,SL_IBAT1(r11)
+ mtibatu 1,r4
+ lwz r4,SL_IBAT1+4(r11)
+ mtibatl 1,r4
+ lwz r4,SL_IBAT2(r11)
+ mtibatu 2,r4
+ lwz r4,SL_IBAT2+4(r11)
+ mtibatl 2,r4
+ lwz r4,SL_IBAT3(r11)
+ mtibatu 3,r4
+ lwz r4,SL_IBAT3+4(r11)
+ mtibatl 3,r4
+#endif
+
+BEGIN_MMU_FTR_SECTION
+ li r4,0
+ mtspr SPRN_DBAT4U,r4
+ mtspr SPRN_DBAT4L,r4
+ mtspr SPRN_DBAT5U,r4
+ mtspr SPRN_DBAT5L,r4
+ mtspr SPRN_DBAT6U,r4
+ mtspr SPRN_DBAT6L,r4
+ mtspr SPRN_DBAT7U,r4
+ mtspr SPRN_DBAT7L,r4
+ mtspr SPRN_IBAT4U,r4
+ mtspr SPRN_IBAT4L,r4
+ mtspr SPRN_IBAT5U,r4
+ mtspr SPRN_IBAT5L,r4
+ mtspr SPRN_IBAT6U,r4
+ mtspr SPRN_IBAT6L,r4
+ mtspr SPRN_IBAT7U,r4
+ mtspr SPRN_IBAT7L,r4
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+
+ /* Flush all TLBs */
+ lis r4,0x1000
+1: addic. r4,r4,-0x1000
+ tlbie r4
+ bgt 1b
+ sync
+
+ /* restore the MSR and turn on the MMU */
+ lwz r3,SL_MSR(r11)
+ bl turn_on_mmu
+ tovirt(r11,r11)
+
+ /* Restore TB */
+ li r3,0
+ mttbl r3
+ lwz r3,SL_TB(r11)
+ lwz r4,SL_TB+4(r11)
+ mttbu r3
+ mttbl r4
+
+ /* Kick decrementer */
+ li r0,1
+ mtdec r0
+
+ /* Restore the callee-saved registers and return */
+ lwz r0,SL_CR(r11)
+ mtcr r0
+ lwz r2,SL_R2(r11)
+ lmw r12,SL_R12(r11)
+ lwz r1,SL_SP(r11)
+ lwz r0,SL_LR(r11)
+ mtlr r0
+
+ // XXX Note: we don't really need to call swsusp_resume
+
+ li r3,0
+ blr
+
+/* FIXME:This construct is actually not useful since we don't shut
+ * down the instruction MMU, we could just flip back MSR-DR on.
+ */
+turn_on_mmu:
+ mflr r4
+ mtsrr0 r4
+ mtsrr1 r3
+ sync
+ isync
+ rfi
+
diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c
new file mode 100644
index 00000000..0e899e47
--- /dev/null
+++ b/arch/powerpc/kernel/swsusp_64.c
@@ -0,0 +1,24 @@
+/*
+ * PowerPC 64-bit swsusp implementation
+ *
+ * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * GPLv2
+ */
+
+#include <asm/iommu.h>
+#include <linux/irq.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+
+void do_after_copyback(void)
+{
+ iommu_restore();
+ touch_softlockup_watchdog();
+ mb();
+}
+
+void _iommu_save(void)
+{
+ iommu_save();
+}
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
new file mode 100644
index 00000000..86ac1d90
--- /dev/null
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -0,0 +1,228 @@
+/*
+ * PowerPC 64-bit swsusp implementation
+ *
+ * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * GPLv2
+ */
+
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+/*
+ * Structure for storing CPU registers on the save area.
+ */
+#define SL_r1 0x00 /* stack pointer */
+#define SL_PC 0x08
+#define SL_MSR 0x10
+#define SL_SDR1 0x18
+#define SL_XER 0x20
+#define SL_TB 0x40
+#define SL_r2 0x48
+#define SL_CR 0x50
+#define SL_LR 0x58
+#define SL_r12 0x60
+#define SL_r13 0x68
+#define SL_r14 0x70
+#define SL_r15 0x78
+#define SL_r16 0x80
+#define SL_r17 0x88
+#define SL_r18 0x90
+#define SL_r19 0x98
+#define SL_r20 0xa0
+#define SL_r21 0xa8
+#define SL_r22 0xb0
+#define SL_r23 0xb8
+#define SL_r24 0xc0
+#define SL_r25 0xc8
+#define SL_r26 0xd0
+#define SL_r27 0xd8
+#define SL_r28 0xe0
+#define SL_r29 0xe8
+#define SL_r30 0xf0
+#define SL_r31 0xf8
+#define SL_SIZE SL_r31+8
+
+/* these macros rely on the save area being
+ * pointed to by r11 */
+#define SAVE_SPECIAL(special) \
+ mf##special r0 ;\
+ std r0, SL_##special(r11)
+#define RESTORE_SPECIAL(special) \
+ ld r0, SL_##special(r11) ;\
+ mt##special r0
+#define SAVE_REGISTER(reg) \
+ std reg, SL_##reg(r11)
+#define RESTORE_REGISTER(reg) \
+ ld reg, SL_##reg(r11)
+
+/* space for storing cpu state */
+ .section .data
+ .align 5
+swsusp_save_area:
+ .space SL_SIZE
+
+ .section ".toc","aw"
+swsusp_save_area_ptr:
+ .tc swsusp_save_area[TC],swsusp_save_area
+restore_pblist_ptr:
+ .tc restore_pblist[TC],restore_pblist
+
+ .section .text
+ .align 5
+_GLOBAL(swsusp_arch_suspend)
+ ld r11,swsusp_save_area_ptr@toc(r2)
+ SAVE_SPECIAL(LR)
+ SAVE_REGISTER(r1)
+ SAVE_SPECIAL(CR)
+ SAVE_SPECIAL(TB)
+ SAVE_REGISTER(r2)
+ SAVE_REGISTER(r12)
+ SAVE_REGISTER(r13)
+ SAVE_REGISTER(r14)
+ SAVE_REGISTER(r15)
+ SAVE_REGISTER(r16)
+ SAVE_REGISTER(r17)
+ SAVE_REGISTER(r18)
+ SAVE_REGISTER(r19)
+ SAVE_REGISTER(r20)
+ SAVE_REGISTER(r21)
+ SAVE_REGISTER(r22)
+ SAVE_REGISTER(r23)
+ SAVE_REGISTER(r24)
+ SAVE_REGISTER(r25)
+ SAVE_REGISTER(r26)
+ SAVE_REGISTER(r27)
+ SAVE_REGISTER(r28)
+ SAVE_REGISTER(r29)
+ SAVE_REGISTER(r30)
+ SAVE_REGISTER(r31)
+ SAVE_SPECIAL(MSR)
+ SAVE_SPECIAL(SDR1)
+ SAVE_SPECIAL(XER)
+
+ /* we push the stack up 128 bytes but don't store the
+ * stack pointer on the stack like a real stackframe */
+ addi r1,r1,-128
+
+ bl _iommu_save
+ bl swsusp_save
+
+ /* restore LR */
+ ld r11,swsusp_save_area_ptr@toc(r2)
+ RESTORE_SPECIAL(LR)
+ addi r1,r1,128
+
+ blr
+
+/* Resume code */
+_GLOBAL(swsusp_arch_resume)
+ /* Stop pending alitvec streams and memory accesses */
+BEGIN_FTR_SECTION
+ DSSALL
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ sync
+
+ ld r12,restore_pblist_ptr@toc(r2)
+ ld r12,0(r12)
+
+ cmpdi r12,0
+ beq- nothing_to_copy
+ li r15,PAGE_SIZE>>3
+copyloop:
+ ld r13,pbe_address(r12)
+ ld r14,pbe_orig_address(r12)
+
+ mtctr r15
+ li r10,0
+copy_page_loop:
+ ldx r0,r10,r13
+ stdx r0,r10,r14
+ addi r10,r10,8
+ bdnz copy_page_loop
+
+ ld r12,pbe_next(r12)
+ cmpdi r12,0
+ bne+ copyloop
+nothing_to_copy:
+
+ /* flush caches */
+ lis r3, 0x10
+ mtctr r3
+ li r3, 0
+ ori r3, r3, CONFIG_KERNEL_START>>48
+ li r0, 48
+ sld r3, r3, r0
+ li r0, 0
+1:
+ dcbf r0,r3
+ addi r3,r3,0x20
+ bdnz 1b
+
+ sync
+
+ tlbia
+
+ ld r11,swsusp_save_area_ptr@toc(r2)
+
+ RESTORE_SPECIAL(CR)
+
+ /* restore timebase */
+ /* load saved tb */
+ ld r1, SL_TB(r11)
+ /* get upper 32 bits of it */
+ srdi r2, r1, 32
+ /* clear tb lower to avoid wrap */
+ li r0, 0
+ mttbl r0
+ /* set tb upper */
+ mttbu r2
+ /* set tb lower */
+ mttbl r1
+
+ /* restore registers */
+ RESTORE_REGISTER(r1)
+ RESTORE_REGISTER(r2)
+ RESTORE_REGISTER(r12)
+ RESTORE_REGISTER(r13)
+ RESTORE_REGISTER(r14)
+ RESTORE_REGISTER(r15)
+ RESTORE_REGISTER(r16)
+ RESTORE_REGISTER(r17)
+ RESTORE_REGISTER(r18)
+ RESTORE_REGISTER(r19)
+ RESTORE_REGISTER(r20)
+ RESTORE_REGISTER(r21)
+ RESTORE_REGISTER(r22)
+ RESTORE_REGISTER(r23)
+ RESTORE_REGISTER(r24)
+ RESTORE_REGISTER(r25)
+ RESTORE_REGISTER(r26)
+ RESTORE_REGISTER(r27)
+ RESTORE_REGISTER(r28)
+ RESTORE_REGISTER(r29)
+ RESTORE_REGISTER(r30)
+ RESTORE_REGISTER(r31)
+ /* can't use RESTORE_SPECIAL(MSR) */
+ ld r0, SL_MSR(r11)
+ mtmsrd r0, 0
+ RESTORE_SPECIAL(SDR1)
+ RESTORE_SPECIAL(XER)
+
+ sync
+
+ addi r1,r1,-128
+ bl slb_flush_and_rebolt
+ bl do_after_copyback
+ addi r1,r1,128
+
+ ld r11,swsusp_save_area_ptr@toc(r2)
+ RESTORE_SPECIAL(LR)
+
+ li r3, 0
+ blr
diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_booke.S
new file mode 100644
index 00000000..11a39307
--- /dev/null
+++ b/arch/powerpc/kernel/swsusp_booke.S
@@ -0,0 +1,193 @@
+/*
+ * Based on swsusp_32.S, modified for FSL BookE by
+ * Anton Vorontsov <avorontsov@ru.mvista.com>
+ * Copyright (c) 2009-2010 MontaVista Software, LLC.
+ */
+
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/mmu.h>
+
+/*
+ * Structure for storing CPU registers on the save area.
+ */
+#define SL_SP 0
+#define SL_PC 4
+#define SL_MSR 8
+#define SL_TCR 0xc
+#define SL_SPRG0 0x10
+#define SL_SPRG1 0x14
+#define SL_SPRG2 0x18
+#define SL_SPRG3 0x1c
+#define SL_SPRG4 0x20
+#define SL_SPRG5 0x24
+#define SL_SPRG6 0x28
+#define SL_SPRG7 0x2c
+#define SL_TBU 0x30
+#define SL_TBL 0x34
+#define SL_R2 0x38
+#define SL_CR 0x3c
+#define SL_LR 0x40
+#define SL_R12 0x44 /* r12 to r31 */
+#define SL_SIZE (SL_R12 + 80)
+
+ .section .data
+ .align 5
+
+_GLOBAL(swsusp_save_area)
+ .space SL_SIZE
+
+
+ .section .text
+ .align 5
+
+_GLOBAL(swsusp_arch_suspend)
+ lis r11,swsusp_save_area@h
+ ori r11,r11,swsusp_save_area@l
+
+ mflr r0
+ stw r0,SL_LR(r11)
+ mfcr r0
+ stw r0,SL_CR(r11)
+ stw r1,SL_SP(r11)
+ stw r2,SL_R2(r11)
+ stmw r12,SL_R12(r11)
+
+ /* Save MSR & TCR */
+ mfmsr r4
+ stw r4,SL_MSR(r11)
+ mfspr r4,SPRN_TCR
+ stw r4,SL_TCR(r11)
+
+ /* Get a stable timebase and save it */
+1: mfspr r4,SPRN_TBRU
+ stw r4,SL_TBU(r11)
+ mfspr r5,SPRN_TBRL
+ stw r5,SL_TBL(r11)
+ mfspr r3,SPRN_TBRU
+ cmpw r3,r4
+ bne 1b
+
+ /* Save SPRGs */
+ mfsprg r4,0
+ stw r4,SL_SPRG0(r11)
+ mfsprg r4,1
+ stw r4,SL_SPRG1(r11)
+ mfsprg r4,2
+ stw r4,SL_SPRG2(r11)
+ mfsprg r4,3
+ stw r4,SL_SPRG3(r11)
+ mfsprg r4,4
+ stw r4,SL_SPRG4(r11)
+ mfsprg r4,5
+ stw r4,SL_SPRG5(r11)
+ mfsprg r4,6
+ stw r4,SL_SPRG6(r11)
+ mfsprg r4,7
+ stw r4,SL_SPRG7(r11)
+
+ /* Call the low level suspend stuff (we should probably have made
+ * a stackframe...
+ */
+ bl swsusp_save
+
+ /* Restore LR from the save area */
+ lis r11,swsusp_save_area@h
+ ori r11,r11,swsusp_save_area@l
+ lwz r0,SL_LR(r11)
+ mtlr r0
+
+ blr
+
+_GLOBAL(swsusp_arch_resume)
+ sync
+
+ /* Load ptr the list of pages to copy in r3 */
+ lis r11,(restore_pblist)@h
+ ori r11,r11,restore_pblist@l
+ lwz r3,0(r11)
+
+ /* Copy the pages. This is a very basic implementation, to
+ * be replaced by something more cache efficient */
+1:
+ li r0,256
+ mtctr r0
+ lwz r5,pbe_address(r3) /* source */
+ lwz r6,pbe_orig_address(r3) /* destination */
+2:
+ lwz r8,0(r5)
+ lwz r9,4(r5)
+ lwz r10,8(r5)
+ lwz r11,12(r5)
+ addi r5,r5,16
+ stw r8,0(r6)
+ stw r9,4(r6)
+ stw r10,8(r6)
+ stw r11,12(r6)
+ addi r6,r6,16
+ bdnz 2b
+ lwz r3,pbe_next(r3)
+ cmpwi 0,r3,0
+ bne 1b
+
+ bl flush_dcache_L1
+ bl flush_instruction_cache
+
+ lis r11,swsusp_save_area@h
+ ori r11,r11,swsusp_save_area@l
+
+ lwz r4,SL_SPRG0(r11)
+ mtsprg 0,r4
+ lwz r4,SL_SPRG1(r11)
+ mtsprg 1,r4
+ lwz r4,SL_SPRG2(r11)
+ mtsprg 2,r4
+ lwz r4,SL_SPRG3(r11)
+ mtsprg 3,r4
+ lwz r4,SL_SPRG4(r11)
+ mtsprg 4,r4
+ lwz r4,SL_SPRG5(r11)
+ mtsprg 5,r4
+ lwz r4,SL_SPRG6(r11)
+ mtsprg 6,r4
+ lwz r4,SL_SPRG7(r11)
+ mtsprg 7,r4
+
+ /* restore the MSR */
+ lwz r3,SL_MSR(r11)
+ mtmsr r3
+
+ /* Restore TB */
+ li r3,0
+ mtspr SPRN_TBWL,r3
+ lwz r3,SL_TBU(r11)
+ lwz r4,SL_TBL(r11)
+ mtspr SPRN_TBWU,r3
+ mtspr SPRN_TBWL,r4
+
+ /* Restore TCR and clear any pending bits in TSR. */
+ lwz r4,SL_TCR(r11)
+ mtspr SPRN_TCR,r4
+ lis r4, (TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS)@h
+ mtspr SPRN_TSR,r4
+
+ /* Kick decrementer */
+ li r0,1
+ mtdec r0
+
+ /* Restore the callee-saved registers and return */
+ lwz r0,SL_CR(r11)
+ mtcr r0
+ lwz r2,SL_R2(r11)
+ lmw r12,SL_R12(r11)
+ lwz r1,SL_SP(r11)
+ lwz r0,SL_LR(r11)
+ mtlr r0
+
+ li r3,0
+ blr
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
new file mode 100644
index 00000000..81c57063
--- /dev/null
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -0,0 +1,626 @@
+/*
+ * sys_ppc32.c: Conversion between 32bit and 64bit native syscalls.
+ *
+ * Copyright (C) 2001 IBM
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ *
+ * These routines maintain argument size conversion between 32bit and 64bit
+ * environment.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/file.h>
+#include <linux/signal.h>
+#include <linux/resource.h>
+#include <linux/times.h>
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/poll.h>
+#include <linux/personality.h>
+#include <linux/stat.h>
+#include <linux/mman.h>
+#include <linux/in.h>
+#include <linux/syscalls.h>
+#include <linux/unistd.h>
+#include <linux/sysctl.h>
+#include <linux/binfmts.h>
+#include <linux/security.h>
+#include <linux/compat.h>
+#include <linux/ptrace.h>
+#include <linux/elf.h>
+#include <linux/ipc.h>
+#include <linux/slab.h>
+
+#include <asm/ptrace.h>
+#include <asm/types.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <asm/time.h>
+#include <asm/mmu_context.h>
+#include <asm/ppc-pci.h>
+#include <asm/syscalls.h>
+#include <asm/switch_to.h>
+
+
+asmlinkage long ppc32_select(u32 n, compat_ulong_t __user *inp,
+ compat_ulong_t __user *outp, compat_ulong_t __user *exp,
+ compat_uptr_t tvp_x)
+{
+ /* sign extend n */
+ return compat_sys_select((int)n, inp, outp, exp, compat_ptr(tvp_x));
+}
+
+/* Note: it is necessary to treat option as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_sysfs(u32 option, u32 arg1, u32 arg2)
+{
+ return sys_sysfs((int)option, arg1, arg2);
+}
+
+#ifdef CONFIG_SYSVIPC
+long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t ptr,
+ u32 fifth)
+{
+ int version;
+
+ version = call >> 16; /* hack for backward compatibility */
+ call &= 0xffff;
+
+ switch (call) {
+
+ case SEMTIMEDOP:
+ if (fifth)
+ /* sign extend semid */
+ return compat_sys_semtimedop((int)first,
+ compat_ptr(ptr), second,
+ compat_ptr(fifth));
+ /* else fall through for normal semop() */
+ case SEMOP:
+ /* struct sembuf is the same on 32 and 64bit :)) */
+ /* sign extend semid */
+ return sys_semtimedop((int)first, compat_ptr(ptr), second,
+ NULL);
+ case SEMGET:
+ /* sign extend key, nsems */
+ return sys_semget((int)first, (int)second, third);
+ case SEMCTL:
+ /* sign extend semid, semnum */
+ return compat_sys_semctl((int)first, (int)second, third,
+ compat_ptr(ptr));
+
+ case MSGSND:
+ /* sign extend msqid */
+ return compat_sys_msgsnd((int)first, (int)second, third,
+ compat_ptr(ptr));
+ case MSGRCV:
+ /* sign extend msqid, msgtyp */
+ return compat_sys_msgrcv((int)first, second, (int)fifth,
+ third, version, compat_ptr(ptr));
+ case MSGGET:
+ /* sign extend key */
+ return sys_msgget((int)first, second);
+ case MSGCTL:
+ /* sign extend msqid */
+ return compat_sys_msgctl((int)first, second, compat_ptr(ptr));
+
+ case SHMAT:
+ /* sign extend shmid */
+ return compat_sys_shmat((int)first, second, third, version,
+ compat_ptr(ptr));
+ case SHMDT:
+ return sys_shmdt(compat_ptr(ptr));
+ case SHMGET:
+ /* sign extend key_t */
+ return sys_shmget((int)first, second, third);
+ case SHMCTL:
+ /* sign extend shmid */
+ return compat_sys_shmctl((int)first, second, compat_ptr(ptr));
+
+ default:
+ return -ENOSYS;
+ }
+
+ return -ENOSYS;
+}
+#endif
+
+/* Note: it is necessary to treat out_fd and in_fd as unsigned ints,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_sendfile(u32 out_fd, u32 in_fd, compat_off_t __user * offset, u32 count)
+{
+ mm_segment_t old_fs = get_fs();
+ int ret;
+ off_t of;
+ off_t __user *up;
+
+ if (offset && get_user(of, offset))
+ return -EFAULT;
+
+ /* The __user pointer cast is valid because of the set_fs() */
+ set_fs(KERNEL_DS);
+ up = offset ? (off_t __user *) &of : NULL;
+ ret = sys_sendfile((int)out_fd, (int)in_fd, up, count);
+ set_fs(old_fs);
+
+ if (offset && put_user(of, offset))
+ return -EFAULT;
+
+ return ret;
+}
+
+asmlinkage int compat_sys_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, s32 count)
+{
+ mm_segment_t old_fs = get_fs();
+ int ret;
+ loff_t lof;
+ loff_t __user *up;
+
+ if (offset && get_user(lof, offset))
+ return -EFAULT;
+
+ /* The __user pointer cast is valid because of the set_fs() */
+ set_fs(KERNEL_DS);
+ up = offset ? (loff_t __user *) &lof : NULL;
+ ret = sys_sendfile64(out_fd, in_fd, up, count);
+ set_fs(old_fs);
+
+ if (offset && put_user(lof, offset))
+ return -EFAULT;
+
+ return ret;
+}
+
+long compat_sys_execve(unsigned long a0, unsigned long a1, unsigned long a2,
+ unsigned long a3, unsigned long a4, unsigned long a5,
+ struct pt_regs *regs)
+{
+ int error;
+ char * filename;
+
+ filename = getname((char __user *) a0);
+ error = PTR_ERR(filename);
+ if (IS_ERR(filename))
+ goto out;
+ flush_fp_to_thread(current);
+ flush_altivec_to_thread(current);
+
+ error = compat_do_execve(filename, compat_ptr(a1), compat_ptr(a2), regs);
+
+ putname(filename);
+
+out:
+ return error;
+}
+
+/* Note: it is necessary to treat option as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_prctl(u32 option, u32 arg2, u32 arg3, u32 arg4, u32 arg5)
+{
+ return sys_prctl((int)option,
+ (unsigned long) arg2,
+ (unsigned long) arg3,
+ (unsigned long) arg4,
+ (unsigned long) arg5);
+}
+
+/* Note: it is necessary to treat pid as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_sched_rr_get_interval(u32 pid, struct compat_timespec __user *interval)
+{
+ struct timespec t;
+ int ret;
+ mm_segment_t old_fs = get_fs ();
+
+ /* The __user pointer cast is valid because of the set_fs() */
+ set_fs (KERNEL_DS);
+ ret = sys_sched_rr_get_interval((int)pid, (struct timespec __user *) &t);
+ set_fs (old_fs);
+ if (put_compat_timespec(&t, interval))
+ return -EFAULT;
+ return ret;
+}
+
+/* Note: it is necessary to treat mode as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_access(const char __user * filename, u32 mode)
+{
+ return sys_access(filename, (int)mode);
+}
+
+
+/* Note: it is necessary to treat mode as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_creat(const char __user * pathname, u32 mode)
+{
+ return sys_creat(pathname, (int)mode);
+}
+
+
+/* Note: it is necessary to treat pid and options as unsigned ints,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_waitpid(u32 pid, unsigned int __user * stat_addr, u32 options)
+{
+ return sys_waitpid((int)pid, stat_addr, (int)options);
+}
+
+
+/* Note: it is necessary to treat gidsetsize as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_getgroups(u32 gidsetsize, gid_t __user *grouplist)
+{
+ return sys_getgroups((int)gidsetsize, grouplist);
+}
+
+
+/* Note: it is necessary to treat pid as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_getpgid(u32 pid)
+{
+ return sys_getpgid((int)pid);
+}
+
+
+
+/* Note: it is necessary to treat pid as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_getsid(u32 pid)
+{
+ return sys_getsid((int)pid);
+}
+
+
+/* Note: it is necessary to treat pid and sig as unsigned ints,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_kill(u32 pid, u32 sig)
+{
+ return sys_kill((int)pid, (int)sig);
+}
+
+
+/* Note: it is necessary to treat mode as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_mkdir(const char __user * pathname, u32 mode)
+{
+ return sys_mkdir(pathname, (int)mode);
+}
+
+long compat_sys_nice(u32 increment)
+{
+ /* sign extend increment */
+ return sys_nice((int)increment);
+}
+
+off_t ppc32_lseek(unsigned int fd, u32 offset, unsigned int origin)
+{
+ /* sign extend n */
+ return sys_lseek(fd, (int)offset, origin);
+}
+
+long compat_sys_truncate(const char __user * path, u32 length)
+{
+ /* sign extend length */
+ return sys_truncate(path, (int)length);
+}
+
+long compat_sys_ftruncate(int fd, u32 length)
+{
+ /* sign extend length */
+ return sys_ftruncate(fd, (int)length);
+}
+
+/* Note: it is necessary to treat bufsiz as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_readlink(const char __user * path, char __user * buf, u32 bufsiz)
+{
+ return sys_readlink(path, buf, (int)bufsiz);
+}
+
+/* Note: it is necessary to treat option as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_sched_get_priority_max(u32 policy)
+{
+ return sys_sched_get_priority_max((int)policy);
+}
+
+
+/* Note: it is necessary to treat policy as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_sched_get_priority_min(u32 policy)
+{
+ return sys_sched_get_priority_min((int)policy);
+}
+
+
+/* Note: it is necessary to treat pid as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_sched_getparam(u32 pid, struct sched_param __user *param)
+{
+ return sys_sched_getparam((int)pid, param);
+}
+
+
+/* Note: it is necessary to treat pid as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_sched_getscheduler(u32 pid)
+{
+ return sys_sched_getscheduler((int)pid);
+}
+
+
+/* Note: it is necessary to treat pid as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_sched_setparam(u32 pid, struct sched_param __user *param)
+{
+ return sys_sched_setparam((int)pid, param);
+}
+
+
+/* Note: it is necessary to treat pid and policy as unsigned ints,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_sched_setscheduler(u32 pid, u32 policy, struct sched_param __user *param)
+{
+ return sys_sched_setscheduler((int)pid, (int)policy, param);
+}
+
+
+/* Note: it is necessary to treat len as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_setdomainname(char __user *name, u32 len)
+{
+ return sys_setdomainname(name, (int)len);
+}
+
+
+/* Note: it is necessary to treat gidsetsize as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_setgroups(u32 gidsetsize, gid_t __user *grouplist)
+{
+ return sys_setgroups((int)gidsetsize, grouplist);
+}
+
+
+asmlinkage long compat_sys_sethostname(char __user *name, u32 len)
+{
+ /* sign extend len */
+ return sys_sethostname(name, (int)len);
+}
+
+
+/* Note: it is necessary to treat pid and pgid as unsigned ints,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_setpgid(u32 pid, u32 pgid)
+{
+ return sys_setpgid((int)pid, (int)pgid);
+}
+
+long compat_sys_getpriority(u32 which, u32 who)
+{
+ /* sign extend which and who */
+ return sys_getpriority((int)which, (int)who);
+}
+
+long compat_sys_setpriority(u32 which, u32 who, u32 niceval)
+{
+ /* sign extend which, who and niceval */
+ return sys_setpriority((int)which, (int)who, (int)niceval);
+}
+
+long compat_sys_ioprio_get(u32 which, u32 who)
+{
+ /* sign extend which and who */
+ return sys_ioprio_get((int)which, (int)who);
+}
+
+long compat_sys_ioprio_set(u32 which, u32 who, u32 ioprio)
+{
+ /* sign extend which, who and ioprio */
+ return sys_ioprio_set((int)which, (int)who, (int)ioprio);
+}
+
+/* Note: it is necessary to treat newmask as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_ssetmask(u32 newmask)
+{
+ return sys_ssetmask((int) newmask);
+}
+
+asmlinkage long compat_sys_syslog(u32 type, char __user * buf, u32 len)
+{
+ /* sign extend len */
+ return sys_syslog(type, buf, (int)len);
+}
+
+
+/* Note: it is necessary to treat mask as an unsigned int,
+ * with the corresponding cast to a signed int to insure that the
+ * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
+ * and the register representation of a signed int (msr in 64-bit mode) is performed.
+ */
+asmlinkage long compat_sys_umask(u32 mask)
+{
+ return sys_umask((int)mask);
+}
+
+unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long pgoff)
+{
+ /* This should remain 12 even if PAGE_SIZE changes */
+ return sys_mmap(addr, len, prot, flags, fd, pgoff << 12);
+}
+
+long compat_sys_tgkill(u32 tgid, u32 pid, int sig)
+{
+ /* sign extend tgid, pid */
+ return sys_tgkill((int)tgid, (int)pid, sig);
+}
+
+/*
+ * long long munging:
+ * The 32 bit ABI passes long longs in an odd even register pair.
+ */
+
+compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
+ u32 reg6, u32 poshi, u32 poslo)
+{
+ return sys_pread64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo);
+}
+
+compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count,
+ u32 reg6, u32 poshi, u32 poslo)
+{
+ return sys_pwrite64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo);
+}
+
+compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offhi, u32 offlo, u32 count)
+{
+ return sys_readahead(fd, ((loff_t)offhi << 32) | offlo, count);
+}
+
+asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4,
+ unsigned long high, unsigned long low)
+{
+ return sys_truncate(path, (high << 32) | low);
+}
+
+asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
+ u32 lenhi, u32 lenlo)
+{
+ return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo,
+ ((loff_t)lenhi << 32) | lenlo);
+}
+
+asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high,
+ unsigned long low)
+{
+ return sys_ftruncate(fd, (high << 32) | low);
+}
+
+long ppc32_lookup_dcookie(u32 cookie_high, u32 cookie_low, char __user *buf,
+ size_t len)
+{
+ return sys_lookup_dcookie((u64)cookie_high << 32 | cookie_low,
+ buf, len);
+}
+
+long ppc32_fadvise64(int fd, u32 unused, u32 offset_high, u32 offset_low,
+ size_t len, int advice)
+{
+ return sys_fadvise64(fd, (u64)offset_high << 32 | offset_low, len,
+ advice);
+}
+
+asmlinkage long compat_sys_add_key(const char __user *_type,
+ const char __user *_description,
+ const void __user *_payload,
+ u32 plen,
+ u32 ringid)
+{
+ return sys_add_key(_type, _description, _payload, plen, ringid);
+}
+
+asmlinkage long compat_sys_request_key(const char __user *_type,
+ const char __user *_description,
+ const char __user *_callout_info,
+ u32 destringid)
+{
+ return sys_request_key(_type, _description, _callout_info, destringid);
+}
+
+asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags,
+ unsigned offset_hi, unsigned offset_lo,
+ unsigned nbytes_hi, unsigned nbytes_lo)
+{
+ loff_t offset = ((loff_t)offset_hi << 32) | offset_lo;
+ loff_t nbytes = ((loff_t)nbytes_hi << 32) | nbytes_lo;
+
+ return sys_sync_file_range(fd, offset, nbytes, flags);
+}
+
+asmlinkage long compat_sys_fanotify_mark(int fanotify_fd, unsigned int flags,
+ unsigned mask_hi, unsigned mask_lo,
+ int dfd, const char __user *pathname)
+{
+ u64 mask = ((u64)mask_hi << 32) | mask_lo;
+ return sys_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
+}
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
new file mode 100644
index 00000000..f2496f2f
--- /dev/null
+++ b/arch/powerpc/kernel/syscalls.c
@@ -0,0 +1,138 @@
+/*
+ * Implementation of various system calls for Linux/PowerPC
+ *
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Derived from "arch/i386/kernel/sys_i386.c"
+ * Adapted from the i386 version by Gary Thomas
+ * Modified by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras (paulus@cs.anu.edu.au).
+ *
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/PPC
+ * platform.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/syscalls.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/mman.h>
+#include <linux/sys.h>
+#include <linux/ipc.h>
+#include <linux/utsname.h>
+#include <linux/file.h>
+#include <linux/init.h>
+#include <linux/personality.h>
+
+#include <asm/uaccess.h>
+#include <asm/syscalls.h>
+#include <asm/time.h>
+#include <asm/unistd.h>
+
+static inline unsigned long do_mmap2(unsigned long addr, size_t len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long off, int shift)
+{
+ unsigned long ret = -EINVAL;
+
+ if (!arch_validate_prot(prot))
+ goto out;
+
+ if (shift) {
+ if (off & ((1 << shift) - 1))
+ goto out;
+ off >>= shift;
+ }
+
+ ret = sys_mmap_pgoff(addr, len, prot, flags, fd, off);
+out:
+ return ret;
+}
+
+unsigned long sys_mmap2(unsigned long addr, size_t len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long pgoff)
+{
+ return do_mmap2(addr, len, prot, flags, fd, pgoff, PAGE_SHIFT-12);
+}
+
+unsigned long sys_mmap(unsigned long addr, size_t len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, off_t offset)
+{
+ return do_mmap2(addr, len, prot, flags, fd, offset, PAGE_SHIFT);
+}
+
+#ifdef CONFIG_PPC32
+/*
+ * Due to some executables calling the wrong select we sometimes
+ * get wrong args. This determines how the args are being passed
+ * (a single ptr to them all args passed) then calls
+ * sys_select() with the appropriate args. -- Cort
+ */
+int
+ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp)
+{
+ if ( (unsigned long)n >= 4096 )
+ {
+ unsigned long __user *buffer = (unsigned long __user *)n;
+ if (!access_ok(VERIFY_READ, buffer, 5*sizeof(unsigned long))
+ || __get_user(n, buffer)
+ || __get_user(inp, ((fd_set __user * __user *)(buffer+1)))
+ || __get_user(outp, ((fd_set __user * __user *)(buffer+2)))
+ || __get_user(exp, ((fd_set __user * __user *)(buffer+3)))
+ || __get_user(tvp, ((struct timeval __user * __user *)(buffer+4))))
+ return -EFAULT;
+ }
+ return sys_select(n, inp, outp, exp, tvp);
+}
+#endif
+
+#ifdef CONFIG_PPC64
+long ppc64_personality(unsigned long personality)
+{
+ long ret;
+
+ if (personality(current->personality) == PER_LINUX32
+ && personality == PER_LINUX)
+ personality = PER_LINUX32;
+ ret = sys_personality(personality);
+ if (ret == PER_LINUX32)
+ ret = PER_LINUX;
+ return ret;
+}
+#endif
+
+long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
+ u32 len_high, u32 len_low)
+{
+ return sys_fadvise64(fd, (u64)offset_high << 32 | offset_low,
+ (u64)len_high << 32 | len_low, advice);
+}
+
+void do_show_syscall(unsigned long r3, unsigned long r4, unsigned long r5,
+ unsigned long r6, unsigned long r7, unsigned long r8,
+ struct pt_regs *regs)
+{
+ printk("syscall %ld(%lx, %lx, %lx, %lx, %lx, %lx) regs=%p current=%p"
+ " cpu=%d\n", regs->gpr[0], r3, r4, r5, r6, r7, r8, regs,
+ current, smp_processor_id());
+}
+
+void do_show_syscall_exit(unsigned long r3)
+{
+ printk(" -> %lx, current=%p cpu=%d\n", r3, current, smp_processor_id());
+}
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
new file mode 100644
index 00000000..3529446c
--- /dev/null
+++ b/arch/powerpc/kernel/sysfs.c
@@ -0,0 +1,666 @@
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/export.h>
+#include <linux/nodemask.h>
+#include <linux/cpumask.h>
+#include <linux/notifier.h>
+
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/hvcall.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/smp.h>
+#include <asm/pmc.h>
+
+#include "cacheinfo.h"
+
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+#include <asm/lppaca.h>
+#endif
+
+static DEFINE_PER_CPU(struct cpu, cpu_devices);
+
+/*
+ * SMT snooze delay stuff, 64-bit only for now
+ */
+
+#ifdef CONFIG_PPC64
+
+/* Time in microseconds we delay before sleeping in the idle loop */
+DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 };
+
+static ssize_t store_smt_snooze_delay(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+ ssize_t ret;
+ long snooze;
+
+ ret = sscanf(buf, "%ld", &snooze);
+ if (ret != 1)
+ return -EINVAL;
+
+ per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
+ update_smt_snooze_delay(snooze);
+
+ return count;
+}
+
+static ssize_t show_smt_snooze_delay(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+
+ return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id));
+}
+
+static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
+ store_smt_snooze_delay);
+
+static int __init setup_smt_snooze_delay(char *str)
+{
+ unsigned int cpu;
+ long snooze;
+
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return 1;
+
+ snooze = simple_strtol(str, NULL, 10);
+ for_each_possible_cpu(cpu)
+ per_cpu(smt_snooze_delay, cpu) = snooze;
+
+ return 1;
+}
+__setup("smt-snooze-delay=", setup_smt_snooze_delay);
+
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Enabling PMCs will slow partition context switch times so we only do
+ * it the first time we write to the PMCs.
+ */
+
+static DEFINE_PER_CPU(char, pmcs_enabled);
+
+void ppc_enable_pmcs(void)
+{
+ ppc_set_pmu_inuse(1);
+
+ /* Only need to enable them once */
+ if (__get_cpu_var(pmcs_enabled))
+ return;
+
+ __get_cpu_var(pmcs_enabled) = 1;
+
+ if (ppc_md.enable_pmcs)
+ ppc_md.enable_pmcs();
+}
+EXPORT_SYMBOL(ppc_enable_pmcs);
+
+#define SYSFS_PMCSETUP(NAME, ADDRESS) \
+static void read_##NAME(void *val) \
+{ \
+ *(unsigned long *)val = mfspr(ADDRESS); \
+} \
+static void write_##NAME(void *val) \
+{ \
+ ppc_enable_pmcs(); \
+ mtspr(ADDRESS, *(unsigned long *)val); \
+} \
+static ssize_t show_##NAME(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, dev); \
+ unsigned long val; \
+ smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \
+ return sprintf(buf, "%lx\n", val); \
+} \
+static ssize_t __used \
+ store_##NAME(struct device *dev, struct device_attribute *attr, \
+ const char *buf, size_t count) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, dev); \
+ unsigned long val; \
+ int ret = sscanf(buf, "%lx", &val); \
+ if (ret != 1) \
+ return -EINVAL; \
+ smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \
+ return count; \
+}
+
+
+/* Let's define all possible registers, we'll only hook up the ones
+ * that are implemented on the current processor
+ */
+
+#if defined(CONFIG_PPC64)
+#define HAS_PPC_PMC_CLASSIC 1
+#define HAS_PPC_PMC_IBM 1
+#define HAS_PPC_PMC_PA6T 1
+#elif defined(CONFIG_6xx)
+#define HAS_PPC_PMC_CLASSIC 1
+#define HAS_PPC_PMC_IBM 1
+#define HAS_PPC_PMC_G4 1
+#endif
+
+
+#ifdef HAS_PPC_PMC_CLASSIC
+SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
+SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1);
+SYSFS_PMCSETUP(pmc1, SPRN_PMC1);
+SYSFS_PMCSETUP(pmc2, SPRN_PMC2);
+SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
+SYSFS_PMCSETUP(pmc4, SPRN_PMC4);
+SYSFS_PMCSETUP(pmc5, SPRN_PMC5);
+SYSFS_PMCSETUP(pmc6, SPRN_PMC6);
+
+#ifdef HAS_PPC_PMC_G4
+SYSFS_PMCSETUP(mmcr2, SPRN_MMCR2);
+#endif
+
+#ifdef CONFIG_PPC64
+SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
+SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
+
+SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
+SYSFS_PMCSETUP(purr, SPRN_PURR);
+SYSFS_PMCSETUP(spurr, SPRN_SPURR);
+SYSFS_PMCSETUP(dscr, SPRN_DSCR);
+SYSFS_PMCSETUP(pir, SPRN_PIR);
+
+static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+static DEVICE_ATTR(spurr, 0600, show_spurr, NULL);
+static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
+static DEVICE_ATTR(purr, 0600, show_purr, store_purr);
+static DEVICE_ATTR(pir, 0400, show_pir, NULL);
+
+unsigned long dscr_default = 0;
+EXPORT_SYMBOL(dscr_default);
+
+static ssize_t show_dscr_default(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%lx\n", dscr_default);
+}
+
+static ssize_t __used store_dscr_default(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ unsigned long val;
+ int ret = 0;
+
+ ret = sscanf(buf, "%lx", &val);
+ if (ret != 1)
+ return -EINVAL;
+ dscr_default = val;
+
+ return count;
+}
+
+static DEVICE_ATTR(dscr_default, 0600,
+ show_dscr_default, store_dscr_default);
+
+static void sysfs_create_dscr_default(void)
+{
+ int err = 0;
+ if (cpu_has_feature(CPU_FTR_DSCR))
+ err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
+}
+#endif /* CONFIG_PPC64 */
+
+#ifdef HAS_PPC_PMC_PA6T
+SYSFS_PMCSETUP(pa6t_pmc0, SPRN_PA6T_PMC0);
+SYSFS_PMCSETUP(pa6t_pmc1, SPRN_PA6T_PMC1);
+SYSFS_PMCSETUP(pa6t_pmc2, SPRN_PA6T_PMC2);
+SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3);
+SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4);
+SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5);
+#ifdef CONFIG_DEBUG_KERNEL
+SYSFS_PMCSETUP(hid0, SPRN_HID0);
+SYSFS_PMCSETUP(hid1, SPRN_HID1);
+SYSFS_PMCSETUP(hid4, SPRN_HID4);
+SYSFS_PMCSETUP(hid5, SPRN_HID5);
+SYSFS_PMCSETUP(ima0, SPRN_PA6T_IMA0);
+SYSFS_PMCSETUP(ima1, SPRN_PA6T_IMA1);
+SYSFS_PMCSETUP(ima2, SPRN_PA6T_IMA2);
+SYSFS_PMCSETUP(ima3, SPRN_PA6T_IMA3);
+SYSFS_PMCSETUP(ima4, SPRN_PA6T_IMA4);
+SYSFS_PMCSETUP(ima5, SPRN_PA6T_IMA5);
+SYSFS_PMCSETUP(ima6, SPRN_PA6T_IMA6);
+SYSFS_PMCSETUP(ima7, SPRN_PA6T_IMA7);
+SYSFS_PMCSETUP(ima8, SPRN_PA6T_IMA8);
+SYSFS_PMCSETUP(ima9, SPRN_PA6T_IMA9);
+SYSFS_PMCSETUP(imaat, SPRN_PA6T_IMAAT);
+SYSFS_PMCSETUP(btcr, SPRN_PA6T_BTCR);
+SYSFS_PMCSETUP(pccr, SPRN_PA6T_PCCR);
+SYSFS_PMCSETUP(rpccr, SPRN_PA6T_RPCCR);
+SYSFS_PMCSETUP(der, SPRN_PA6T_DER);
+SYSFS_PMCSETUP(mer, SPRN_PA6T_MER);
+SYSFS_PMCSETUP(ber, SPRN_PA6T_BER);
+SYSFS_PMCSETUP(ier, SPRN_PA6T_IER);
+SYSFS_PMCSETUP(sier, SPRN_PA6T_SIER);
+SYSFS_PMCSETUP(siar, SPRN_PA6T_SIAR);
+SYSFS_PMCSETUP(tsr0, SPRN_PA6T_TSR0);
+SYSFS_PMCSETUP(tsr1, SPRN_PA6T_TSR1);
+SYSFS_PMCSETUP(tsr2, SPRN_PA6T_TSR2);
+SYSFS_PMCSETUP(tsr3, SPRN_PA6T_TSR3);
+#endif /* CONFIG_DEBUG_KERNEL */
+#endif /* HAS_PPC_PMC_PA6T */
+
+#ifdef HAS_PPC_PMC_IBM
+static struct device_attribute ibm_common_attrs[] = {
+ __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
+ __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
+};
+#endif /* HAS_PPC_PMC_G4 */
+
+#ifdef HAS_PPC_PMC_G4
+static struct device_attribute g4_common_attrs[] = {
+ __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
+ __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
+ __ATTR(mmcr2, 0600, show_mmcr2, store_mmcr2),
+};
+#endif /* HAS_PPC_PMC_G4 */
+
+static struct device_attribute classic_pmc_attrs[] = {
+ __ATTR(pmc1, 0600, show_pmc1, store_pmc1),
+ __ATTR(pmc2, 0600, show_pmc2, store_pmc2),
+ __ATTR(pmc3, 0600, show_pmc3, store_pmc3),
+ __ATTR(pmc4, 0600, show_pmc4, store_pmc4),
+ __ATTR(pmc5, 0600, show_pmc5, store_pmc5),
+ __ATTR(pmc6, 0600, show_pmc6, store_pmc6),
+#ifdef CONFIG_PPC64
+ __ATTR(pmc7, 0600, show_pmc7, store_pmc7),
+ __ATTR(pmc8, 0600, show_pmc8, store_pmc8),
+#endif
+};
+
+#ifdef HAS_PPC_PMC_PA6T
+static struct device_attribute pa6t_attrs[] = {
+ __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
+ __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
+ __ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0),
+ __ATTR(pmc1, 0600, show_pa6t_pmc1, store_pa6t_pmc1),
+ __ATTR(pmc2, 0600, show_pa6t_pmc2, store_pa6t_pmc2),
+ __ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3),
+ __ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4),
+ __ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5),
+#ifdef CONFIG_DEBUG_KERNEL
+ __ATTR(hid0, 0600, show_hid0, store_hid0),
+ __ATTR(hid1, 0600, show_hid1, store_hid1),
+ __ATTR(hid4, 0600, show_hid4, store_hid4),
+ __ATTR(hid5, 0600, show_hid5, store_hid5),
+ __ATTR(ima0, 0600, show_ima0, store_ima0),
+ __ATTR(ima1, 0600, show_ima1, store_ima1),
+ __ATTR(ima2, 0600, show_ima2, store_ima2),
+ __ATTR(ima3, 0600, show_ima3, store_ima3),
+ __ATTR(ima4, 0600, show_ima4, store_ima4),
+ __ATTR(ima5, 0600, show_ima5, store_ima5),
+ __ATTR(ima6, 0600, show_ima6, store_ima6),
+ __ATTR(ima7, 0600, show_ima7, store_ima7),
+ __ATTR(ima8, 0600, show_ima8, store_ima8),
+ __ATTR(ima9, 0600, show_ima9, store_ima9),
+ __ATTR(imaat, 0600, show_imaat, store_imaat),
+ __ATTR(btcr, 0600, show_btcr, store_btcr),
+ __ATTR(pccr, 0600, show_pccr, store_pccr),
+ __ATTR(rpccr, 0600, show_rpccr, store_rpccr),
+ __ATTR(der, 0600, show_der, store_der),
+ __ATTR(mer, 0600, show_mer, store_mer),
+ __ATTR(ber, 0600, show_ber, store_ber),
+ __ATTR(ier, 0600, show_ier, store_ier),
+ __ATTR(sier, 0600, show_sier, store_sier),
+ __ATTR(siar, 0600, show_siar, store_siar),
+ __ATTR(tsr0, 0600, show_tsr0, store_tsr0),
+ __ATTR(tsr1, 0600, show_tsr1, store_tsr1),
+ __ATTR(tsr2, 0600, show_tsr2, store_tsr2),
+ __ATTR(tsr3, 0600, show_tsr3, store_tsr3),
+#endif /* CONFIG_DEBUG_KERNEL */
+};
+#endif /* HAS_PPC_PMC_PA6T */
+#endif /* HAS_PPC_PMC_CLASSIC */
+
+static void __cpuinit register_cpu_online(unsigned int cpu)
+{
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+ struct device *s = &c->dev;
+ struct device_attribute *attrs, *pmc_attrs;
+ int i, nattrs;
+
+#ifdef CONFIG_PPC64
+ if (cpu_has_feature(CPU_FTR_SMT))
+ device_create_file(s, &dev_attr_smt_snooze_delay);
+#endif
+
+ /* PMC stuff */
+ switch (cur_cpu_spec->pmc_type) {
+#ifdef HAS_PPC_PMC_IBM
+ case PPC_PMC_IBM:
+ attrs = ibm_common_attrs;
+ nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute);
+ pmc_attrs = classic_pmc_attrs;
+ break;
+#endif /* HAS_PPC_PMC_IBM */
+#ifdef HAS_PPC_PMC_G4
+ case PPC_PMC_G4:
+ attrs = g4_common_attrs;
+ nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute);
+ pmc_attrs = classic_pmc_attrs;
+ break;
+#endif /* HAS_PPC_PMC_G4 */
+#ifdef HAS_PPC_PMC_PA6T
+ case PPC_PMC_PA6T:
+ /* PA Semi starts counting at PMC0 */
+ attrs = pa6t_attrs;
+ nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute);
+ pmc_attrs = NULL;
+ break;
+#endif /* HAS_PPC_PMC_PA6T */
+ default:
+ attrs = NULL;
+ nattrs = 0;
+ pmc_attrs = NULL;
+ }
+
+ for (i = 0; i < nattrs; i++)
+ device_create_file(s, &attrs[i]);
+
+ if (pmc_attrs)
+ for (i = 0; i < cur_cpu_spec->num_pmcs; i++)
+ device_create_file(s, &pmc_attrs[i]);
+
+#ifdef CONFIG_PPC64
+ if (cpu_has_feature(CPU_FTR_MMCRA))
+ device_create_file(s, &dev_attr_mmcra);
+
+ if (cpu_has_feature(CPU_FTR_PURR))
+ device_create_file(s, &dev_attr_purr);
+
+ if (cpu_has_feature(CPU_FTR_SPURR))
+ device_create_file(s, &dev_attr_spurr);
+
+ if (cpu_has_feature(CPU_FTR_DSCR))
+ device_create_file(s, &dev_attr_dscr);
+
+ if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
+ device_create_file(s, &dev_attr_pir);
+#endif /* CONFIG_PPC64 */
+
+ cacheinfo_cpu_online(cpu);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void unregister_cpu_online(unsigned int cpu)
+{
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+ struct device *s = &c->dev;
+ struct device_attribute *attrs, *pmc_attrs;
+ int i, nattrs;
+
+ BUG_ON(!c->hotpluggable);
+
+#ifdef CONFIG_PPC64
+ if (cpu_has_feature(CPU_FTR_SMT))
+ device_remove_file(s, &dev_attr_smt_snooze_delay);
+#endif
+
+ /* PMC stuff */
+ switch (cur_cpu_spec->pmc_type) {
+#ifdef HAS_PPC_PMC_IBM
+ case PPC_PMC_IBM:
+ attrs = ibm_common_attrs;
+ nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute);
+ pmc_attrs = classic_pmc_attrs;
+ break;
+#endif /* HAS_PPC_PMC_IBM */
+#ifdef HAS_PPC_PMC_G4
+ case PPC_PMC_G4:
+ attrs = g4_common_attrs;
+ nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute);
+ pmc_attrs = classic_pmc_attrs;
+ break;
+#endif /* HAS_PPC_PMC_G4 */
+#ifdef HAS_PPC_PMC_PA6T
+ case PPC_PMC_PA6T:
+ /* PA Semi starts counting at PMC0 */
+ attrs = pa6t_attrs;
+ nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute);
+ pmc_attrs = NULL;
+ break;
+#endif /* HAS_PPC_PMC_PA6T */
+ default:
+ attrs = NULL;
+ nattrs = 0;
+ pmc_attrs = NULL;
+ }
+
+ for (i = 0; i < nattrs; i++)
+ device_remove_file(s, &attrs[i]);
+
+ if (pmc_attrs)
+ for (i = 0; i < cur_cpu_spec->num_pmcs; i++)
+ device_remove_file(s, &pmc_attrs[i]);
+
+#ifdef CONFIG_PPC64
+ if (cpu_has_feature(CPU_FTR_MMCRA))
+ device_remove_file(s, &dev_attr_mmcra);
+
+ if (cpu_has_feature(CPU_FTR_PURR))
+ device_remove_file(s, &dev_attr_purr);
+
+ if (cpu_has_feature(CPU_FTR_SPURR))
+ device_remove_file(s, &dev_attr_spurr);
+
+ if (cpu_has_feature(CPU_FTR_DSCR))
+ device_remove_file(s, &dev_attr_dscr);
+
+ if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
+ device_remove_file(s, &dev_attr_pir);
+#endif /* CONFIG_PPC64 */
+
+ cacheinfo_cpu_offline(cpu);
+}
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+ssize_t arch_cpu_probe(const char *buf, size_t count)
+{
+ if (ppc_md.cpu_probe)
+ return ppc_md.cpu_probe(buf, count);
+
+ return -EINVAL;
+}
+
+ssize_t arch_cpu_release(const char *buf, size_t count)
+{
+ if (ppc_md.cpu_release)
+ return ppc_md.cpu_release(buf, count);
+
+ return -EINVAL;
+}
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static int __cpuinit sysfs_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned int)(long)hcpu;
+
+ switch (action) {
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ register_cpu_online(cpu);
+ break;
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ unregister_cpu_online(cpu);
+ break;
+#endif
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata sysfs_cpu_nb = {
+ .notifier_call = sysfs_cpu_notify,
+};
+
+static DEFINE_MUTEX(cpu_mutex);
+
+int cpu_add_dev_attr(struct device_attribute *attr)
+{
+ int cpu;
+
+ mutex_lock(&cpu_mutex);
+
+ for_each_possible_cpu(cpu) {
+ device_create_file(get_cpu_device(cpu), attr);
+ }
+
+ mutex_unlock(&cpu_mutex);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cpu_add_dev_attr);
+
+int cpu_add_dev_attr_group(struct attribute_group *attrs)
+{
+ int cpu;
+ struct device *dev;
+ int ret;
+
+ mutex_lock(&cpu_mutex);
+
+ for_each_possible_cpu(cpu) {
+ dev = get_cpu_device(cpu);
+ ret = sysfs_create_group(&dev->kobj, attrs);
+ WARN_ON(ret != 0);
+ }
+
+ mutex_unlock(&cpu_mutex);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cpu_add_dev_attr_group);
+
+
+void cpu_remove_dev_attr(struct device_attribute *attr)
+{
+ int cpu;
+
+ mutex_lock(&cpu_mutex);
+
+ for_each_possible_cpu(cpu) {
+ device_remove_file(get_cpu_device(cpu), attr);
+ }
+
+ mutex_unlock(&cpu_mutex);
+}
+EXPORT_SYMBOL_GPL(cpu_remove_dev_attr);
+
+void cpu_remove_dev_attr_group(struct attribute_group *attrs)
+{
+ int cpu;
+ struct device *dev;
+
+ mutex_lock(&cpu_mutex);
+
+ for_each_possible_cpu(cpu) {
+ dev = get_cpu_device(cpu);
+ sysfs_remove_group(&dev->kobj, attrs);
+ }
+
+ mutex_unlock(&cpu_mutex);
+}
+EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group);
+
+
+/* NUMA stuff */
+
+#ifdef CONFIG_NUMA
+static void register_nodes(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_NUMNODES; i++)
+ register_one_node(i);
+}
+
+int sysfs_add_device_to_node(struct device *dev, int nid)
+{
+ struct node *node = &node_devices[nid];
+ return sysfs_create_link(&node->dev.kobj, &dev->kobj,
+ kobject_name(&dev->kobj));
+}
+EXPORT_SYMBOL_GPL(sysfs_add_device_to_node);
+
+void sysfs_remove_device_from_node(struct device *dev, int nid)
+{
+ struct node *node = &node_devices[nid];
+ sysfs_remove_link(&node->dev.kobj, kobject_name(&dev->kobj));
+}
+EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node);
+
+#else
+static void register_nodes(void)
+{
+ return;
+}
+
+#endif
+
+/* Only valid if CPU is present. */
+static ssize_t show_physical_id(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+
+ return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->dev.id));
+}
+static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL);
+
+static int __init topology_init(void)
+{
+ int cpu;
+
+ register_nodes();
+ register_cpu_notifier(&sysfs_cpu_nb);
+
+ for_each_possible_cpu(cpu) {
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+
+ /*
+ * For now, we just see if the system supports making
+ * the RTAS calls for CPU hotplug. But, there may be a
+ * more comprehensive way to do this for an individual
+ * CPU. For instance, the boot cpu might never be valid
+ * for hotplugging.
+ */
+ if (ppc_md.cpu_die)
+ c->hotpluggable = 1;
+
+ if (cpu_online(cpu) || c->hotpluggable) {
+ register_cpu(c, cpu);
+
+ device_create_file(&c->dev, &dev_attr_physical_id);
+ }
+
+ if (cpu_online(cpu))
+ register_cpu_online(cpu);
+ }
+#ifdef CONFIG_PPC64
+ sysfs_create_dscr_default();
+#endif /* CONFIG_PPC64 */
+
+ return 0;
+}
+subsys_initcall(topology_init);
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
new file mode 100644
index 00000000..93219c34
--- /dev/null
+++ b/arch/powerpc/kernel/systbl.S
@@ -0,0 +1,47 @@
+/*
+ * This file contains the table of syscall-handling functions.
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras.
+ *
+ * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
+ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/ppc_asm.h>
+
+#ifdef CONFIG_PPC64
+#define SYSCALL(func) .llong .sys_##func,.sys_##func
+#define COMPAT_SYS(func) .llong .sys_##func,.compat_sys_##func
+#define PPC_SYS(func) .llong .ppc_##func,.ppc_##func
+#define OLDSYS(func) .llong .sys_ni_syscall,.sys_ni_syscall
+#define SYS32ONLY(func) .llong .sys_ni_syscall,.compat_sys_##func
+#define SYSX(f, f3264, f32) .llong .f,.f3264
+#else
+#define SYSCALL(func) .long sys_##func
+#define COMPAT_SYS(func) .long sys_##func
+#define PPC_SYS(func) .long ppc_##func
+#define OLDSYS(func) .long sys_##func
+#define SYS32ONLY(func) .long sys_##func
+#define SYSX(f, f3264, f32) .long f32
+#endif
+#define SYSCALL_SPU(func) SYSCALL(func)
+#define COMPAT_SYS_SPU(func) COMPAT_SYS(func)
+#define PPC_SYS_SPU(func) PPC_SYS(func)
+#define SYSX_SPU(f, f3264, f32) SYSX(f, f3264, f32)
+
+#ifdef CONFIG_PPC64
+#define sys_sigpending sys_ni_syscall
+#define sys_old_getrlimit sys_ni_syscall
+
+ .p2align 3
+#endif
+
+_GLOBAL(sys_call_table)
+#include <asm/systbl.h>
diff --git a/arch/powerpc/kernel/systbl_chk.c b/arch/powerpc/kernel/systbl_chk.c
new file mode 100644
index 00000000..238aa63c
--- /dev/null
+++ b/arch/powerpc/kernel/systbl_chk.c
@@ -0,0 +1,58 @@
+/*
+ * This file, when run through CPP produces a list of syscall numbers
+ * in the order of systbl.h. That way we can check for gaps and syscalls
+ * that are out of order.
+ *
+ * Unfortunately, we cannot check for the correct ordering of entries
+ * using SYSX().
+ *
+ * Copyright © IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/unistd.h>
+
+#define SYSCALL(func) __NR_##func
+#define COMPAT_SYS(func) __NR_##func
+#define PPC_SYS(func) __NR_##func
+#ifdef CONFIG_PPC64
+#define OLDSYS(func) -1
+#define SYS32ONLY(func) -1
+#else
+#define OLDSYS(func) __NR_old##func
+#define SYS32ONLY(func) __NR_##func
+#endif
+#define SYSX(f, f3264, f32) -1
+
+#define SYSCALL_SPU(func) SYSCALL(func)
+#define COMPAT_SYS_SPU(func) COMPAT_SYS(func)
+#define PPC_SYS_SPU(func) PPC_SYS(func)
+#define SYSX_SPU(f, f3264, f32) SYSX(f, f3264, f32)
+
+/* Just insert a marker for ni_syscalls */
+#define __NR_ni_syscall -1
+
+/*
+ * These are the known exceptions.
+ * Hopefully, there will be no more.
+ */
+#define __NR_llseek __NR__llseek
+#undef __NR_umount
+#define __NR_umount __NR_umount2
+#define __NR_old_getrlimit __NR_getrlimit
+#define __NR_newstat __NR_stat
+#define __NR_newlstat __NR_lstat
+#define __NR_newfstat __NR_fstat
+#define __NR_newuname __NR_uname
+#define __NR_sysctl __NR__sysctl
+#define __NR_olddebug_setcontext __NR_sys_debug_setcontext
+
+/* We call sys_ugetrlimit for syscall number __NR_getrlimit */
+#define getrlimit ugetrlimit
+
+START_TABLE
+#include <asm/systbl.h>
+END_TABLE __NR_syscalls
diff --git a/arch/powerpc/kernel/systbl_chk.sh b/arch/powerpc/kernel/systbl_chk.sh
new file mode 100644
index 00000000..19415e76
--- /dev/null
+++ b/arch/powerpc/kernel/systbl_chk.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+#
+# Just process the CPP output from systbl_chk.c and complain
+# if anything is out of order.
+#
+# Copyright © 2008 IBM Corporation
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version
+# 2 of the License, or (at your option) any later version.
+
+awk 'BEGIN { num = -1; } # Ignore the beginning of the file
+ /^#/ { next; }
+ /^[ \t]*$/ { next; }
+ /^START_TABLE/ { num = 0; next; }
+ /^END_TABLE/ {
+ if (num != $2) {
+ printf "__NR_syscalls (%s) is not one more than the last syscall (%s)\n",
+ $2, num - 1;
+ exit(1);
+ }
+ num = -1; # Ignore the rest of the file
+ }
+ {
+ if (num == -1) next;
+ if (($1 != -1) && ($1 != num)) {
+ printf "Syscall %s out of order (expected %s)\n",
+ $1, num;
+ exit(1);
+ };
+ num++;
+ }' "$1"
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
new file mode 100644
index 00000000..a753b72e
--- /dev/null
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -0,0 +1,270 @@
+/*
+ * temp.c Thermal management for cpu's with Thermal Assist Units
+ *
+ * Written by Troy Benjegerdes <hozer@drgw.net>
+ *
+ * TODO:
+ * dynamic power management to limit peak CPU temp (using ICTC)
+ * calibration???
+ *
+ * Silly, crazy ideas: use cpu load (from scheduler) and ICTC to extend battery
+ * life in portables, and add a 'performance/watt' metric somewhere in /proc
+ */
+
+#include <linux/errno.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+#include <asm/reg.h>
+#include <asm/nvram.h>
+#include <asm/cache.h>
+#include <asm/8xx_immap.h>
+#include <asm/machdep.h>
+
+static struct tau_temp
+{
+ int interrupts;
+ unsigned char low;
+ unsigned char high;
+ unsigned char grew;
+} tau[NR_CPUS];
+
+struct timer_list tau_timer;
+
+#undef DEBUG
+
+/* TODO: put these in a /proc interface, with some sanity checks, and maybe
+ * dynamic adjustment to minimize # of interrupts */
+/* configurable values for step size and how much to expand the window when
+ * we get an interrupt. These are based on the limit that was out of range */
+#define step_size 2 /* step size when temp goes out of range */
+#define window_expand 1 /* expand the window by this much */
+/* configurable values for shrinking the window */
+#define shrink_timer 2*HZ /* period between shrinking the window */
+#define min_window 2 /* minimum window size, degrees C */
+
+void set_thresholds(unsigned long cpu)
+{
+#ifdef CONFIG_TAU_INT
+ /*
+ * setup THRM1,
+ * threshold, valid bit, enable interrupts, interrupt when below threshold
+ */
+ mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID);
+
+ /* setup THRM2,
+ * threshold, valid bit, enable interrupts, interrupt when above threshold
+ */
+ mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE);
+#else
+ /* same thing but don't enable interrupts */
+ mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TID);
+ mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V);
+#endif
+}
+
+void TAUupdate(int cpu)
+{
+ unsigned thrm;
+
+#ifdef DEBUG
+ printk("TAUupdate ");
+#endif
+
+ /* if both thresholds are crossed, the step_sizes cancel out
+ * and the window winds up getting expanded twice. */
+ if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */
+ if(thrm & THRM1_TIN){ /* crossed low threshold */
+ if (tau[cpu].low >= step_size){
+ tau[cpu].low -= step_size;
+ tau[cpu].high -= (step_size - window_expand);
+ }
+ tau[cpu].grew = 1;
+#ifdef DEBUG
+ printk("low threshold crossed ");
+#endif
+ }
+ }
+ if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? */
+ if(thrm & THRM1_TIN){ /* crossed high threshold */
+ if (tau[cpu].high <= 127-step_size){
+ tau[cpu].low += (step_size - window_expand);
+ tau[cpu].high += step_size;
+ }
+ tau[cpu].grew = 1;
+#ifdef DEBUG
+ printk("high threshold crossed ");
+#endif
+ }
+ }
+
+#ifdef DEBUG
+ printk("grew = %d\n", tau[cpu].grew);
+#endif
+
+#ifndef CONFIG_TAU_INT /* tau_timeout will do this if not using interrupts */
+ set_thresholds(cpu);
+#endif
+
+}
+
+#ifdef CONFIG_TAU_INT
+/*
+ * TAU interrupts - called when we have a thermal assist unit interrupt
+ * with interrupts disabled
+ */
+
+void TAUException(struct pt_regs * regs)
+{
+ int cpu = smp_processor_id();
+
+ irq_enter();
+ tau[cpu].interrupts++;
+
+ TAUupdate(cpu);
+
+ irq_exit();
+}
+#endif /* CONFIG_TAU_INT */
+
+static void tau_timeout(void * info)
+{
+ int cpu;
+ unsigned long flags;
+ int size;
+ int shrink;
+
+ /* disabling interrupts *should* be okay */
+ local_irq_save(flags);
+ cpu = smp_processor_id();
+
+#ifndef CONFIG_TAU_INT
+ TAUupdate(cpu);
+#endif
+
+ size = tau[cpu].high - tau[cpu].low;
+ if (size > min_window && ! tau[cpu].grew) {
+ /* do an exponential shrink of half the amount currently over size */
+ shrink = (2 + size - min_window) / 4;
+ if (shrink) {
+ tau[cpu].low += shrink;
+ tau[cpu].high -= shrink;
+ } else { /* size must have been min_window + 1 */
+ tau[cpu].low += 1;
+#if 1 /* debug */
+ if ((tau[cpu].high - tau[cpu].low) != min_window){
+ printk(KERN_ERR "temp.c: line %d, logic error\n", __LINE__);
+ }
+#endif
+ }
+ }
+
+ tau[cpu].grew = 0;
+
+ set_thresholds(cpu);
+
+ /*
+ * Do the enable every time, since otherwise a bunch of (relatively)
+ * complex sleep code needs to be added. One mtspr every time
+ * tau_timeout is called is probably not a big deal.
+ *
+ * Enable thermal sensor and set up sample interval timer
+ * need 20 us to do the compare.. until a nice 'cpu_speed' function
+ * call is implemented, just assume a 500 mhz clock. It doesn't really
+ * matter if we take too long for a compare since it's all interrupt
+ * driven anyway.
+ *
+ * use a extra long time.. (60 us @ 500 mhz)
+ */
+ mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E);
+
+ local_irq_restore(flags);
+}
+
+static void tau_timeout_smp(unsigned long unused)
+{
+
+ /* schedule ourselves to be run again */
+ mod_timer(&tau_timer, jiffies + shrink_timer) ;
+ on_each_cpu(tau_timeout, NULL, 0);
+}
+
+/*
+ * setup the TAU
+ *
+ * Set things up to use THRM1 as a temperature lower bound, and THRM2 as an upper bound.
+ * Start off at zero
+ */
+
+int tau_initialized = 0;
+
+void __init TAU_init_smp(void * info)
+{
+ unsigned long cpu = smp_processor_id();
+
+ /* set these to a reasonable value and let the timer shrink the
+ * window */
+ tau[cpu].low = 5;
+ tau[cpu].high = 120;
+
+ set_thresholds(cpu);
+}
+
+int __init TAU_init(void)
+{
+ /* We assume in SMP that if one CPU has TAU support, they
+ * all have it --BenH
+ */
+ if (!cpu_has_feature(CPU_FTR_TAU)) {
+ printk("Thermal assist unit not available\n");
+ tau_initialized = 0;
+ return 1;
+ }
+
+
+ /* first, set up the window shrinking timer */
+ init_timer(&tau_timer);
+ tau_timer.function = tau_timeout_smp;
+ tau_timer.expires = jiffies + shrink_timer;
+ add_timer(&tau_timer);
+
+ on_each_cpu(TAU_init_smp, NULL, 0);
+
+ printk("Thermal assist unit ");
+#ifdef CONFIG_TAU_INT
+ printk("using interrupts, ");
+#else
+ printk("using timers, ");
+#endif
+ printk("shrink_timer: %d jiffies\n", shrink_timer);
+ tau_initialized = 1;
+
+ return 0;
+}
+
+__initcall(TAU_init);
+
+/*
+ * return current temp
+ */
+
+u32 cpu_temp_both(unsigned long cpu)
+{
+ return ((tau[cpu].high << 16) | tau[cpu].low);
+}
+
+int cpu_temp(unsigned long cpu)
+{
+ return ((tau[cpu].high + tau[cpu].low) / 2);
+}
+
+int tau_interrupts(unsigned long cpu)
+{
+ return (tau[cpu].interrupts);
+}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
new file mode 100644
index 00000000..730e69cb
--- /dev/null
+++ b/arch/powerpc/kernel/time.c
@@ -0,0 +1,1036 @@
+/*
+ * Common time routines among all ppc machines.
+ *
+ * Written by Cort Dougan (cort@cs.nmt.edu) to merge
+ * Paul Mackerras' version and mine for PReP and Pmac.
+ * MPC8xx/MBX changes by Dan Malek (dmalek@jlc.net).
+ * Converted for 64-bit by Mike Corrigan (mikejc@us.ibm.com)
+ *
+ * First round of bugfixes by Gabriel Paubert (paubert@iram.es)
+ * to make clock more stable (2.4.0-test5). The only thing
+ * that this code assumes is that the timebases have been synchronized
+ * by firmware on SMP and are never stopped (never do sleep
+ * on SMP then, nap and doze are OK).
+ *
+ * Speeded up do_gettimeofday by getting rid of references to
+ * xtime (which required locks for consistency). (mikejc@us.ibm.com)
+ *
+ * TODO (not necessarily in this file):
+ * - improve precision and reproducibility of timebase frequency
+ * measurement at boot time.
+ * - for astronomical applications: add a new function to get
+ * non ambiguous timestamps even around leap seconds. This needs
+ * a new timestamp format and a good name.
+ *
+ * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
+ * "A Kernel Model for Precision Timekeeping" by Dave Mills
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/kernel_stat.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/profile.h>
+#include <linux/cpu.h>
+#include <linux/security.h>
+#include <linux/percpu.h>
+#include <linux/rtc.h>
+#include <linux/jiffies.h>
+#include <linux/posix-timers.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/irq_work.h>
+#include <asm/trace.h>
+
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/nvram.h>
+#include <asm/cache.h>
+#include <asm/machdep.h>
+#include <asm/uaccess.h>
+#include <asm/time.h>
+#include <asm/prom.h>
+#include <asm/irq.h>
+#include <asm/div64.h>
+#include <asm/smp.h>
+#include <asm/vdso_datapage.h>
+#include <asm/firmware.h>
+#include <asm/cputime.h>
+
+/* powerpc clocksource/clockevent code */
+
+#include <linux/clockchips.h>
+#include <linux/clocksource.h>
+
+static cycle_t rtc_read(struct clocksource *);
+static struct clocksource clocksource_rtc = {
+ .name = "rtc",
+ .rating = 400,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .mask = CLOCKSOURCE_MASK(64),
+ .read = rtc_read,
+};
+
+static cycle_t timebase_read(struct clocksource *);
+static struct clocksource clocksource_timebase = {
+ .name = "timebase",
+ .rating = 400,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .mask = CLOCKSOURCE_MASK(64),
+ .read = timebase_read,
+};
+
+#define DECREMENTER_MAX 0x7fffffff
+
+static int decrementer_set_next_event(unsigned long evt,
+ struct clock_event_device *dev);
+static void decrementer_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *dev);
+
+static struct clock_event_device decrementer_clockevent = {
+ .name = "decrementer",
+ .rating = 200,
+ .irq = 0,
+ .set_next_event = decrementer_set_next_event,
+ .set_mode = decrementer_set_mode,
+ .features = CLOCK_EVT_FEAT_ONESHOT,
+};
+
+DEFINE_PER_CPU(u64, decrementers_next_tb);
+static DEFINE_PER_CPU(struct clock_event_device, decrementers);
+
+#define XSEC_PER_SEC (1024*1024)
+
+#ifdef CONFIG_PPC64
+#define SCALE_XSEC(xsec, max) (((xsec) * max) / XSEC_PER_SEC)
+#else
+/* compute ((xsec << 12) * max) >> 32 */
+#define SCALE_XSEC(xsec, max) mulhwu((xsec) << 12, max)
+#endif
+
+unsigned long tb_ticks_per_jiffy;
+unsigned long tb_ticks_per_usec = 100; /* sane default */
+EXPORT_SYMBOL(tb_ticks_per_usec);
+unsigned long tb_ticks_per_sec;
+EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime_t conversions */
+
+DEFINE_SPINLOCK(rtc_lock);
+EXPORT_SYMBOL_GPL(rtc_lock);
+
+static u64 tb_to_ns_scale __read_mostly;
+static unsigned tb_to_ns_shift __read_mostly;
+static u64 boot_tb __read_mostly;
+
+extern struct timezone sys_tz;
+static long timezone_offset;
+
+unsigned long ppc_proc_freq;
+EXPORT_SYMBOL_GPL(ppc_proc_freq);
+unsigned long ppc_tb_freq;
+EXPORT_SYMBOL_GPL(ppc_tb_freq);
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/*
+ * Factors for converting from cputime_t (timebase ticks) to
+ * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds).
+ * These are all stored as 0.64 fixed-point binary fractions.
+ */
+u64 __cputime_jiffies_factor;
+EXPORT_SYMBOL(__cputime_jiffies_factor);
+u64 __cputime_usec_factor;
+EXPORT_SYMBOL(__cputime_usec_factor);
+u64 __cputime_sec_factor;
+EXPORT_SYMBOL(__cputime_sec_factor);
+u64 __cputime_clockt_factor;
+EXPORT_SYMBOL(__cputime_clockt_factor);
+DEFINE_PER_CPU(unsigned long, cputime_last_delta);
+DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta);
+
+cputime_t cputime_one_jiffy;
+
+void (*dtl_consumer)(struct dtl_entry *, u64);
+
+static void calc_cputime_factors(void)
+{
+ struct div_result res;
+
+ div128_by_32(HZ, 0, tb_ticks_per_sec, &res);
+ __cputime_jiffies_factor = res.result_low;
+ div128_by_32(1000000, 0, tb_ticks_per_sec, &res);
+ __cputime_usec_factor = res.result_low;
+ div128_by_32(1, 0, tb_ticks_per_sec, &res);
+ __cputime_sec_factor = res.result_low;
+ div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res);
+ __cputime_clockt_factor = res.result_low;
+}
+
+/*
+ * Read the SPURR on systems that have it, otherwise the PURR,
+ * or if that doesn't exist return the timebase value passed in.
+ */
+static u64 read_spurr(u64 tb)
+{
+ if (cpu_has_feature(CPU_FTR_SPURR))
+ return mfspr(SPRN_SPURR);
+ if (cpu_has_feature(CPU_FTR_PURR))
+ return mfspr(SPRN_PURR);
+ return tb;
+}
+
+#ifdef CONFIG_PPC_SPLPAR
+
+/*
+ * Scan the dispatch trace log and count up the stolen time.
+ * Should be called with interrupts disabled.
+ */
+static u64 scan_dispatch_log(u64 stop_tb)
+{
+ u64 i = local_paca->dtl_ridx;
+ struct dtl_entry *dtl = local_paca->dtl_curr;
+ struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
+ struct lppaca *vpa = local_paca->lppaca_ptr;
+ u64 tb_delta;
+ u64 stolen = 0;
+ u64 dtb;
+
+ if (!dtl)
+ return 0;
+
+ if (i == vpa->dtl_idx)
+ return 0;
+ while (i < vpa->dtl_idx) {
+ if (dtl_consumer)
+ dtl_consumer(dtl, i);
+ dtb = dtl->timebase;
+ tb_delta = dtl->enqueue_to_dispatch_time +
+ dtl->ready_to_enqueue_time;
+ barrier();
+ if (i + N_DISPATCH_LOG < vpa->dtl_idx) {
+ /* buffer has overflowed */
+ i = vpa->dtl_idx - N_DISPATCH_LOG;
+ dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
+ continue;
+ }
+ if (dtb > stop_tb)
+ break;
+ stolen += tb_delta;
+ ++i;
+ ++dtl;
+ if (dtl == dtl_end)
+ dtl = local_paca->dispatch_log;
+ }
+ local_paca->dtl_ridx = i;
+ local_paca->dtl_curr = dtl;
+ return stolen;
+}
+
+/*
+ * Accumulate stolen time by scanning the dispatch trace log.
+ * Called on entry from user mode.
+ */
+void accumulate_stolen_time(void)
+{
+ u64 sst, ust;
+
+ u8 save_soft_enabled = local_paca->soft_enabled;
+
+ /* We are called early in the exception entry, before
+ * soft/hard_enabled are sync'ed to the expected state
+ * for the exception. We are hard disabled but the PACA
+ * needs to reflect that so various debug stuff doesn't
+ * complain
+ */
+ local_paca->soft_enabled = 0;
+
+ sst = scan_dispatch_log(local_paca->starttime_user);
+ ust = scan_dispatch_log(local_paca->starttime);
+ local_paca->system_time -= sst;
+ local_paca->user_time -= ust;
+ local_paca->stolen_time += ust + sst;
+
+ local_paca->soft_enabled = save_soft_enabled;
+}
+
+static inline u64 calculate_stolen_time(u64 stop_tb)
+{
+ u64 stolen = 0;
+
+ if (get_paca()->dtl_ridx != get_paca()->lppaca_ptr->dtl_idx) {
+ stolen = scan_dispatch_log(stop_tb);
+ get_paca()->system_time -= stolen;
+ }
+
+ stolen += get_paca()->stolen_time;
+ get_paca()->stolen_time = 0;
+ return stolen;
+}
+
+#else /* CONFIG_PPC_SPLPAR */
+static inline u64 calculate_stolen_time(u64 stop_tb)
+{
+ return 0;
+}
+
+#endif /* CONFIG_PPC_SPLPAR */
+
+/*
+ * Account time for a transition between system, hard irq
+ * or soft irq state.
+ */
+void account_system_vtime(struct task_struct *tsk)
+{
+ u64 now, nowscaled, delta, deltascaled;
+ unsigned long flags;
+ u64 stolen, udelta, sys_scaled, user_scaled;
+
+ local_irq_save(flags);
+ now = mftb();
+ nowscaled = read_spurr(now);
+ get_paca()->system_time += now - get_paca()->starttime;
+ get_paca()->starttime = now;
+ deltascaled = nowscaled - get_paca()->startspurr;
+ get_paca()->startspurr = nowscaled;
+
+ stolen = calculate_stolen_time(now);
+
+ delta = get_paca()->system_time;
+ get_paca()->system_time = 0;
+ udelta = get_paca()->user_time - get_paca()->utime_sspurr;
+ get_paca()->utime_sspurr = get_paca()->user_time;
+
+ /*
+ * Because we don't read the SPURR on every kernel entry/exit,
+ * deltascaled includes both user and system SPURR ticks.
+ * Apportion these ticks to system SPURR ticks and user
+ * SPURR ticks in the same ratio as the system time (delta)
+ * and user time (udelta) values obtained from the timebase
+ * over the same interval. The system ticks get accounted here;
+ * the user ticks get saved up in paca->user_time_scaled to be
+ * used by account_process_tick.
+ */
+ sys_scaled = delta;
+ user_scaled = udelta;
+ if (deltascaled != delta + udelta) {
+ if (udelta) {
+ sys_scaled = deltascaled * delta / (delta + udelta);
+ user_scaled = deltascaled - sys_scaled;
+ } else {
+ sys_scaled = deltascaled;
+ }
+ }
+ get_paca()->user_time_scaled += user_scaled;
+
+ if (in_interrupt() || idle_task(smp_processor_id()) != tsk) {
+ account_system_time(tsk, 0, delta, sys_scaled);
+ if (stolen)
+ account_steal_time(stolen);
+ } else {
+ account_idle_time(delta + stolen);
+ }
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(account_system_vtime);
+
+/*
+ * Transfer the user and system times accumulated in the paca
+ * by the exception entry and exit code to the generic process
+ * user and system time records.
+ * Must be called with interrupts disabled.
+ * Assumes that account_system_vtime() has been called recently
+ * (i.e. since the last entry from usermode) so that
+ * get_paca()->user_time_scaled is up to date.
+ */
+void account_process_tick(struct task_struct *tsk, int user_tick)
+{
+ cputime_t utime, utimescaled;
+
+ utime = get_paca()->user_time;
+ utimescaled = get_paca()->user_time_scaled;
+ get_paca()->user_time = 0;
+ get_paca()->user_time_scaled = 0;
+ get_paca()->utime_sspurr = 0;
+ account_user_time(tsk, utime, utimescaled);
+}
+
+#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
+#define calc_cputime_factors()
+#endif
+
+void __delay(unsigned long loops)
+{
+ unsigned long start;
+ int diff;
+
+ if (__USE_RTC()) {
+ start = get_rtcl();
+ do {
+ /* the RTCL register wraps at 1000000000 */
+ diff = get_rtcl() - start;
+ if (diff < 0)
+ diff += 1000000000;
+ } while (diff < loops);
+ } else {
+ start = get_tbl();
+ while (get_tbl() - start < loops)
+ HMT_low();
+ HMT_medium();
+ }
+}
+EXPORT_SYMBOL(__delay);
+
+void udelay(unsigned long usecs)
+{
+ __delay(tb_ticks_per_usec * usecs);
+}
+EXPORT_SYMBOL(udelay);
+
+#ifdef CONFIG_SMP
+unsigned long profile_pc(struct pt_regs *regs)
+{
+ unsigned long pc = instruction_pointer(regs);
+
+ if (in_lock_functions(pc))
+ return regs->link;
+
+ return pc;
+}
+EXPORT_SYMBOL(profile_pc);
+#endif
+
+#ifdef CONFIG_IRQ_WORK
+
+/*
+ * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
+ */
+#ifdef CONFIG_PPC64
+static inline unsigned long test_irq_work_pending(void)
+{
+ unsigned long x;
+
+ asm volatile("lbz %0,%1(13)"
+ : "=r" (x)
+ : "i" (offsetof(struct paca_struct, irq_work_pending)));
+ return x;
+}
+
+static inline void set_irq_work_pending_flag(void)
+{
+ asm volatile("stb %0,%1(13)" : :
+ "r" (1),
+ "i" (offsetof(struct paca_struct, irq_work_pending)));
+}
+
+static inline void clear_irq_work_pending(void)
+{
+ asm volatile("stb %0,%1(13)" : :
+ "r" (0),
+ "i" (offsetof(struct paca_struct, irq_work_pending)));
+}
+
+#else /* 32-bit */
+
+DEFINE_PER_CPU(u8, irq_work_pending);
+
+#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1
+#define test_irq_work_pending() __get_cpu_var(irq_work_pending)
+#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0
+
+#endif /* 32 vs 64 bit */
+
+void arch_irq_work_raise(void)
+{
+ preempt_disable();
+ set_irq_work_pending_flag();
+ set_dec(1);
+ preempt_enable();
+}
+
+#else /* CONFIG_IRQ_WORK */
+
+#define test_irq_work_pending() 0
+#define clear_irq_work_pending()
+
+#endif /* CONFIG_IRQ_WORK */
+
+/*
+ * timer_interrupt - gets called when the decrementer overflows,
+ * with interrupts disabled.
+ */
+void timer_interrupt(struct pt_regs * regs)
+{
+ struct pt_regs *old_regs;
+ u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+ struct clock_event_device *evt = &__get_cpu_var(decrementers);
+ u64 now;
+
+ /* Ensure a positive value is written to the decrementer, or else
+ * some CPUs will continue to take decrementer exceptions.
+ */
+ set_dec(DECREMENTER_MAX);
+
+ /* Some implementations of hotplug will get timer interrupts while
+ * offline, just ignore these
+ */
+ if (!cpu_online(smp_processor_id()))
+ return;
+
+ /* Conditionally hard-enable interrupts now that the DEC has been
+ * bumped to its maximum value
+ */
+ may_hard_irq_enable();
+
+ trace_timer_interrupt_entry(regs);
+
+ __get_cpu_var(irq_stat).timer_irqs++;
+
+#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC)
+ if (atomic_read(&ppc_n_lost_interrupts) != 0)
+ do_IRQ(regs);
+#endif
+
+ old_regs = set_irq_regs(regs);
+ irq_enter();
+
+ if (test_irq_work_pending()) {
+ clear_irq_work_pending();
+ irq_work_run();
+ }
+
+ now = get_tb_or_rtc();
+ if (now >= *next_tb) {
+ *next_tb = ~(u64)0;
+ if (evt->event_handler)
+ evt->event_handler(evt);
+ } else {
+ now = *next_tb - now;
+ if (now <= DECREMENTER_MAX)
+ set_dec((int)now);
+ }
+
+#ifdef CONFIG_PPC64
+ /* collect purr register values often, for accurate calculations */
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
+ cu->current_tb = mfspr(SPRN_PURR);
+ }
+#endif
+
+ irq_exit();
+ set_irq_regs(old_regs);
+
+ trace_timer_interrupt_exit(regs);
+}
+
+#ifdef CONFIG_SUSPEND
+static void generic_suspend_disable_irqs(void)
+{
+ /* Disable the decrementer, so that it doesn't interfere
+ * with suspending.
+ */
+
+ set_dec(DECREMENTER_MAX);
+ local_irq_disable();
+ set_dec(DECREMENTER_MAX);
+}
+
+static void generic_suspend_enable_irqs(void)
+{
+ local_irq_enable();
+}
+
+/* Overrides the weak version in kernel/power/main.c */
+void arch_suspend_disable_irqs(void)
+{
+ if (ppc_md.suspend_disable_irqs)
+ ppc_md.suspend_disable_irqs();
+ generic_suspend_disable_irqs();
+}
+
+/* Overrides the weak version in kernel/power/main.c */
+void arch_suspend_enable_irqs(void)
+{
+ generic_suspend_enable_irqs();
+ if (ppc_md.suspend_enable_irqs)
+ ppc_md.suspend_enable_irqs();
+}
+#endif
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ *
+ * Note: mulhdu(a, b) (multiply high double unsigned) returns
+ * the high 64 bits of a * b, i.e. (a * b) >> 64, where a and b
+ * are 64-bit unsigned numbers.
+ */
+unsigned long long sched_clock(void)
+{
+ if (__USE_RTC())
+ return get_rtc();
+ return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
+}
+
+static int __init get_freq(char *name, int cells, unsigned long *val)
+{
+ struct device_node *cpu;
+ const unsigned int *fp;
+ int found = 0;
+
+ /* The cpu node should have timebase and clock frequency properties */
+ cpu = of_find_node_by_type(NULL, "cpu");
+
+ if (cpu) {
+ fp = of_get_property(cpu, name, NULL);
+ if (fp) {
+ found = 1;
+ *val = of_read_ulong(fp, cells);
+ }
+
+ of_node_put(cpu);
+ }
+
+ return found;
+}
+
+/* should become __cpuinit when secondary_cpu_time_init also is */
+void start_cpu_decrementer(void)
+{
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+ /* Clear any pending timer interrupts */
+ mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
+
+ /* Enable decrementer interrupt */
+ mtspr(SPRN_TCR, TCR_DIE);
+#endif /* defined(CONFIG_BOOKE) || defined(CONFIG_40x) */
+}
+
+void __init generic_calibrate_decr(void)
+{
+ ppc_tb_freq = DEFAULT_TB_FREQ; /* hardcoded default */
+
+ if (!get_freq("ibm,extended-timebase-frequency", 2, &ppc_tb_freq) &&
+ !get_freq("timebase-frequency", 1, &ppc_tb_freq)) {
+
+ printk(KERN_ERR "WARNING: Estimating decrementer frequency "
+ "(not found)\n");
+ }
+
+ ppc_proc_freq = DEFAULT_PROC_FREQ; /* hardcoded default */
+
+ if (!get_freq("ibm,extended-clock-frequency", 2, &ppc_proc_freq) &&
+ !get_freq("clock-frequency", 1, &ppc_proc_freq)) {
+
+ printk(KERN_ERR "WARNING: Estimating processor frequency "
+ "(not found)\n");
+ }
+}
+
+int update_persistent_clock(struct timespec now)
+{
+ struct rtc_time tm;
+
+ if (!ppc_md.set_rtc_time)
+ return 0;
+
+ to_tm(now.tv_sec + 1 + timezone_offset, &tm);
+ tm.tm_year -= 1900;
+ tm.tm_mon -= 1;
+
+ return ppc_md.set_rtc_time(&tm);
+}
+
+static void __read_persistent_clock(struct timespec *ts)
+{
+ struct rtc_time tm;
+ static int first = 1;
+
+ ts->tv_nsec = 0;
+ /* XXX this is a litle fragile but will work okay in the short term */
+ if (first) {
+ first = 0;
+ if (ppc_md.time_init)
+ timezone_offset = ppc_md.time_init();
+
+ /* get_boot_time() isn't guaranteed to be safe to call late */
+ if (ppc_md.get_boot_time) {
+ ts->tv_sec = ppc_md.get_boot_time() - timezone_offset;
+ return;
+ }
+ }
+ if (!ppc_md.get_rtc_time) {
+ ts->tv_sec = 0;
+ return;
+ }
+ ppc_md.get_rtc_time(&tm);
+
+ ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday,
+ tm.tm_hour, tm.tm_min, tm.tm_sec);
+}
+
+void read_persistent_clock(struct timespec *ts)
+{
+ __read_persistent_clock(ts);
+
+ /* Sanitize it in case real time clock is set below EPOCH */
+ if (ts->tv_sec < 0) {
+ ts->tv_sec = 0;
+ ts->tv_nsec = 0;
+ }
+
+}
+
+/* clocksource code */
+static cycle_t rtc_read(struct clocksource *cs)
+{
+ return (cycle_t)get_rtc();
+}
+
+static cycle_t timebase_read(struct clocksource *cs)
+{
+ return (cycle_t)get_tb();
+}
+
+void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
+ struct clocksource *clock, u32 mult)
+{
+ u64 new_tb_to_xs, new_stamp_xsec;
+ u32 frac_sec;
+
+ if (clock != &clocksource_timebase)
+ return;
+
+ /* Make userspace gettimeofday spin until we're done. */
+ ++vdso_data->tb_update_count;
+ smp_mb();
+
+ /* 19342813113834067 ~= 2^(20+64) / 1e9 */
+ new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift);
+ new_stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC;
+ do_div(new_stamp_xsec, 1000000000);
+ new_stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC;
+
+ BUG_ON(wall_time->tv_nsec >= NSEC_PER_SEC);
+ /* this is tv_nsec / 1e9 as a 0.32 fraction */
+ frac_sec = ((u64) wall_time->tv_nsec * 18446744073ULL) >> 32;
+
+ /*
+ * tb_update_count is used to allow the userspace gettimeofday code
+ * to assure itself that it sees a consistent view of the tb_to_xs and
+ * stamp_xsec variables. It reads the tb_update_count, then reads
+ * tb_to_xs and stamp_xsec and then reads tb_update_count again. If
+ * the two values of tb_update_count match and are even then the
+ * tb_to_xs and stamp_xsec values are consistent. If not, then it
+ * loops back and reads them again until this criteria is met.
+ * We expect the caller to have done the first increment of
+ * vdso_data->tb_update_count already.
+ */
+ vdso_data->tb_orig_stamp = clock->cycle_last;
+ vdso_data->stamp_xsec = new_stamp_xsec;
+ vdso_data->tb_to_xs = new_tb_to_xs;
+ vdso_data->wtom_clock_sec = wtm->tv_sec;
+ vdso_data->wtom_clock_nsec = wtm->tv_nsec;
+ vdso_data->stamp_xtime = *wall_time;
+ vdso_data->stamp_sec_fraction = frac_sec;
+ smp_wmb();
+ ++(vdso_data->tb_update_count);
+}
+
+void update_vsyscall_tz(void)
+{
+ /* Make userspace gettimeofday spin until we're done. */
+ ++vdso_data->tb_update_count;
+ smp_mb();
+ vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
+ vdso_data->tz_dsttime = sys_tz.tz_dsttime;
+ smp_mb();
+ ++vdso_data->tb_update_count;
+}
+
+static void __init clocksource_init(void)
+{
+ struct clocksource *clock;
+
+ if (__USE_RTC())
+ clock = &clocksource_rtc;
+ else
+ clock = &clocksource_timebase;
+
+ if (clocksource_register_hz(clock, tb_ticks_per_sec)) {
+ printk(KERN_ERR "clocksource: %s is already registered\n",
+ clock->name);
+ return;
+ }
+
+ printk(KERN_INFO "clocksource: %s mult[%x] shift[%d] registered\n",
+ clock->name, clock->mult, clock->shift);
+}
+
+static int decrementer_set_next_event(unsigned long evt,
+ struct clock_event_device *dev)
+{
+ __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt;
+ set_dec(evt);
+ return 0;
+}
+
+static void decrementer_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *dev)
+{
+ if (mode != CLOCK_EVT_MODE_ONESHOT)
+ decrementer_set_next_event(DECREMENTER_MAX, dev);
+}
+
+static void register_decrementer_clockevent(int cpu)
+{
+ struct clock_event_device *dec = &per_cpu(decrementers, cpu);
+
+ *dec = decrementer_clockevent;
+ dec->cpumask = cpumask_of(cpu);
+
+ printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n",
+ dec->name, dec->mult, dec->shift, cpu);
+
+ clockevents_register_device(dec);
+}
+
+static void __init init_decrementer_clockevent(void)
+{
+ int cpu = smp_processor_id();
+
+ clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4);
+
+ decrementer_clockevent.max_delta_ns =
+ clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent);
+ decrementer_clockevent.min_delta_ns =
+ clockevent_delta2ns(2, &decrementer_clockevent);
+
+ register_decrementer_clockevent(cpu);
+}
+
+void secondary_cpu_time_init(void)
+{
+ /* Start the decrementer on CPUs that have manual control
+ * such as BookE
+ */
+ start_cpu_decrementer();
+
+ /* FIME: Should make unrelatred change to move snapshot_timebase
+ * call here ! */
+ register_decrementer_clockevent(smp_processor_id());
+}
+
+/* This function is only called on the boot processor */
+void __init time_init(void)
+{
+ struct div_result res;
+ u64 scale;
+ unsigned shift;
+
+ if (__USE_RTC()) {
+ /* 601 processor: dec counts down by 128 every 128ns */
+ ppc_tb_freq = 1000000000;
+ } else {
+ /* Normal PowerPC with timebase register */
+ ppc_md.calibrate_decr();
+ printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
+ ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
+ printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n",
+ ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
+ }
+
+ tb_ticks_per_jiffy = ppc_tb_freq / HZ;
+ tb_ticks_per_sec = ppc_tb_freq;
+ tb_ticks_per_usec = ppc_tb_freq / 1000000;
+ calc_cputime_factors();
+ setup_cputime_one_jiffy();
+
+ /*
+ * Compute scale factor for sched_clock.
+ * The calibrate_decr() function has set tb_ticks_per_sec,
+ * which is the timebase frequency.
+ * We compute 1e9 * 2^64 / tb_ticks_per_sec and interpret
+ * the 128-bit result as a 64.64 fixed-point number.
+ * We then shift that number right until it is less than 1.0,
+ * giving us the scale factor and shift count to use in
+ * sched_clock().
+ */
+ div128_by_32(1000000000, 0, tb_ticks_per_sec, &res);
+ scale = res.result_low;
+ for (shift = 0; res.result_high != 0; ++shift) {
+ scale = (scale >> 1) | (res.result_high << 63);
+ res.result_high >>= 1;
+ }
+ tb_to_ns_scale = scale;
+ tb_to_ns_shift = shift;
+ /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */
+ boot_tb = get_tb_or_rtc();
+
+ /* If platform provided a timezone (pmac), we correct the time */
+ if (timezone_offset) {
+ sys_tz.tz_minuteswest = -timezone_offset / 60;
+ sys_tz.tz_dsttime = 0;
+ }
+
+ vdso_data->tb_update_count = 0;
+ vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
+
+ /* Start the decrementer on CPUs that have manual control
+ * such as BookE
+ */
+ start_cpu_decrementer();
+
+ /* Register the clocksource */
+ clocksource_init();
+
+ init_decrementer_clockevent();
+}
+
+
+#define FEBRUARY 2
+#define STARTOFTIME 1970
+#define SECDAY 86400L
+#define SECYR (SECDAY * 365)
+#define leapyear(year) ((year) % 4 == 0 && \
+ ((year) % 100 != 0 || (year) % 400 == 0))
+#define days_in_year(a) (leapyear(a) ? 366 : 365)
+#define days_in_month(a) (month_days[(a) - 1])
+
+static int month_days[12] = {
+ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
+};
+
+/*
+ * This only works for the Gregorian calendar - i.e. after 1752 (in the UK)
+ */
+void GregorianDay(struct rtc_time * tm)
+{
+ int leapsToDate;
+ int lastYear;
+ int day;
+ int MonthOffset[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
+
+ lastYear = tm->tm_year - 1;
+
+ /*
+ * Number of leap corrections to apply up to end of last year
+ */
+ leapsToDate = lastYear / 4 - lastYear / 100 + lastYear / 400;
+
+ /*
+ * This year is a leap year if it is divisible by 4 except when it is
+ * divisible by 100 unless it is divisible by 400
+ *
+ * e.g. 1904 was a leap year, 1900 was not, 1996 is, and 2000 was
+ */
+ day = tm->tm_mon > 2 && leapyear(tm->tm_year);
+
+ day += lastYear*365 + leapsToDate + MonthOffset[tm->tm_mon-1] +
+ tm->tm_mday;
+
+ tm->tm_wday = day % 7;
+}
+
+void to_tm(int tim, struct rtc_time * tm)
+{
+ register int i;
+ register long hms, day;
+
+ day = tim / SECDAY;
+ hms = tim % SECDAY;
+
+ /* Hours, minutes, seconds are easy */
+ tm->tm_hour = hms / 3600;
+ tm->tm_min = (hms % 3600) / 60;
+ tm->tm_sec = (hms % 3600) % 60;
+
+ /* Number of years in days */
+ for (i = STARTOFTIME; day >= days_in_year(i); i++)
+ day -= days_in_year(i);
+ tm->tm_year = i;
+
+ /* Number of months in days left */
+ if (leapyear(tm->tm_year))
+ days_in_month(FEBRUARY) = 29;
+ for (i = 1; day >= days_in_month(i); i++)
+ day -= days_in_month(i);
+ days_in_month(FEBRUARY) = 28;
+ tm->tm_mon = i;
+
+ /* Days are what is left over (+1) from all that. */
+ tm->tm_mday = day + 1;
+
+ /*
+ * Determine the day of week
+ */
+ GregorianDay(tm);
+}
+
+/*
+ * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit
+ * result.
+ */
+void div128_by_32(u64 dividend_high, u64 dividend_low,
+ unsigned divisor, struct div_result *dr)
+{
+ unsigned long a, b, c, d;
+ unsigned long w, x, y, z;
+ u64 ra, rb, rc;
+
+ a = dividend_high >> 32;
+ b = dividend_high & 0xffffffff;
+ c = dividend_low >> 32;
+ d = dividend_low & 0xffffffff;
+
+ w = a / divisor;
+ ra = ((u64)(a - (w * divisor)) << 32) + b;
+
+ rb = ((u64) do_div(ra, divisor) << 32) + c;
+ x = ra;
+
+ rc = ((u64) do_div(rb, divisor) << 32) + d;
+ y = rb;
+
+ do_div(rc, divisor);
+ z = rc;
+
+ dr->result_high = ((u64)w << 32) + x;
+ dr->result_low = ((u64)y << 32) + z;
+
+}
+
+/* We don't need to calibrate delay, we use the CPU timebase for that */
+void calibrate_delay(void)
+{
+ /* Some generic code (such as spinlock debug) use loops_per_jiffy
+ * as the number of __delay(1) in a jiffy, so make it so
+ */
+ loops_per_jiffy = tb_ticks_per_jiffy;
+}
+
+static int __init rtc_init(void)
+{
+ struct platform_device *pdev;
+
+ if (!ppc_md.get_rtc_time)
+ return -ENODEV;
+
+ pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
+ if (IS_ERR(pdev))
+ return PTR_ERR(pdev);
+
+ return 0;
+}
+
+module_init(rtc_init);
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
new file mode 100644
index 00000000..15897234
--- /dev/null
+++ b/arch/powerpc/kernel/traps.c
@@ -0,0 +1,1627 @@
+/*
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ * Copyright 2007-2010 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Modified by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras (paulus@samba.org)
+ */
+
+/*
+ * This file handles the architecture-dependent parts of hardware exceptions
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/prctl.h>
+#include <linux/delay.h>
+#include <linux/kprobes.h>
+#include <linux/kexec.h>
+#include <linux/backlight.h>
+#include <linux/bug.h>
+#include <linux/kdebug.h>
+#include <linux/debugfs.h>
+#include <linux/ratelimit.h>
+
+#include <asm/emulated_ops.h>
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/pmc.h>
+#ifdef CONFIG_PPC32
+#include <asm/reg.h>
+#endif
+#ifdef CONFIG_PMAC_BACKLIGHT
+#include <asm/backlight.h>
+#endif
+#ifdef CONFIG_PPC64
+#include <asm/firmware.h>
+#include <asm/processor.h>
+#endif
+#include <asm/kexec.h>
+#include <asm/ppc-opcode.h>
+#include <asm/rio.h>
+#include <asm/fadump.h>
+#include <asm/switch_to.h>
+#include <asm/debug.h>
+
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+int (*__debugger)(struct pt_regs *regs) __read_mostly;
+int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly;
+int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly;
+int (*__debugger_sstep)(struct pt_regs *regs) __read_mostly;
+int (*__debugger_iabr_match)(struct pt_regs *regs) __read_mostly;
+int (*__debugger_dabr_match)(struct pt_regs *regs) __read_mostly;
+int (*__debugger_fault_handler)(struct pt_regs *regs) __read_mostly;
+
+EXPORT_SYMBOL(__debugger);
+EXPORT_SYMBOL(__debugger_ipi);
+EXPORT_SYMBOL(__debugger_bpt);
+EXPORT_SYMBOL(__debugger_sstep);
+EXPORT_SYMBOL(__debugger_iabr_match);
+EXPORT_SYMBOL(__debugger_dabr_match);
+EXPORT_SYMBOL(__debugger_fault_handler);
+#endif
+
+/*
+ * Trap & Exception support
+ */
+
+#ifdef CONFIG_PMAC_BACKLIGHT
+static void pmac_backlight_unblank(void)
+{
+ mutex_lock(&pmac_backlight_mutex);
+ if (pmac_backlight) {
+ struct backlight_properties *props;
+
+ props = &pmac_backlight->props;
+ props->brightness = props->max_brightness;
+ props->power = FB_BLANK_UNBLANK;
+ backlight_update_status(pmac_backlight);
+ }
+ mutex_unlock(&pmac_backlight_mutex);
+}
+#else
+static inline void pmac_backlight_unblank(void) { }
+#endif
+
+static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+static int die_owner = -1;
+static unsigned int die_nest_count;
+static int die_counter;
+
+static unsigned __kprobes long oops_begin(struct pt_regs *regs)
+{
+ int cpu;
+ unsigned long flags;
+
+ if (debugger(regs))
+ return 1;
+
+ oops_enter();
+
+ /* racy, but better than risking deadlock. */
+ raw_local_irq_save(flags);
+ cpu = smp_processor_id();
+ if (!arch_spin_trylock(&die_lock)) {
+ if (cpu == die_owner)
+ /* nested oops. should stop eventually */;
+ else
+ arch_spin_lock(&die_lock);
+ }
+ die_nest_count++;
+ die_owner = cpu;
+ console_verbose();
+ bust_spinlocks(1);
+ if (machine_is(powermac))
+ pmac_backlight_unblank();
+ return flags;
+}
+
+static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs,
+ int signr)
+{
+ bust_spinlocks(0);
+ die_owner = -1;
+ add_taint(TAINT_DIE);
+ die_nest_count--;
+ oops_exit();
+ printk("\n");
+ if (!die_nest_count)
+ /* Nest count reaches zero, release the lock. */
+ arch_spin_unlock(&die_lock);
+ raw_local_irq_restore(flags);
+
+ crash_fadump(regs, "die oops");
+
+ /*
+ * A system reset (0x100) is a request to dump, so we always send
+ * it through the crashdump code.
+ */
+ if (kexec_should_crash(current) || (TRAP(regs) == 0x100)) {
+ crash_kexec(regs);
+
+ /*
+ * We aren't the primary crash CPU. We need to send it
+ * to a holding pattern to avoid it ending up in the panic
+ * code.
+ */
+ crash_kexec_secondary(regs);
+ }
+
+ if (!signr)
+ return;
+
+ /*
+ * While our oops output is serialised by a spinlock, output
+ * from panic() called below can race and corrupt it. If we
+ * know we are going to panic, delay for 1 second so we have a
+ * chance to get clean backtraces from all CPUs that are oopsing.
+ */
+ if (in_interrupt() || panic_on_oops || !current->pid ||
+ is_global_init(current)) {
+ mdelay(MSEC_PER_SEC);
+ }
+
+ if (in_interrupt())
+ panic("Fatal exception in interrupt");
+ if (panic_on_oops)
+ panic("Fatal exception");
+ do_exit(signr);
+}
+
+static int __kprobes __die(const char *str, struct pt_regs *regs, long err)
+{
+ printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
+#ifdef CONFIG_PREEMPT
+ printk("PREEMPT ");
+#endif
+#ifdef CONFIG_SMP
+ printk("SMP NR_CPUS=%d ", NR_CPUS);
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ printk("DEBUG_PAGEALLOC ");
+#endif
+#ifdef CONFIG_NUMA
+ printk("NUMA ");
+#endif
+ printk("%s\n", ppc_md.name ? ppc_md.name : "");
+
+ if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP)
+ return 1;
+
+ print_modules();
+ show_regs(regs);
+
+ return 0;
+}
+
+void die(const char *str, struct pt_regs *regs, long err)
+{
+ unsigned long flags = oops_begin(regs);
+
+ if (__die(str, regs, err))
+ err = 0;
+ oops_end(flags, regs, err);
+}
+
+void user_single_step_siginfo(struct task_struct *tsk,
+ struct pt_regs *regs, siginfo_t *info)
+{
+ memset(info, 0, sizeof(*info));
+ info->si_signo = SIGTRAP;
+ info->si_code = TRAP_TRACE;
+ info->si_addr = (void __user *)regs->nip;
+}
+
+void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
+{
+ siginfo_t info;
+ const char fmt32[] = KERN_INFO "%s[%d]: unhandled signal %d " \
+ "at %08lx nip %08lx lr %08lx code %x\n";
+ const char fmt64[] = KERN_INFO "%s[%d]: unhandled signal %d " \
+ "at %016lx nip %016lx lr %016lx code %x\n";
+
+ if (!user_mode(regs)) {
+ die("Exception in kernel mode", regs, signr);
+ return;
+ }
+
+ if (show_unhandled_signals && unhandled_signal(current, signr)) {
+ printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32,
+ current->comm, current->pid, signr,
+ addr, regs->nip, regs->link, code);
+ }
+
+ if (arch_irqs_disabled() && !arch_irq_disabled_regs(regs))
+ local_irq_enable();
+
+ memset(&info, 0, sizeof(info));
+ info.si_signo = signr;
+ info.si_code = code;
+ info.si_addr = (void __user *) addr;
+ force_sig_info(signr, &info, current);
+}
+
+#ifdef CONFIG_PPC64
+void system_reset_exception(struct pt_regs *regs)
+{
+ /* See if any machine dependent calls */
+ if (ppc_md.system_reset_exception) {
+ if (ppc_md.system_reset_exception(regs))
+ return;
+ }
+
+ die("System Reset", regs, SIGABRT);
+
+ /* Must die if the interrupt is not recoverable */
+ if (!(regs->msr & MSR_RI))
+ panic("Unrecoverable System Reset");
+
+ /* What should we do here? We could issue a shutdown or hard reset. */
+}
+#endif
+
+/*
+ * I/O accesses can cause machine checks on powermacs.
+ * Check if the NIP corresponds to the address of a sync
+ * instruction for which there is an entry in the exception
+ * table.
+ * Note that the 601 only takes a machine check on TEA
+ * (transfer error ack) signal assertion, and does not
+ * set any of the top 16 bits of SRR1.
+ * -- paulus.
+ */
+static inline int check_io_access(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC32
+ unsigned long msr = regs->msr;
+ const struct exception_table_entry *entry;
+ unsigned int *nip = (unsigned int *)regs->nip;
+
+ if (((msr & 0xffff0000) == 0 || (msr & (0x80000 | 0x40000)))
+ && (entry = search_exception_tables(regs->nip)) != NULL) {
+ /*
+ * Check that it's a sync instruction, or somewhere
+ * in the twi; isync; nop sequence that inb/inw/inl uses.
+ * As the address is in the exception table
+ * we should be able to read the instr there.
+ * For the debug message, we look at the preceding
+ * load or store.
+ */
+ if (*nip == 0x60000000) /* nop */
+ nip -= 2;
+ else if (*nip == 0x4c00012c) /* isync */
+ --nip;
+ if (*nip == 0x7c0004ac || (*nip >> 26) == 3) {
+ /* sync or twi */
+ unsigned int rb;
+
+ --nip;
+ rb = (*nip >> 11) & 0x1f;
+ printk(KERN_DEBUG "%s bad port %lx at %p\n",
+ (*nip & 0x100)? "OUT to": "IN from",
+ regs->gpr[rb] - _IO_BASE, nip);
+ regs->msr |= MSR_RI;
+ regs->nip = entry->fixup;
+ return 1;
+ }
+ }
+#endif /* CONFIG_PPC32 */
+ return 0;
+}
+
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+/* On 4xx, the reason for the machine check or program exception
+ is in the ESR. */
+#define get_reason(regs) ((regs)->dsisr)
+#ifndef CONFIG_FSL_BOOKE
+#define get_mc_reason(regs) ((regs)->dsisr)
+#else
+#define get_mc_reason(regs) (mfspr(SPRN_MCSR))
+#endif
+#define REASON_FP ESR_FP
+#define REASON_ILLEGAL (ESR_PIL | ESR_PUO)
+#define REASON_PRIVILEGED ESR_PPR
+#define REASON_TRAP ESR_PTR
+
+/* single-step stuff */
+#define single_stepping(regs) (current->thread.dbcr0 & DBCR0_IC)
+#define clear_single_step(regs) (current->thread.dbcr0 &= ~DBCR0_IC)
+
+#else
+/* On non-4xx, the reason for the machine check or program
+ exception is in the MSR. */
+#define get_reason(regs) ((regs)->msr)
+#define get_mc_reason(regs) ((regs)->msr)
+#define REASON_FP 0x100000
+#define REASON_ILLEGAL 0x80000
+#define REASON_PRIVILEGED 0x40000
+#define REASON_TRAP 0x20000
+
+#define single_stepping(regs) ((regs)->msr & MSR_SE)
+#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE)
+#endif
+
+#if defined(CONFIG_4xx)
+int machine_check_4xx(struct pt_regs *regs)
+{
+ unsigned long reason = get_mc_reason(regs);
+
+ if (reason & ESR_IMCP) {
+ printk("Instruction");
+ mtspr(SPRN_ESR, reason & ~ESR_IMCP);
+ } else
+ printk("Data");
+ printk(" machine check in kernel mode.\n");
+
+ return 0;
+}
+
+int machine_check_440A(struct pt_regs *regs)
+{
+ unsigned long reason = get_mc_reason(regs);
+
+ printk("Machine check in kernel mode.\n");
+ if (reason & ESR_IMCP){
+ printk("Instruction Synchronous Machine Check exception\n");
+ mtspr(SPRN_ESR, reason & ~ESR_IMCP);
+ }
+ else {
+ u32 mcsr = mfspr(SPRN_MCSR);
+ if (mcsr & MCSR_IB)
+ printk("Instruction Read PLB Error\n");
+ if (mcsr & MCSR_DRB)
+ printk("Data Read PLB Error\n");
+ if (mcsr & MCSR_DWB)
+ printk("Data Write PLB Error\n");
+ if (mcsr & MCSR_TLBP)
+ printk("TLB Parity Error\n");
+ if (mcsr & MCSR_ICP){
+ flush_instruction_cache();
+ printk("I-Cache Parity Error\n");
+ }
+ if (mcsr & MCSR_DCSP)
+ printk("D-Cache Search Parity Error\n");
+ if (mcsr & MCSR_DCFP)
+ printk("D-Cache Flush Parity Error\n");
+ if (mcsr & MCSR_IMPE)
+ printk("Machine Check exception is imprecise\n");
+
+ /* Clear MCSR */
+ mtspr(SPRN_MCSR, mcsr);
+ }
+ return 0;
+}
+
+int machine_check_47x(struct pt_regs *regs)
+{
+ unsigned long reason = get_mc_reason(regs);
+ u32 mcsr;
+
+ printk(KERN_ERR "Machine check in kernel mode.\n");
+ if (reason & ESR_IMCP) {
+ printk(KERN_ERR
+ "Instruction Synchronous Machine Check exception\n");
+ mtspr(SPRN_ESR, reason & ~ESR_IMCP);
+ return 0;
+ }
+ mcsr = mfspr(SPRN_MCSR);
+ if (mcsr & MCSR_IB)
+ printk(KERN_ERR "Instruction Read PLB Error\n");
+ if (mcsr & MCSR_DRB)
+ printk(KERN_ERR "Data Read PLB Error\n");
+ if (mcsr & MCSR_DWB)
+ printk(KERN_ERR "Data Write PLB Error\n");
+ if (mcsr & MCSR_TLBP)
+ printk(KERN_ERR "TLB Parity Error\n");
+ if (mcsr & MCSR_ICP) {
+ flush_instruction_cache();
+ printk(KERN_ERR "I-Cache Parity Error\n");
+ }
+ if (mcsr & MCSR_DCSP)
+ printk(KERN_ERR "D-Cache Search Parity Error\n");
+ if (mcsr & PPC47x_MCSR_GPR)
+ printk(KERN_ERR "GPR Parity Error\n");
+ if (mcsr & PPC47x_MCSR_FPR)
+ printk(KERN_ERR "FPR Parity Error\n");
+ if (mcsr & PPC47x_MCSR_IPR)
+ printk(KERN_ERR "Machine Check exception is imprecise\n");
+
+ /* Clear MCSR */
+ mtspr(SPRN_MCSR, mcsr);
+
+ return 0;
+}
+#elif defined(CONFIG_E500)
+int machine_check_e500mc(struct pt_regs *regs)
+{
+ unsigned long mcsr = mfspr(SPRN_MCSR);
+ unsigned long reason = mcsr;
+ int recoverable = 1;
+
+ if (reason & MCSR_LD) {
+ recoverable = fsl_rio_mcheck_exception(regs);
+ if (recoverable == 1)
+ goto silent_out;
+ }
+
+ printk("Machine check in kernel mode.\n");
+ printk("Caused by (from MCSR=%lx): ", reason);
+
+ if (reason & MCSR_MCP)
+ printk("Machine Check Signal\n");
+
+ if (reason & MCSR_ICPERR) {
+ printk("Instruction Cache Parity Error\n");
+
+ /*
+ * This is recoverable by invalidating the i-cache.
+ */
+ mtspr(SPRN_L1CSR1, mfspr(SPRN_L1CSR1) | L1CSR1_ICFI);
+ while (mfspr(SPRN_L1CSR1) & L1CSR1_ICFI)
+ ;
+
+ /*
+ * This will generally be accompanied by an instruction
+ * fetch error report -- only treat MCSR_IF as fatal
+ * if it wasn't due to an L1 parity error.
+ */
+ reason &= ~MCSR_IF;
+ }
+
+ if (reason & MCSR_DCPERR_MC) {
+ printk("Data Cache Parity Error\n");
+
+ /*
+ * In write shadow mode we auto-recover from the error, but it
+ * may still get logged and cause a machine check. We should
+ * only treat the non-write shadow case as non-recoverable.
+ */
+ if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
+ recoverable = 0;
+ }
+
+ if (reason & MCSR_L2MMU_MHIT) {
+ printk("Hit on multiple TLB entries\n");
+ recoverable = 0;
+ }
+
+ if (reason & MCSR_NMI)
+ printk("Non-maskable interrupt\n");
+
+ if (reason & MCSR_IF) {
+ printk("Instruction Fetch Error Report\n");
+ recoverable = 0;
+ }
+
+ if (reason & MCSR_LD) {
+ printk("Load Error Report\n");
+ recoverable = 0;
+ }
+
+ if (reason & MCSR_ST) {
+ printk("Store Error Report\n");
+ recoverable = 0;
+ }
+
+ if (reason & MCSR_LDG) {
+ printk("Guarded Load Error Report\n");
+ recoverable = 0;
+ }
+
+ if (reason & MCSR_TLBSYNC)
+ printk("Simultaneous tlbsync operations\n");
+
+ if (reason & MCSR_BSL2_ERR) {
+ printk("Level 2 Cache Error\n");
+ recoverable = 0;
+ }
+
+ if (reason & MCSR_MAV) {
+ u64 addr;
+
+ addr = mfspr(SPRN_MCAR);
+ addr |= (u64)mfspr(SPRN_MCARU) << 32;
+
+ printk("Machine Check %s Address: %#llx\n",
+ reason & MCSR_MEA ? "Effective" : "Physical", addr);
+ }
+
+silent_out:
+ mtspr(SPRN_MCSR, mcsr);
+ return mfspr(SPRN_MCSR) == 0 && recoverable;
+}
+
+int machine_check_e500(struct pt_regs *regs)
+{
+ unsigned long reason = get_mc_reason(regs);
+
+ if (reason & MCSR_BUS_RBERR) {
+ if (fsl_rio_mcheck_exception(regs))
+ return 1;
+ }
+
+ printk("Machine check in kernel mode.\n");
+ printk("Caused by (from MCSR=%lx): ", reason);
+
+ if (reason & MCSR_MCP)
+ printk("Machine Check Signal\n");
+ if (reason & MCSR_ICPERR)
+ printk("Instruction Cache Parity Error\n");
+ if (reason & MCSR_DCP_PERR)
+ printk("Data Cache Push Parity Error\n");
+ if (reason & MCSR_DCPERR)
+ printk("Data Cache Parity Error\n");
+ if (reason & MCSR_BUS_IAERR)
+ printk("Bus - Instruction Address Error\n");
+ if (reason & MCSR_BUS_RAERR)
+ printk("Bus - Read Address Error\n");
+ if (reason & MCSR_BUS_WAERR)
+ printk("Bus - Write Address Error\n");
+ if (reason & MCSR_BUS_IBERR)
+ printk("Bus - Instruction Data Error\n");
+ if (reason & MCSR_BUS_RBERR)
+ printk("Bus - Read Data Bus Error\n");
+ if (reason & MCSR_BUS_WBERR)
+ printk("Bus - Read Data Bus Error\n");
+ if (reason & MCSR_BUS_IPERR)
+ printk("Bus - Instruction Parity Error\n");
+ if (reason & MCSR_BUS_RPERR)
+ printk("Bus - Read Parity Error\n");
+
+ return 0;
+}
+
+int machine_check_generic(struct pt_regs *regs)
+{
+ return 0;
+}
+#elif defined(CONFIG_E200)
+int machine_check_e200(struct pt_regs *regs)
+{
+ unsigned long reason = get_mc_reason(regs);
+
+ printk("Machine check in kernel mode.\n");
+ printk("Caused by (from MCSR=%lx): ", reason);
+
+ if (reason & MCSR_MCP)
+ printk("Machine Check Signal\n");
+ if (reason & MCSR_CP_PERR)
+ printk("Cache Push Parity Error\n");
+ if (reason & MCSR_CPERR)
+ printk("Cache Parity Error\n");
+ if (reason & MCSR_EXCP_ERR)
+ printk("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n");
+ if (reason & MCSR_BUS_IRERR)
+ printk("Bus - Read Bus Error on instruction fetch\n");
+ if (reason & MCSR_BUS_DRERR)
+ printk("Bus - Read Bus Error on data load\n");
+ if (reason & MCSR_BUS_WRERR)
+ printk("Bus - Write Bus Error on buffered store or cache line push\n");
+
+ return 0;
+}
+#else
+int machine_check_generic(struct pt_regs *regs)
+{
+ unsigned long reason = get_mc_reason(regs);
+
+ printk("Machine check in kernel mode.\n");
+ printk("Caused by (from SRR1=%lx): ", reason);
+ switch (reason & 0x601F0000) {
+ case 0x80000:
+ printk("Machine check signal\n");
+ break;
+ case 0: /* for 601 */
+ case 0x40000:
+ case 0x140000: /* 7450 MSS error and TEA */
+ printk("Transfer error ack signal\n");
+ break;
+ case 0x20000:
+ printk("Data parity error signal\n");
+ break;
+ case 0x10000:
+ printk("Address parity error signal\n");
+ break;
+ case 0x20000000:
+ printk("L1 Data Cache error\n");
+ break;
+ case 0x40000000:
+ printk("L1 Instruction Cache error\n");
+ break;
+ case 0x00100000:
+ printk("L2 data cache parity error\n");
+ break;
+ default:
+ printk("Unknown values in msr\n");
+ }
+ return 0;
+}
+#endif /* everything else */
+
+void machine_check_exception(struct pt_regs *regs)
+{
+ int recover = 0;
+
+ __get_cpu_var(irq_stat).mce_exceptions++;
+
+ /* See if any machine dependent calls. In theory, we would want
+ * to call the CPU first, and call the ppc_md. one if the CPU
+ * one returns a positive number. However there is existing code
+ * that assumes the board gets a first chance, so let's keep it
+ * that way for now and fix things later. --BenH.
+ */
+ if (ppc_md.machine_check_exception)
+ recover = ppc_md.machine_check_exception(regs);
+ else if (cur_cpu_spec->machine_check)
+ recover = cur_cpu_spec->machine_check(regs);
+
+ if (recover > 0)
+ return;
+
+#if defined(CONFIG_8xx) && defined(CONFIG_PCI)
+ /* the qspan pci read routines can cause machine checks -- Cort
+ *
+ * yuck !!! that totally needs to go away ! There are better ways
+ * to deal with that than having a wart in the mcheck handler.
+ * -- BenH
+ */
+ bad_page_fault(regs, regs->dar, SIGBUS);
+ return;
+#endif
+
+ if (debugger_fault_handler(regs))
+ return;
+
+ if (check_io_access(regs))
+ return;
+
+ die("Machine check", regs, SIGBUS);
+
+ /* Must die if the interrupt is not recoverable */
+ if (!(regs->msr & MSR_RI))
+ panic("Unrecoverable Machine check");
+}
+
+void SMIException(struct pt_regs *regs)
+{
+ die("System Management Interrupt", regs, SIGABRT);
+}
+
+void unknown_exception(struct pt_regs *regs)
+{
+ printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
+ regs->nip, regs->msr, regs->trap);
+
+ _exception(SIGTRAP, regs, 0, 0);
+}
+
+void instruction_breakpoint_exception(struct pt_regs *regs)
+{
+ if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
+ 5, SIGTRAP) == NOTIFY_STOP)
+ return;
+ if (debugger_iabr_match(regs))
+ return;
+ _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
+}
+
+void RunModeException(struct pt_regs *regs)
+{
+ _exception(SIGTRAP, regs, 0, 0);
+}
+
+void __kprobes single_step_exception(struct pt_regs *regs)
+{
+ clear_single_step(regs);
+
+ if (notify_die(DIE_SSTEP, "single_step", regs, 5,
+ 5, SIGTRAP) == NOTIFY_STOP)
+ return;
+ if (debugger_sstep(regs))
+ return;
+
+ _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
+}
+
+/*
+ * After we have successfully emulated an instruction, we have to
+ * check if the instruction was being single-stepped, and if so,
+ * pretend we got a single-step exception. This was pointed out
+ * by Kumar Gala. -- paulus
+ */
+static void emulate_single_step(struct pt_regs *regs)
+{
+ if (single_stepping(regs))
+ single_step_exception(regs);
+}
+
+static inline int __parse_fpscr(unsigned long fpscr)
+{
+ int ret = 0;
+
+ /* Invalid operation */
+ if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX))
+ ret = FPE_FLTINV;
+
+ /* Overflow */
+ else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX))
+ ret = FPE_FLTOVF;
+
+ /* Underflow */
+ else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX))
+ ret = FPE_FLTUND;
+
+ /* Divide by zero */
+ else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX))
+ ret = FPE_FLTDIV;
+
+ /* Inexact result */
+ else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX))
+ ret = FPE_FLTRES;
+
+ return ret;
+}
+
+static void parse_fpe(struct pt_regs *regs)
+{
+ int code = 0;
+
+ flush_fp_to_thread(current);
+
+ code = __parse_fpscr(current->thread.fpscr.val);
+
+ _exception(SIGFPE, regs, code, regs->nip);
+}
+
+/*
+ * Illegal instruction emulation support. Originally written to
+ * provide the PVR to user applications using the mfspr rd, PVR.
+ * Return non-zero if we can't emulate, or -EFAULT if the associated
+ * memory access caused an access fault. Return zero on success.
+ *
+ * There are a couple of ways to do this, either "decode" the instruction
+ * or directly match lots of bits. In this case, matching lots of
+ * bits is faster and easier.
+ *
+ */
+static int emulate_string_inst(struct pt_regs *regs, u32 instword)
+{
+ u8 rT = (instword >> 21) & 0x1f;
+ u8 rA = (instword >> 16) & 0x1f;
+ u8 NB_RB = (instword >> 11) & 0x1f;
+ u32 num_bytes;
+ unsigned long EA;
+ int pos = 0;
+
+ /* Early out if we are an invalid form of lswx */
+ if ((instword & PPC_INST_STRING_MASK) == PPC_INST_LSWX)
+ if ((rT == rA) || (rT == NB_RB))
+ return -EINVAL;
+
+ EA = (rA == 0) ? 0 : regs->gpr[rA];
+
+ switch (instword & PPC_INST_STRING_MASK) {
+ case PPC_INST_LSWX:
+ case PPC_INST_STSWX:
+ EA += NB_RB;
+ num_bytes = regs->xer & 0x7f;
+ break;
+ case PPC_INST_LSWI:
+ case PPC_INST_STSWI:
+ num_bytes = (NB_RB == 0) ? 32 : NB_RB;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ while (num_bytes != 0)
+ {
+ u8 val;
+ u32 shift = 8 * (3 - (pos & 0x3));
+
+ switch ((instword & PPC_INST_STRING_MASK)) {
+ case PPC_INST_LSWX:
+ case PPC_INST_LSWI:
+ if (get_user(val, (u8 __user *)EA))
+ return -EFAULT;
+ /* first time updating this reg,
+ * zero it out */
+ if (pos == 0)
+ regs->gpr[rT] = 0;
+ regs->gpr[rT] |= val << shift;
+ break;
+ case PPC_INST_STSWI:
+ case PPC_INST_STSWX:
+ val = regs->gpr[rT] >> shift;
+ if (put_user(val, (u8 __user *)EA))
+ return -EFAULT;
+ break;
+ }
+ /* move EA to next address */
+ EA += 1;
+ num_bytes--;
+
+ /* manage our position within the register */
+ if (++pos == 4) {
+ pos = 0;
+ if (++rT == 32)
+ rT = 0;
+ }
+ }
+
+ return 0;
+}
+
+static int emulate_popcntb_inst(struct pt_regs *regs, u32 instword)
+{
+ u32 ra,rs;
+ unsigned long tmp;
+
+ ra = (instword >> 16) & 0x1f;
+ rs = (instword >> 21) & 0x1f;
+
+ tmp = regs->gpr[rs];
+ tmp = tmp - ((tmp >> 1) & 0x5555555555555555ULL);
+ tmp = (tmp & 0x3333333333333333ULL) + ((tmp >> 2) & 0x3333333333333333ULL);
+ tmp = (tmp + (tmp >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
+ regs->gpr[ra] = tmp;
+
+ return 0;
+}
+
+static int emulate_isel(struct pt_regs *regs, u32 instword)
+{
+ u8 rT = (instword >> 21) & 0x1f;
+ u8 rA = (instword >> 16) & 0x1f;
+ u8 rB = (instword >> 11) & 0x1f;
+ u8 BC = (instword >> 6) & 0x1f;
+ u8 bit;
+ unsigned long tmp;
+
+ tmp = (rA == 0) ? 0 : regs->gpr[rA];
+ bit = (regs->ccr >> (31 - BC)) & 0x1;
+
+ regs->gpr[rT] = bit ? tmp : regs->gpr[rB];
+
+ return 0;
+}
+
+static int emulate_instruction(struct pt_regs *regs)
+{
+ u32 instword;
+ u32 rd;
+
+ if (!user_mode(regs) || (regs->msr & MSR_LE))
+ return -EINVAL;
+ CHECK_FULL_REGS(regs);
+
+ if (get_user(instword, (u32 __user *)(regs->nip)))
+ return -EFAULT;
+
+ /* Emulate the mfspr rD, PVR. */
+ if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
+ PPC_WARN_EMULATED(mfpvr, regs);
+ rd = (instword >> 21) & 0x1f;
+ regs->gpr[rd] = mfspr(SPRN_PVR);
+ return 0;
+ }
+
+ /* Emulating the dcba insn is just a no-op. */
+ if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
+ PPC_WARN_EMULATED(dcba, regs);
+ return 0;
+ }
+
+ /* Emulate the mcrxr insn. */
+ if ((instword & PPC_INST_MCRXR_MASK) == PPC_INST_MCRXR) {
+ int shift = (instword >> 21) & 0x1c;
+ unsigned long msk = 0xf0000000UL >> shift;
+
+ PPC_WARN_EMULATED(mcrxr, regs);
+ regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk);
+ regs->xer &= ~0xf0000000UL;
+ return 0;
+ }
+
+ /* Emulate load/store string insn. */
+ if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
+ PPC_WARN_EMULATED(string, regs);
+ return emulate_string_inst(regs, instword);
+ }
+
+ /* Emulate the popcntb (Population Count Bytes) instruction. */
+ if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
+ PPC_WARN_EMULATED(popcntb, regs);
+ return emulate_popcntb_inst(regs, instword);
+ }
+
+ /* Emulate isel (Integer Select) instruction */
+ if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
+ PPC_WARN_EMULATED(isel, regs);
+ return emulate_isel(regs, instword);
+ }
+
+#ifdef CONFIG_PPC64
+ /* Emulate the mfspr rD, DSCR. */
+ if (((instword & PPC_INST_MFSPR_DSCR_MASK) == PPC_INST_MFSPR_DSCR) &&
+ cpu_has_feature(CPU_FTR_DSCR)) {
+ PPC_WARN_EMULATED(mfdscr, regs);
+ rd = (instword >> 21) & 0x1f;
+ regs->gpr[rd] = mfspr(SPRN_DSCR);
+ return 0;
+ }
+ /* Emulate the mtspr DSCR, rD. */
+ if (((instword & PPC_INST_MTSPR_DSCR_MASK) == PPC_INST_MTSPR_DSCR) &&
+ cpu_has_feature(CPU_FTR_DSCR)) {
+ PPC_WARN_EMULATED(mtdscr, regs);
+ rd = (instword >> 21) & 0x1f;
+ mtspr(SPRN_DSCR, regs->gpr[rd]);
+ current->thread.dscr_inherit = 1;
+ return 0;
+ }
+#endif
+
+ return -EINVAL;
+}
+
+int is_valid_bugaddr(unsigned long addr)
+{
+ return is_kernel_addr(addr);
+}
+
+void __kprobes program_check_exception(struct pt_regs *regs)
+{
+ unsigned int reason = get_reason(regs);
+ extern int do_mathemu(struct pt_regs *regs);
+
+ /* We can now get here via a FP Unavailable exception if the core
+ * has no FPU, in that case the reason flags will be 0 */
+
+ if (reason & REASON_FP) {
+ /* IEEE FP exception */
+ parse_fpe(regs);
+ return;
+ }
+ if (reason & REASON_TRAP) {
+ /* Debugger is first in line to stop recursive faults in
+ * rcu_lock, notify_die, or atomic_notifier_call_chain */
+ if (debugger_bpt(regs))
+ return;
+
+ /* trap exception */
+ if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
+ == NOTIFY_STOP)
+ return;
+
+ if (!(regs->msr & MSR_PR) && /* not user-mode */
+ report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) {
+ regs->nip += 4;
+ return;
+ }
+ _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
+ return;
+ }
+
+ /* We restore the interrupt state now */
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
+
+#ifdef CONFIG_MATH_EMULATION
+ /* (reason & REASON_ILLEGAL) would be the obvious thing here,
+ * but there seems to be a hardware bug on the 405GP (RevD)
+ * that means ESR is sometimes set incorrectly - either to
+ * ESR_DST (!?) or 0. In the process of chasing this with the
+ * hardware people - not sure if it can happen on any illegal
+ * instruction or only on FP instructions, whether there is a
+ * pattern to occurrences etc. -dgibson 31/Mar/2003 */
+ switch (do_mathemu(regs)) {
+ case 0:
+ emulate_single_step(regs);
+ return;
+ case 1: {
+ int code = 0;
+ code = __parse_fpscr(current->thread.fpscr.val);
+ _exception(SIGFPE, regs, code, regs->nip);
+ return;
+ }
+ case -EFAULT:
+ _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
+ return;
+ }
+ /* fall through on any other errors */
+#endif /* CONFIG_MATH_EMULATION */
+
+ /* Try to emulate it if we should. */
+ if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) {
+ switch (emulate_instruction(regs)) {
+ case 0:
+ regs->nip += 4;
+ emulate_single_step(regs);
+ return;
+ case -EFAULT:
+ _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
+ return;
+ }
+ }
+
+ if (reason & REASON_PRIVILEGED)
+ _exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
+ else
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+}
+
+void alignment_exception(struct pt_regs *regs)
+{
+ int sig, code, fixed = 0;
+
+ /* We restore the interrupt state now */
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
+
+ /* we don't implement logging of alignment exceptions */
+ if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS))
+ fixed = fix_alignment(regs);
+
+ if (fixed == 1) {
+ regs->nip += 4; /* skip over emulated instruction */
+ emulate_single_step(regs);
+ return;
+ }
+
+ /* Operand address was bad */
+ if (fixed == -EFAULT) {
+ sig = SIGSEGV;
+ code = SEGV_ACCERR;
+ } else {
+ sig = SIGBUS;
+ code = BUS_ADRALN;
+ }
+ if (user_mode(regs))
+ _exception(sig, regs, code, regs->dar);
+ else
+ bad_page_fault(regs, regs->dar, sig);
+}
+
+void StackOverflow(struct pt_regs *regs)
+{
+ printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n",
+ current, regs->gpr[1]);
+ debugger(regs);
+ show_regs(regs);
+ panic("kernel stack overflow");
+}
+
+void nonrecoverable_exception(struct pt_regs *regs)
+{
+ printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n",
+ regs->nip, regs->msr);
+ debugger(regs);
+ die("nonrecoverable exception", regs, SIGKILL);
+}
+
+void trace_syscall(struct pt_regs *regs)
+{
+ printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n",
+ current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0],
+ regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted());
+}
+
+void kernel_fp_unavailable_exception(struct pt_regs *regs)
+{
+ printk(KERN_EMERG "Unrecoverable FP Unavailable Exception "
+ "%lx at %lx\n", regs->trap, regs->nip);
+ die("Unrecoverable FP Unavailable Exception", regs, SIGABRT);
+}
+
+void altivec_unavailable_exception(struct pt_regs *regs)
+{
+ if (user_mode(regs)) {
+ /* A user program has executed an altivec instruction,
+ but this kernel doesn't support altivec. */
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+ return;
+ }
+
+ printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception "
+ "%lx at %lx\n", regs->trap, regs->nip);
+ die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
+}
+
+void vsx_unavailable_exception(struct pt_regs *regs)
+{
+ if (user_mode(regs)) {
+ /* A user program has executed an vsx instruction,
+ but this kernel doesn't support vsx. */
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+ return;
+ }
+
+ printk(KERN_EMERG "Unrecoverable VSX Unavailable Exception "
+ "%lx at %lx\n", regs->trap, regs->nip);
+ die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT);
+}
+
+void performance_monitor_exception(struct pt_regs *regs)
+{
+ __get_cpu_var(irq_stat).pmu_irqs++;
+
+ perf_irq(regs);
+}
+
+#ifdef CONFIG_8xx
+void SoftwareEmulation(struct pt_regs *regs)
+{
+ extern int do_mathemu(struct pt_regs *);
+ extern int Soft_emulate_8xx(struct pt_regs *);
+#if defined(CONFIG_MATH_EMULATION) || defined(CONFIG_8XX_MINIMAL_FPEMU)
+ int errcode;
+#endif
+
+ CHECK_FULL_REGS(regs);
+
+ if (!user_mode(regs)) {
+ debugger(regs);
+ die("Kernel Mode Software FPU Emulation", regs, SIGFPE);
+ }
+
+#ifdef CONFIG_MATH_EMULATION
+ errcode = do_mathemu(regs);
+ if (errcode >= 0)
+ PPC_WARN_EMULATED(math, regs);
+
+ switch (errcode) {
+ case 0:
+ emulate_single_step(regs);
+ return;
+ case 1: {
+ int code = 0;
+ code = __parse_fpscr(current->thread.fpscr.val);
+ _exception(SIGFPE, regs, code, regs->nip);
+ return;
+ }
+ case -EFAULT:
+ _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
+ return;
+ default:
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+ return;
+ }
+
+#elif defined(CONFIG_8XX_MINIMAL_FPEMU)
+ errcode = Soft_emulate_8xx(regs);
+ if (errcode >= 0)
+ PPC_WARN_EMULATED(8xx, regs);
+
+ switch (errcode) {
+ case 0:
+ emulate_single_step(regs);
+ return;
+ case 1:
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+ return;
+ case -EFAULT:
+ _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
+ return;
+ }
+#else
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+#endif
+}
+#endif /* CONFIG_8xx */
+
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
+{
+ int changed = 0;
+ /*
+ * Determine the cause of the debug event, clear the
+ * event flags and send a trap to the handler. Torez
+ */
+ if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
+ dbcr_dac(current) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ current->thread.dbcr2 &= ~DBCR2_DAC12MODE;
+#endif
+ do_send_trap(regs, mfspr(SPRN_DAC1), debug_status, TRAP_HWBKPT,
+ 5);
+ changed |= 0x01;
+ } else if (debug_status & (DBSR_DAC2R | DBSR_DAC2W)) {
+ dbcr_dac(current) &= ~(DBCR_DAC2R | DBCR_DAC2W);
+ do_send_trap(regs, mfspr(SPRN_DAC2), debug_status, TRAP_HWBKPT,
+ 6);
+ changed |= 0x01;
+ } else if (debug_status & DBSR_IAC1) {
+ current->thread.dbcr0 &= ~DBCR0_IAC1;
+ dbcr_iac_range(current) &= ~DBCR_IAC12MODE;
+ do_send_trap(regs, mfspr(SPRN_IAC1), debug_status, TRAP_HWBKPT,
+ 1);
+ changed |= 0x01;
+ } else if (debug_status & DBSR_IAC2) {
+ current->thread.dbcr0 &= ~DBCR0_IAC2;
+ do_send_trap(regs, mfspr(SPRN_IAC2), debug_status, TRAP_HWBKPT,
+ 2);
+ changed |= 0x01;
+ } else if (debug_status & DBSR_IAC3) {
+ current->thread.dbcr0 &= ~DBCR0_IAC3;
+ dbcr_iac_range(current) &= ~DBCR_IAC34MODE;
+ do_send_trap(regs, mfspr(SPRN_IAC3), debug_status, TRAP_HWBKPT,
+ 3);
+ changed |= 0x01;
+ } else if (debug_status & DBSR_IAC4) {
+ current->thread.dbcr0 &= ~DBCR0_IAC4;
+ do_send_trap(regs, mfspr(SPRN_IAC4), debug_status, TRAP_HWBKPT,
+ 4);
+ changed |= 0x01;
+ }
+ /*
+ * At the point this routine was called, the MSR(DE) was turned off.
+ * Check all other debug flags and see if that bit needs to be turned
+ * back on or not.
+ */
+ if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, current->thread.dbcr1))
+ regs->msr |= MSR_DE;
+ else
+ /* Make sure the IDM flag is off */
+ current->thread.dbcr0 &= ~DBCR0_IDM;
+
+ if (changed & 0x01)
+ mtspr(SPRN_DBCR0, current->thread.dbcr0);
+}
+
+void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
+{
+ current->thread.dbsr = debug_status;
+
+ /* Hack alert: On BookE, Branch Taken stops on the branch itself, while
+ * on server, it stops on the target of the branch. In order to simulate
+ * the server behaviour, we thus restart right away with a single step
+ * instead of stopping here when hitting a BT
+ */
+ if (debug_status & DBSR_BT) {
+ regs->msr &= ~MSR_DE;
+
+ /* Disable BT */
+ mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_BT);
+ /* Clear the BT event */
+ mtspr(SPRN_DBSR, DBSR_BT);
+
+ /* Do the single step trick only when coming from userspace */
+ if (user_mode(regs)) {
+ current->thread.dbcr0 &= ~DBCR0_BT;
+ current->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+ regs->msr |= MSR_DE;
+ return;
+ }
+
+ if (notify_die(DIE_SSTEP, "block_step", regs, 5,
+ 5, SIGTRAP) == NOTIFY_STOP) {
+ return;
+ }
+ if (debugger_sstep(regs))
+ return;
+ } else if (debug_status & DBSR_IC) { /* Instruction complete */
+ regs->msr &= ~MSR_DE;
+
+ /* Disable instruction completion */
+ mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC);
+ /* Clear the instruction completion event */
+ mtspr(SPRN_DBSR, DBSR_IC);
+
+ if (notify_die(DIE_SSTEP, "single_step", regs, 5,
+ 5, SIGTRAP) == NOTIFY_STOP) {
+ return;
+ }
+
+ if (debugger_sstep(regs))
+ return;
+
+ if (user_mode(regs)) {
+ current->thread.dbcr0 &= ~DBCR0_IC;
+ if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0,
+ current->thread.dbcr1))
+ regs->msr |= MSR_DE;
+ else
+ /* Make sure the IDM bit is off */
+ current->thread.dbcr0 &= ~DBCR0_IDM;
+ }
+
+ _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
+ } else
+ handle_debug(regs, debug_status);
+}
+#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
+
+#if !defined(CONFIG_TAU_INT)
+void TAUException(struct pt_regs *regs)
+{
+ printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx %s\n",
+ regs->nip, regs->msr, regs->trap, print_tainted());
+}
+#endif /* CONFIG_INT_TAU */
+
+#ifdef CONFIG_ALTIVEC
+void altivec_assist_exception(struct pt_regs *regs)
+{
+ int err;
+
+ if (!user_mode(regs)) {
+ printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode"
+ " at %lx\n", regs->nip);
+ die("Kernel VMX/Altivec assist exception", regs, SIGILL);
+ }
+
+ flush_altivec_to_thread(current);
+
+ PPC_WARN_EMULATED(altivec, regs);
+ err = emulate_altivec(regs);
+ if (err == 0) {
+ regs->nip += 4; /* skip emulated instruction */
+ emulate_single_step(regs);
+ return;
+ }
+
+ if (err == -EFAULT) {
+ /* got an error reading the instruction */
+ _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
+ } else {
+ /* didn't recognize the instruction */
+ /* XXX quick hack for now: set the non-Java bit in the VSCR */
+ printk_ratelimited(KERN_ERR "Unrecognized altivec instruction "
+ "in %s at %lx\n", current->comm, regs->nip);
+ current->thread.vscr.u[3] |= 0x10000;
+ }
+}
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+void vsx_assist_exception(struct pt_regs *regs)
+{
+ if (!user_mode(regs)) {
+ printk(KERN_EMERG "VSX assist exception in kernel mode"
+ " at %lx\n", regs->nip);
+ die("Kernel VSX assist exception", regs, SIGILL);
+ }
+
+ flush_vsx_to_thread(current);
+ printk(KERN_INFO "VSX assist not supported at %lx\n", regs->nip);
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_FSL_BOOKE
+void CacheLockingException(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code)
+{
+ /* We treat cache locking instructions from the user
+ * as priv ops, in the future we could try to do
+ * something smarter
+ */
+ if (error_code & (ESR_DLK|ESR_ILK))
+ _exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
+ return;
+}
+#endif /* CONFIG_FSL_BOOKE */
+
+#ifdef CONFIG_SPE
+void SPEFloatingPointException(struct pt_regs *regs)
+{
+ extern int do_spe_mathemu(struct pt_regs *regs);
+ unsigned long spefscr;
+ int fpexc_mode;
+ int code = 0;
+ int err;
+
+ flush_spe_to_thread(current);
+
+ spefscr = current->thread.spefscr;
+ fpexc_mode = current->thread.fpexc_mode;
+
+ if ((spefscr & SPEFSCR_FOVF) && (fpexc_mode & PR_FP_EXC_OVF)) {
+ code = FPE_FLTOVF;
+ }
+ else if ((spefscr & SPEFSCR_FUNF) && (fpexc_mode & PR_FP_EXC_UND)) {
+ code = FPE_FLTUND;
+ }
+ else if ((spefscr & SPEFSCR_FDBZ) && (fpexc_mode & PR_FP_EXC_DIV))
+ code = FPE_FLTDIV;
+ else if ((spefscr & SPEFSCR_FINV) && (fpexc_mode & PR_FP_EXC_INV)) {
+ code = FPE_FLTINV;
+ }
+ else if ((spefscr & (SPEFSCR_FG | SPEFSCR_FX)) && (fpexc_mode & PR_FP_EXC_RES))
+ code = FPE_FLTRES;
+
+ err = do_spe_mathemu(regs);
+ if (err == 0) {
+ regs->nip += 4; /* skip emulated instruction */
+ emulate_single_step(regs);
+ return;
+ }
+
+ if (err == -EFAULT) {
+ /* got an error reading the instruction */
+ _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
+ } else if (err == -EINVAL) {
+ /* didn't recognize the instruction */
+ printk(KERN_ERR "unrecognized spe instruction "
+ "in %s at %lx\n", current->comm, regs->nip);
+ } else {
+ _exception(SIGFPE, regs, code, regs->nip);
+ }
+
+ return;
+}
+
+void SPEFloatingPointRoundException(struct pt_regs *regs)
+{
+ extern int speround_handler(struct pt_regs *regs);
+ int err;
+
+ preempt_disable();
+ if (regs->msr & MSR_SPE)
+ giveup_spe(current);
+ preempt_enable();
+
+ regs->nip -= 4;
+ err = speround_handler(regs);
+ if (err == 0) {
+ regs->nip += 4; /* skip emulated instruction */
+ emulate_single_step(regs);
+ return;
+ }
+
+ if (err == -EFAULT) {
+ /* got an error reading the instruction */
+ _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
+ } else if (err == -EINVAL) {
+ /* didn't recognize the instruction */
+ printk(KERN_ERR "unrecognized spe instruction "
+ "in %s at %lx\n", current->comm, regs->nip);
+ } else {
+ _exception(SIGFPE, regs, 0, regs->nip);
+ return;
+ }
+}
+#endif
+
+/*
+ * We enter here if we get an unrecoverable exception, that is, one
+ * that happened at a point where the RI (recoverable interrupt) bit
+ * in the MSR is 0. This indicates that SRR0/1 are live, and that
+ * we therefore lost state by taking this exception.
+ */
+void unrecoverable_exception(struct pt_regs *regs)
+{
+ printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n",
+ regs->trap, regs->nip);
+ die("Unrecoverable exception", regs, SIGABRT);
+}
+
+#ifdef CONFIG_BOOKE_WDT
+/*
+ * Default handler for a Watchdog exception,
+ * spins until a reboot occurs
+ */
+void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs)
+{
+ /* Generic WatchdogHandler, implement your own */
+ mtspr(SPRN_TCR, mfspr(SPRN_TCR)&(~TCR_WIE));
+ return;
+}
+
+void WatchdogException(struct pt_regs *regs)
+{
+ printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n");
+ WatchdogHandler(regs);
+}
+#endif
+
+/*
+ * We enter here if we discover during exception entry that we are
+ * running in supervisor mode with a userspace value in the stack pointer.
+ */
+void kernel_bad_stack(struct pt_regs *regs)
+{
+ printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n",
+ regs->gpr[1], regs->nip);
+ die("Bad kernel stack pointer", regs, SIGABRT);
+}
+
+void __init trap_init(void)
+{
+}
+
+
+#ifdef CONFIG_PPC_EMULATED_STATS
+
+#define WARN_EMULATED_SETUP(type) .type = { .name = #type }
+
+struct ppc_emulated ppc_emulated = {
+#ifdef CONFIG_ALTIVEC
+ WARN_EMULATED_SETUP(altivec),
+#endif
+ WARN_EMULATED_SETUP(dcba),
+ WARN_EMULATED_SETUP(dcbz),
+ WARN_EMULATED_SETUP(fp_pair),
+ WARN_EMULATED_SETUP(isel),
+ WARN_EMULATED_SETUP(mcrxr),
+ WARN_EMULATED_SETUP(mfpvr),
+ WARN_EMULATED_SETUP(multiple),
+ WARN_EMULATED_SETUP(popcntb),
+ WARN_EMULATED_SETUP(spe),
+ WARN_EMULATED_SETUP(string),
+ WARN_EMULATED_SETUP(unaligned),
+#ifdef CONFIG_MATH_EMULATION
+ WARN_EMULATED_SETUP(math),
+#elif defined(CONFIG_8XX_MINIMAL_FPEMU)
+ WARN_EMULATED_SETUP(8xx),
+#endif
+#ifdef CONFIG_VSX
+ WARN_EMULATED_SETUP(vsx),
+#endif
+#ifdef CONFIG_PPC64
+ WARN_EMULATED_SETUP(mfdscr),
+ WARN_EMULATED_SETUP(mtdscr),
+#endif
+};
+
+u32 ppc_warn_emulated;
+
+void ppc_warn_emulated_print(const char *type)
+{
+ pr_warn_ratelimited("%s used emulated %s instruction\n", current->comm,
+ type);
+}
+
+static int __init ppc_warn_emulated_init(void)
+{
+ struct dentry *dir, *d;
+ unsigned int i;
+ struct ppc_emulated_entry *entries = (void *)&ppc_emulated;
+
+ if (!powerpc_debugfs_root)
+ return -ENODEV;
+
+ dir = debugfs_create_dir("emulated_instructions",
+ powerpc_debugfs_root);
+ if (!dir)
+ return -ENOMEM;
+
+ d = debugfs_create_u32("do_warn", S_IRUGO | S_IWUSR, dir,
+ &ppc_warn_emulated);
+ if (!d)
+ goto fail;
+
+ for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) {
+ d = debugfs_create_u32(entries[i].name, S_IRUGO | S_IWUSR, dir,
+ (u32 *)&entries[i].val.counter);
+ if (!d)
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ debugfs_remove_recursive(dir);
+ return -ENOMEM;
+}
+
+device_initcall(ppc_warn_emulated_init);
+
+#endif /* CONFIG_PPC_EMULATED_STATS */
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
new file mode 100644
index 00000000..c39c1ca7
--- /dev/null
+++ b/arch/powerpc/kernel/udbg.c
@@ -0,0 +1,206 @@
+/*
+ * polling mode stateless debugging stuff, originally for NS16550 Serial Ports
+ *
+ * c 2001 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stdarg.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/console.h>
+#include <linux/init.h>
+#include <asm/processor.h>
+#include <asm/udbg.h>
+
+void (*udbg_putc)(char c);
+void (*udbg_flush)(void);
+int (*udbg_getc)(void);
+int (*udbg_getc_poll)(void);
+
+/*
+ * Early debugging facilities. You can enable _one_ of these via .config,
+ * if you do so your kernel _will not boot_ on anything else. Be careful.
+ */
+void __init udbg_early_init(void)
+{
+#if defined(CONFIG_PPC_EARLY_DEBUG_LPAR)
+ /* For LPAR machines that have an HVC console on vterm 0 */
+ udbg_init_debug_lpar();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_LPAR_HVSI)
+ /* For LPAR machines that have an HVSI console on vterm 0 */
+ udbg_init_debug_lpar_hvsi();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_G5)
+ /* For use on Apple G5 machines */
+ udbg_init_pmac_realmode();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_RTAS_PANEL)
+ /* RTAS panel debug */
+ udbg_init_rtas_panel();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_RTAS_CONSOLE)
+ /* RTAS console debug */
+ udbg_init_rtas_console();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_MAPLE)
+ /* Maple real mode debug */
+ udbg_init_maple_realmode();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_BEAT)
+ udbg_init_debug_beat();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE)
+ udbg_init_pas_realmode();
+#elif defined(CONFIG_BOOTX_TEXT)
+ udbg_init_btext();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_44x)
+ /* PPC44x debug */
+ udbg_init_44x_as1();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_40x)
+ /* PPC40x debug */
+ udbg_init_40x_realmode();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_CPM)
+ udbg_init_cpm();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO)
+ udbg_init_usbgecko();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP)
+ udbg_init_wsp();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_EHV_BC)
+ udbg_init_ehv_bc();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_PS3GELIC)
+ udbg_init_ps3gelic();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_OPAL_RAW)
+ udbg_init_debug_opal_raw();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_OPAL_HVSI)
+ udbg_init_debug_opal_hvsi();
+#endif
+
+#ifdef CONFIG_PPC_EARLY_DEBUG
+ console_loglevel = 10;
+
+ register_early_udbg_console();
+#endif
+}
+
+/* udbg library, used by xmon et al */
+void udbg_puts(const char *s)
+{
+ if (udbg_putc) {
+ char c;
+
+ if (s && *s != '\0') {
+ while ((c = *s++) != '\0')
+ udbg_putc(c);
+ }
+
+ if (udbg_flush)
+ udbg_flush();
+ }
+#if 0
+ else {
+ printk("%s", s);
+ }
+#endif
+}
+
+int udbg_write(const char *s, int n)
+{
+ int remain = n;
+ char c;
+
+ if (!udbg_putc)
+ return 0;
+
+ if (s && *s != '\0') {
+ while (((c = *s++) != '\0') && (remain-- > 0)) {
+ udbg_putc(c);
+ }
+ }
+
+ if (udbg_flush)
+ udbg_flush();
+
+ return n - remain;
+}
+
+int udbg_read(char *buf, int buflen)
+{
+ char *p = buf;
+ int i, c;
+
+ if (!udbg_getc)
+ return 0;
+
+ for (i = 0; i < buflen; ++i) {
+ do {
+ c = udbg_getc();
+ if (c == -1 && i == 0)
+ return -1;
+
+ } while (c == 0x11 || c == 0x13);
+ if (c == 0 || c == -1)
+ break;
+ *p++ = c;
+ }
+
+ return i;
+}
+
+#define UDBG_BUFSIZE 256
+void udbg_printf(const char *fmt, ...)
+{
+ char buf[UDBG_BUFSIZE];
+ va_list args;
+
+ va_start(args, fmt);
+ vsnprintf(buf, UDBG_BUFSIZE, fmt, args);
+ udbg_puts(buf);
+ va_end(args);
+}
+
+void __init udbg_progress(char *s, unsigned short hex)
+{
+ udbg_puts(s);
+ udbg_puts("\n");
+}
+
+/*
+ * Early boot console based on udbg
+ */
+static void udbg_console_write(struct console *con, const char *s,
+ unsigned int n)
+{
+ udbg_write(s, n);
+}
+
+static struct console udbg_console = {
+ .name = "udbg",
+ .write = udbg_console_write,
+ .flags = CON_PRINTBUFFER | CON_ENABLED | CON_BOOT | CON_ANYTIME,
+ .index = 0,
+};
+
+static int early_console_initialized;
+
+/*
+ * Called by setup_system after ppc_md->probe and ppc_md->early_init.
+ * Call it again after setting udbg_putc in ppc_md->setup_arch.
+ */
+void __init register_early_udbg_console(void)
+{
+ if (early_console_initialized)
+ return;
+
+ if (!udbg_putc)
+ return;
+
+ if (strstr(boot_command_line, "udbg-immortal")) {
+ printk(KERN_INFO "early console immortal !\n");
+ udbg_console.flags &= ~CON_BOOT;
+ }
+ early_console_initialized = 1;
+ register_console(&udbg_console);
+}
+
+#if 0 /* if you want to use this as a regular output console */
+console_initcall(register_udbg_console);
+#endif
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
new file mode 100644
index 00000000..6837f839
--- /dev/null
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -0,0 +1,351 @@
+/*
+ * udbg for NS16550 compatible serial ports
+ *
+ * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <asm/udbg.h>
+#include <asm/io.h>
+#include <asm/reg_a2.h>
+
+extern u8 real_readb(volatile u8 __iomem *addr);
+extern void real_writeb(u8 data, volatile u8 __iomem *addr);
+extern u8 real_205_readb(volatile u8 __iomem *addr);
+extern void real_205_writeb(u8 data, volatile u8 __iomem *addr);
+
+struct NS16550 {
+ /* this struct must be packed */
+ unsigned char rbr; /* 0 */
+ unsigned char ier; /* 1 */
+ unsigned char fcr; /* 2 */
+ unsigned char lcr; /* 3 */
+ unsigned char mcr; /* 4 */
+ unsigned char lsr; /* 5 */
+ unsigned char msr; /* 6 */
+ unsigned char scr; /* 7 */
+};
+
+#define thr rbr
+#define iir fcr
+#define dll rbr
+#define dlm ier
+#define dlab lcr
+
+#define LSR_DR 0x01 /* Data ready */
+#define LSR_OE 0x02 /* Overrun */
+#define LSR_PE 0x04 /* Parity error */
+#define LSR_FE 0x08 /* Framing error */
+#define LSR_BI 0x10 /* Break */
+#define LSR_THRE 0x20 /* Xmit holding register empty */
+#define LSR_TEMT 0x40 /* Xmitter empty */
+#define LSR_ERR 0x80 /* Error */
+
+#define LCR_DLAB 0x80
+
+static struct NS16550 __iomem *udbg_comport;
+
+static void udbg_550_flush(void)
+{
+ if (udbg_comport) {
+ while ((in_8(&udbg_comport->lsr) & LSR_THRE) == 0)
+ /* wait for idle */;
+ }
+}
+
+static void udbg_550_putc(char c)
+{
+ if (udbg_comport) {
+ if (c == '\n')
+ udbg_550_putc('\r');
+ udbg_550_flush();
+ out_8(&udbg_comport->thr, c);
+ }
+}
+
+static int udbg_550_getc_poll(void)
+{
+ if (udbg_comport) {
+ if ((in_8(&udbg_comport->lsr) & LSR_DR) != 0)
+ return in_8(&udbg_comport->rbr);
+ else
+ return -1;
+ }
+ return -1;
+}
+
+static int udbg_550_getc(void)
+{
+ if (udbg_comport) {
+ while ((in_8(&udbg_comport->lsr) & LSR_DR) == 0)
+ /* wait for char */;
+ return in_8(&udbg_comport->rbr);
+ }
+ return -1;
+}
+
+void udbg_init_uart(void __iomem *comport, unsigned int speed,
+ unsigned int clock)
+{
+ unsigned int dll, base_bauds;
+
+ if (clock == 0)
+ clock = 1843200;
+ if (speed == 0)
+ speed = 9600;
+
+ base_bauds = clock / 16;
+ dll = base_bauds / speed;
+
+ if (comport) {
+ udbg_comport = (struct NS16550 __iomem *)comport;
+ out_8(&udbg_comport->lcr, 0x00);
+ out_8(&udbg_comport->ier, 0xff);
+ out_8(&udbg_comport->ier, 0x00);
+ out_8(&udbg_comport->lcr, LCR_DLAB);
+ out_8(&udbg_comport->dll, dll & 0xff);
+ out_8(&udbg_comport->dlm, dll >> 8);
+ /* 8 data, 1 stop, no parity */
+ out_8(&udbg_comport->lcr, 0x03);
+ /* RTS/DTR */
+ out_8(&udbg_comport->mcr, 0x03);
+ /* Clear & enable FIFOs */
+ out_8(&udbg_comport->fcr ,0x07);
+ udbg_putc = udbg_550_putc;
+ udbg_flush = udbg_550_flush;
+ udbg_getc = udbg_550_getc;
+ udbg_getc_poll = udbg_550_getc_poll;
+ }
+}
+
+unsigned int udbg_probe_uart_speed(void __iomem *comport, unsigned int clock)
+{
+ unsigned int dll, dlm, divisor, prescaler, speed;
+ u8 old_lcr;
+ struct NS16550 __iomem *port = comport;
+
+ old_lcr = in_8(&port->lcr);
+
+ /* select divisor latch registers. */
+ out_8(&port->lcr, LCR_DLAB);
+
+ /* now, read the divisor */
+ dll = in_8(&port->dll);
+ dlm = in_8(&port->dlm);
+ divisor = dlm << 8 | dll;
+
+ /* check prescaling */
+ if (in_8(&port->mcr) & 0x80)
+ prescaler = 4;
+ else
+ prescaler = 1;
+
+ /* restore the LCR */
+ out_8(&port->lcr, old_lcr);
+
+ /* calculate speed */
+ speed = (clock / prescaler) / (divisor * 16);
+
+ /* sanity check */
+ if (speed > (clock / 16))
+ speed = 9600;
+
+ return speed;
+}
+
+#ifdef CONFIG_PPC_MAPLE
+void udbg_maple_real_flush(void)
+{
+ if (udbg_comport) {
+ while ((real_readb(&udbg_comport->lsr) & LSR_THRE) == 0)
+ /* wait for idle */;
+ }
+}
+
+void udbg_maple_real_putc(char c)
+{
+ if (udbg_comport) {
+ if (c == '\n')
+ udbg_maple_real_putc('\r');
+ udbg_maple_real_flush();
+ real_writeb(c, &udbg_comport->thr); eieio();
+ }
+}
+
+void __init udbg_init_maple_realmode(void)
+{
+ udbg_comport = (struct NS16550 __iomem *)0xf40003f8;
+
+ udbg_putc = udbg_maple_real_putc;
+ udbg_flush = udbg_maple_real_flush;
+ udbg_getc = NULL;
+ udbg_getc_poll = NULL;
+}
+#endif /* CONFIG_PPC_MAPLE */
+
+#ifdef CONFIG_PPC_PASEMI
+void udbg_pas_real_flush(void)
+{
+ if (udbg_comport) {
+ while ((real_205_readb(&udbg_comport->lsr) & LSR_THRE) == 0)
+ /* wait for idle */;
+ }
+}
+
+void udbg_pas_real_putc(char c)
+{
+ if (udbg_comport) {
+ if (c == '\n')
+ udbg_pas_real_putc('\r');
+ udbg_pas_real_flush();
+ real_205_writeb(c, &udbg_comport->thr); eieio();
+ }
+}
+
+void udbg_init_pas_realmode(void)
+{
+ udbg_comport = (struct NS16550 __iomem *)0xfcff03f8UL;
+
+ udbg_putc = udbg_pas_real_putc;
+ udbg_flush = udbg_pas_real_flush;
+ udbg_getc = NULL;
+ udbg_getc_poll = NULL;
+}
+#endif /* CONFIG_PPC_MAPLE */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_44x
+#include <platforms/44x/44x.h>
+
+static void udbg_44x_as1_flush(void)
+{
+ if (udbg_comport) {
+ while ((as1_readb(&udbg_comport->lsr) & LSR_THRE) == 0)
+ /* wait for idle */;
+ }
+}
+
+static void udbg_44x_as1_putc(char c)
+{
+ if (udbg_comport) {
+ if (c == '\n')
+ udbg_44x_as1_putc('\r');
+ udbg_44x_as1_flush();
+ as1_writeb(c, &udbg_comport->thr); eieio();
+ }
+}
+
+static int udbg_44x_as1_getc(void)
+{
+ if (udbg_comport) {
+ while ((as1_readb(&udbg_comport->lsr) & LSR_DR) == 0)
+ ; /* wait for char */
+ return as1_readb(&udbg_comport->rbr);
+ }
+ return -1;
+}
+
+void __init udbg_init_44x_as1(void)
+{
+ udbg_comport =
+ (struct NS16550 __iomem *)PPC44x_EARLY_DEBUG_VIRTADDR;
+
+ udbg_putc = udbg_44x_as1_putc;
+ udbg_flush = udbg_44x_as1_flush;
+ udbg_getc = udbg_44x_as1_getc;
+}
+#endif /* CONFIG_PPC_EARLY_DEBUG_44x */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_40x
+static void udbg_40x_real_flush(void)
+{
+ if (udbg_comport) {
+ while ((real_readb(&udbg_comport->lsr) & LSR_THRE) == 0)
+ /* wait for idle */;
+ }
+}
+
+static void udbg_40x_real_putc(char c)
+{
+ if (udbg_comport) {
+ if (c == '\n')
+ udbg_40x_real_putc('\r');
+ udbg_40x_real_flush();
+ real_writeb(c, &udbg_comport->thr); eieio();
+ }
+}
+
+static int udbg_40x_real_getc(void)
+{
+ if (udbg_comport) {
+ while ((real_readb(&udbg_comport->lsr) & LSR_DR) == 0)
+ ; /* wait for char */
+ return real_readb(&udbg_comport->rbr);
+ }
+ return -1;
+}
+
+void __init udbg_init_40x_realmode(void)
+{
+ udbg_comport = (struct NS16550 __iomem *)
+ CONFIG_PPC_EARLY_DEBUG_40x_PHYSADDR;
+
+ udbg_putc = udbg_40x_real_putc;
+ udbg_flush = udbg_40x_real_flush;
+ udbg_getc = udbg_40x_real_getc;
+ udbg_getc_poll = NULL;
+}
+#endif /* CONFIG_PPC_EARLY_DEBUG_40x */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
+static void udbg_wsp_flush(void)
+{
+ if (udbg_comport) {
+ while ((readb(&udbg_comport->lsr) & LSR_THRE) == 0)
+ /* wait for idle */;
+ }
+}
+
+static void udbg_wsp_putc(char c)
+{
+ if (udbg_comport) {
+ if (c == '\n')
+ udbg_wsp_putc('\r');
+ udbg_wsp_flush();
+ writeb(c, &udbg_comport->thr); eieio();
+ }
+}
+
+static int udbg_wsp_getc(void)
+{
+ if (udbg_comport) {
+ while ((readb(&udbg_comport->lsr) & LSR_DR) == 0)
+ ; /* wait for char */
+ return readb(&udbg_comport->rbr);
+ }
+ return -1;
+}
+
+static int udbg_wsp_getc_poll(void)
+{
+ if (udbg_comport)
+ if (readb(&udbg_comport->lsr) & LSR_DR)
+ return readb(&udbg_comport->rbr);
+ return -1;
+}
+
+void __init udbg_init_wsp(void)
+{
+ udbg_comport = (struct NS16550 __iomem *)WSP_UART_VIRT;
+
+ udbg_init_uart(udbg_comport, 57600, 50000000);
+
+ udbg_putc = udbg_wsp_putc;
+ udbg_flush = udbg_wsp_flush;
+ udbg_getc = udbg_wsp_getc;
+ udbg_getc_poll = udbg_wsp_getc_poll;
+}
+#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
new file mode 100644
index 00000000..9eb5b9b5
--- /dev/null
+++ b/arch/powerpc/kernel/vdso.c
@@ -0,0 +1,829 @@
+
+/*
+ * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
+ * <benh@kernel.crashing.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/elf.h>
+#include <linux/security.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/sections.h>
+#include <asm/firmware.h>
+#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
+
+#include "setup.h"
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/* Max supported size for symbol names */
+#define MAX_SYMNAME 64
+
+/* The alignment of the vDSO */
+#define VDSO_ALIGNMENT (1 << 16)
+
+extern char vdso32_start, vdso32_end;
+static void *vdso32_kbase = &vdso32_start;
+static unsigned int vdso32_pages;
+static struct page **vdso32_pagelist;
+unsigned long vdso32_sigtramp;
+unsigned long vdso32_rt_sigtramp;
+
+#ifdef CONFIG_PPC64
+extern char vdso64_start, vdso64_end;
+static void *vdso64_kbase = &vdso64_start;
+static unsigned int vdso64_pages;
+static struct page **vdso64_pagelist;
+unsigned long vdso64_rt_sigtramp;
+#endif /* CONFIG_PPC64 */
+
+static int vdso_ready;
+
+/*
+ * The vdso data page (aka. systemcfg for old ppc64 fans) is here.
+ * Once the early boot kernel code no longer needs to muck around
+ * with it, it will become dynamically allocated
+ */
+static union {
+ struct vdso_data data;
+ u8 page[PAGE_SIZE];
+} vdso_data_store __page_aligned_data;
+struct vdso_data *vdso_data = &vdso_data_store.data;
+
+/* Format of the patch table */
+struct vdso_patch_def
+{
+ unsigned long ftr_mask, ftr_value;
+ const char *gen_name;
+ const char *fix_name;
+};
+
+/* Table of functions to patch based on the CPU type/revision
+ *
+ * Currently, we only change sync_dicache to do nothing on processors
+ * with a coherent icache
+ */
+static struct vdso_patch_def vdso_patches[] = {
+ {
+ CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
+ "__kernel_sync_dicache", "__kernel_sync_dicache_p5"
+ },
+ {
+ CPU_FTR_USE_TB, 0,
+ "__kernel_gettimeofday", NULL
+ },
+ {
+ CPU_FTR_USE_TB, 0,
+ "__kernel_clock_gettime", NULL
+ },
+ {
+ CPU_FTR_USE_TB, 0,
+ "__kernel_clock_getres", NULL
+ },
+ {
+ CPU_FTR_USE_TB, 0,
+ "__kernel_get_tbfreq", NULL
+ },
+};
+
+/*
+ * Some infos carried around for each of them during parsing at
+ * boot time.
+ */
+struct lib32_elfinfo
+{
+ Elf32_Ehdr *hdr; /* ptr to ELF */
+ Elf32_Sym *dynsym; /* ptr to .dynsym section */
+ unsigned long dynsymsize; /* size of .dynsym section */
+ char *dynstr; /* ptr to .dynstr section */
+ unsigned long text; /* offset of .text section in .so */
+};
+
+struct lib64_elfinfo
+{
+ Elf64_Ehdr *hdr;
+ Elf64_Sym *dynsym;
+ unsigned long dynsymsize;
+ char *dynstr;
+ unsigned long text;
+};
+
+
+#ifdef __DEBUG
+static void dump_one_vdso_page(struct page *pg, struct page *upg)
+{
+ printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT),
+ page_count(pg),
+ pg->flags);
+ if (upg && !IS_ERR(upg) /* && pg != upg*/) {
+ printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg)
+ << PAGE_SHIFT),
+ page_count(upg),
+ upg->flags);
+ }
+ printk("\n");
+}
+
+static void dump_vdso_pages(struct vm_area_struct * vma)
+{
+ int i;
+
+ if (!vma || is_32bit_task()) {
+ printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase);
+ for (i=0; i<vdso32_pages; i++) {
+ struct page *pg = virt_to_page(vdso32_kbase +
+ i*PAGE_SIZE);
+ struct page *upg = (vma && vma->vm_mm) ?
+ follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0)
+ : NULL;
+ dump_one_vdso_page(pg, upg);
+ }
+ }
+ if (!vma || !is_32bit_task()) {
+ printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase);
+ for (i=0; i<vdso64_pages; i++) {
+ struct page *pg = virt_to_page(vdso64_kbase +
+ i*PAGE_SIZE);
+ struct page *upg = (vma && vma->vm_mm) ?
+ follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0)
+ : NULL;
+ dump_one_vdso_page(pg, upg);
+ }
+ }
+}
+#endif /* DEBUG */
+
+/*
+ * This is called from binfmt_elf, we create the special vma for the
+ * vDSO and insert it into the mm struct tree
+ */
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+ struct mm_struct *mm = current->mm;
+ struct page **vdso_pagelist;
+ unsigned long vdso_pages;
+ unsigned long vdso_base;
+ int rc;
+
+ if (!vdso_ready)
+ return 0;
+
+#ifdef CONFIG_PPC64
+ if (is_32bit_task()) {
+ vdso_pagelist = vdso32_pagelist;
+ vdso_pages = vdso32_pages;
+ vdso_base = VDSO32_MBASE;
+ } else {
+ vdso_pagelist = vdso64_pagelist;
+ vdso_pages = vdso64_pages;
+ /*
+ * On 64bit we don't have a preferred map address. This
+ * allows get_unmapped_area to find an area near other mmaps
+ * and most likely share a SLB entry.
+ */
+ vdso_base = 0;
+ }
+#else
+ vdso_pagelist = vdso32_pagelist;
+ vdso_pages = vdso32_pages;
+ vdso_base = VDSO32_MBASE;
+#endif
+
+ current->mm->context.vdso_base = 0;
+
+ /* vDSO has a problem and was disabled, just don't "enable" it for the
+ * process
+ */
+ if (vdso_pages == 0)
+ return 0;
+ /* Add a page to the vdso size for the data page */
+ vdso_pages ++;
+
+ /*
+ * pick a base address for the vDSO in process space. We try to put it
+ * at vdso_base which is the "natural" base for it, but we might fail
+ * and end up putting it elsewhere.
+ * Add enough to the size so that the result can be aligned.
+ */
+ down_write(&mm->mmap_sem);
+ vdso_base = get_unmapped_area(NULL, vdso_base,
+ (vdso_pages << PAGE_SHIFT) +
+ ((VDSO_ALIGNMENT - 1) & PAGE_MASK),
+ 0, 0);
+ if (IS_ERR_VALUE(vdso_base)) {
+ rc = vdso_base;
+ goto fail_mmapsem;
+ }
+
+ /* Add required alignment. */
+ vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT);
+
+ /*
+ * Put vDSO base into mm struct. We need to do this before calling
+ * install_special_mapping or the perf counter mmap tracking code
+ * will fail to recognise it as a vDSO (since arch_vma_name fails).
+ */
+ current->mm->context.vdso_base = vdso_base;
+
+ /*
+ * our vma flags don't have VM_WRITE so by default, the process isn't
+ * allowed to write those pages.
+ * gdb can break that with ptrace interface, and thus trigger COW on
+ * those pages but it's then your responsibility to never do that on
+ * the "data" page of the vDSO or you'll stop getting kernel updates
+ * and your nice userland gettimeofday will be totally dead.
+ * It's fine to use that for setting breakpoints in the vDSO code
+ * pages though.
+ */
+ rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ vdso_pagelist);
+ if (rc) {
+ current->mm->context.vdso_base = 0;
+ goto fail_mmapsem;
+ }
+
+ up_write(&mm->mmap_sem);
+ return 0;
+
+ fail_mmapsem:
+ up_write(&mm->mmap_sem);
+ return rc;
+}
+
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+ if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base)
+ return "[vdso]";
+ return NULL;
+}
+
+
+
+static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname,
+ unsigned long *size)
+{
+ Elf32_Shdr *sechdrs;
+ unsigned int i;
+ char *secnames;
+
+ /* Grab section headers and strings so we can tell who is who */
+ sechdrs = (void *)ehdr + ehdr->e_shoff;
+ secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
+
+ /* Find the section they want */
+ for (i = 1; i < ehdr->e_shnum; i++) {
+ if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
+ if (size)
+ *size = sechdrs[i].sh_size;
+ return (void *)ehdr + sechdrs[i].sh_offset;
+ }
+ }
+ *size = 0;
+ return NULL;
+}
+
+static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib,
+ const char *symname)
+{
+ unsigned int i;
+ char name[MAX_SYMNAME], *c;
+
+ for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) {
+ if (lib->dynsym[i].st_name == 0)
+ continue;
+ strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
+ MAX_SYMNAME);
+ c = strchr(name, '@');
+ if (c)
+ *c = 0;
+ if (strcmp(symname, name) == 0)
+ return &lib->dynsym[i];
+ }
+ return NULL;
+}
+
+/* Note that we assume the section is .text and the symbol is relative to
+ * the library base
+ */
+static unsigned long __init find_function32(struct lib32_elfinfo *lib,
+ const char *symname)
+{
+ Elf32_Sym *sym = find_symbol32(lib, symname);
+
+ if (sym == NULL) {
+ printk(KERN_WARNING "vDSO32: function %s not found !\n",
+ symname);
+ return 0;
+ }
+ return sym->st_value - VDSO32_LBASE;
+}
+
+static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64,
+ const char *orig, const char *fix)
+{
+ Elf32_Sym *sym32_gen, *sym32_fix;
+
+ sym32_gen = find_symbol32(v32, orig);
+ if (sym32_gen == NULL) {
+ printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig);
+ return -1;
+ }
+ if (fix == NULL) {
+ sym32_gen->st_name = 0;
+ return 0;
+ }
+ sym32_fix = find_symbol32(v32, fix);
+ if (sym32_fix == NULL) {
+ printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix);
+ return -1;
+ }
+ sym32_gen->st_value = sym32_fix->st_value;
+ sym32_gen->st_size = sym32_fix->st_size;
+ sym32_gen->st_info = sym32_fix->st_info;
+ sym32_gen->st_other = sym32_fix->st_other;
+ sym32_gen->st_shndx = sym32_fix->st_shndx;
+
+ return 0;
+}
+
+
+#ifdef CONFIG_PPC64
+
+static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname,
+ unsigned long *size)
+{
+ Elf64_Shdr *sechdrs;
+ unsigned int i;
+ char *secnames;
+
+ /* Grab section headers and strings so we can tell who is who */
+ sechdrs = (void *)ehdr + ehdr->e_shoff;
+ secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
+
+ /* Find the section they want */
+ for (i = 1; i < ehdr->e_shnum; i++) {
+ if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
+ if (size)
+ *size = sechdrs[i].sh_size;
+ return (void *)ehdr + sechdrs[i].sh_offset;
+ }
+ }
+ if (size)
+ *size = 0;
+ return NULL;
+}
+
+static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib,
+ const char *symname)
+{
+ unsigned int i;
+ char name[MAX_SYMNAME], *c;
+
+ for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) {
+ if (lib->dynsym[i].st_name == 0)
+ continue;
+ strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
+ MAX_SYMNAME);
+ c = strchr(name, '@');
+ if (c)
+ *c = 0;
+ if (strcmp(symname, name) == 0)
+ return &lib->dynsym[i];
+ }
+ return NULL;
+}
+
+/* Note that we assume the section is .text and the symbol is relative to
+ * the library base
+ */
+static unsigned long __init find_function64(struct lib64_elfinfo *lib,
+ const char *symname)
+{
+ Elf64_Sym *sym = find_symbol64(lib, symname);
+
+ if (sym == NULL) {
+ printk(KERN_WARNING "vDSO64: function %s not found !\n",
+ symname);
+ return 0;
+ }
+#ifdef VDS64_HAS_DESCRIPTORS
+ return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) -
+ VDSO64_LBASE;
+#else
+ return sym->st_value - VDSO64_LBASE;
+#endif
+}
+
+static int __init vdso_do_func_patch64(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64,
+ const char *orig, const char *fix)
+{
+ Elf64_Sym *sym64_gen, *sym64_fix;
+
+ sym64_gen = find_symbol64(v64, orig);
+ if (sym64_gen == NULL) {
+ printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig);
+ return -1;
+ }
+ if (fix == NULL) {
+ sym64_gen->st_name = 0;
+ return 0;
+ }
+ sym64_fix = find_symbol64(v64, fix);
+ if (sym64_fix == NULL) {
+ printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix);
+ return -1;
+ }
+ sym64_gen->st_value = sym64_fix->st_value;
+ sym64_gen->st_size = sym64_fix->st_size;
+ sym64_gen->st_info = sym64_fix->st_info;
+ sym64_gen->st_other = sym64_fix->st_other;
+ sym64_gen->st_shndx = sym64_fix->st_shndx;
+
+ return 0;
+}
+
+#endif /* CONFIG_PPC64 */
+
+
+static __init int vdso_do_find_sections(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64)
+{
+ void *sect;
+
+ /*
+ * Locate symbol tables & text section
+ */
+
+ v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize);
+ v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL);
+ if (v32->dynsym == NULL || v32->dynstr == NULL) {
+ printk(KERN_ERR "vDSO32: required symbol section not found\n");
+ return -1;
+ }
+ sect = find_section32(v32->hdr, ".text", NULL);
+ if (sect == NULL) {
+ printk(KERN_ERR "vDSO32: the .text section was not found\n");
+ return -1;
+ }
+ v32->text = sect - vdso32_kbase;
+
+#ifdef CONFIG_PPC64
+ v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize);
+ v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL);
+ if (v64->dynsym == NULL || v64->dynstr == NULL) {
+ printk(KERN_ERR "vDSO64: required symbol section not found\n");
+ return -1;
+ }
+ sect = find_section64(v64->hdr, ".text", NULL);
+ if (sect == NULL) {
+ printk(KERN_ERR "vDSO64: the .text section was not found\n");
+ return -1;
+ }
+ v64->text = sect - vdso64_kbase;
+#endif /* CONFIG_PPC64 */
+
+ return 0;
+}
+
+static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64)
+{
+ /*
+ * Find signal trampolines
+ */
+
+#ifdef CONFIG_PPC64
+ vdso64_rt_sigtramp = find_function64(v64, "__kernel_sigtramp_rt64");
+#endif
+ vdso32_sigtramp = find_function32(v32, "__kernel_sigtramp32");
+ vdso32_rt_sigtramp = find_function32(v32, "__kernel_sigtramp_rt32");
+}
+
+static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64)
+{
+ Elf32_Sym *sym32;
+#ifdef CONFIG_PPC64
+ Elf64_Sym *sym64;
+
+ sym64 = find_symbol64(v64, "__kernel_datapage_offset");
+ if (sym64 == NULL) {
+ printk(KERN_ERR "vDSO64: Can't find symbol "
+ "__kernel_datapage_offset !\n");
+ return -1;
+ }
+ *((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) =
+ (vdso64_pages << PAGE_SHIFT) -
+ (sym64->st_value - VDSO64_LBASE);
+#endif /* CONFIG_PPC64 */
+
+ sym32 = find_symbol32(v32, "__kernel_datapage_offset");
+ if (sym32 == NULL) {
+ printk(KERN_ERR "vDSO32: Can't find symbol "
+ "__kernel_datapage_offset !\n");
+ return -1;
+ }
+ *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) =
+ (vdso32_pages << PAGE_SHIFT) -
+ (sym32->st_value - VDSO32_LBASE);
+
+ return 0;
+}
+
+
+static __init int vdso_fixup_features(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64)
+{
+ void *start32;
+ unsigned long size32;
+
+#ifdef CONFIG_PPC64
+ void *start64;
+ unsigned long size64;
+
+ start64 = find_section64(v64->hdr, "__ftr_fixup", &size64);
+ if (start64)
+ do_feature_fixups(cur_cpu_spec->cpu_features,
+ start64, start64 + size64);
+
+ start64 = find_section64(v64->hdr, "__mmu_ftr_fixup", &size64);
+ if (start64)
+ do_feature_fixups(cur_cpu_spec->mmu_features,
+ start64, start64 + size64);
+
+ start64 = find_section64(v64->hdr, "__fw_ftr_fixup", &size64);
+ if (start64)
+ do_feature_fixups(powerpc_firmware_features,
+ start64, start64 + size64);
+
+ start64 = find_section64(v64->hdr, "__lwsync_fixup", &size64);
+ if (start64)
+ do_lwsync_fixups(cur_cpu_spec->cpu_features,
+ start64, start64 + size64);
+#endif /* CONFIG_PPC64 */
+
+ start32 = find_section32(v32->hdr, "__ftr_fixup", &size32);
+ if (start32)
+ do_feature_fixups(cur_cpu_spec->cpu_features,
+ start32, start32 + size32);
+
+ start32 = find_section32(v32->hdr, "__mmu_ftr_fixup", &size32);
+ if (start32)
+ do_feature_fixups(cur_cpu_spec->mmu_features,
+ start32, start32 + size32);
+
+#ifdef CONFIG_PPC64
+ start32 = find_section32(v32->hdr, "__fw_ftr_fixup", &size32);
+ if (start32)
+ do_feature_fixups(powerpc_firmware_features,
+ start32, start32 + size32);
+#endif /* CONFIG_PPC64 */
+
+ start32 = find_section32(v32->hdr, "__lwsync_fixup", &size32);
+ if (start32)
+ do_lwsync_fixups(cur_cpu_spec->cpu_features,
+ start32, start32 + size32);
+
+ return 0;
+}
+
+static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) {
+ struct vdso_patch_def *patch = &vdso_patches[i];
+ int match = (cur_cpu_spec->cpu_features & patch->ftr_mask)
+ == patch->ftr_value;
+ if (!match)
+ continue;
+
+ DBG("replacing %s with %s...\n", patch->gen_name,
+ patch->fix_name ? "NONE" : patch->fix_name);
+
+ /*
+ * Patch the 32 bits and 64 bits symbols. Note that we do not
+ * patch the "." symbol on 64 bits.
+ * It would be easy to do, but doesn't seem to be necessary,
+ * patching the OPD symbol is enough.
+ */
+ vdso_do_func_patch32(v32, v64, patch->gen_name,
+ patch->fix_name);
+#ifdef CONFIG_PPC64
+ vdso_do_func_patch64(v32, v64, patch->gen_name,
+ patch->fix_name);
+#endif /* CONFIG_PPC64 */
+ }
+
+ return 0;
+}
+
+
+static __init int vdso_setup(void)
+{
+ struct lib32_elfinfo v32;
+ struct lib64_elfinfo v64;
+
+ v32.hdr = vdso32_kbase;
+#ifdef CONFIG_PPC64
+ v64.hdr = vdso64_kbase;
+#endif
+ if (vdso_do_find_sections(&v32, &v64))
+ return -1;
+
+ if (vdso_fixup_datapage(&v32, &v64))
+ return -1;
+
+ if (vdso_fixup_features(&v32, &v64))
+ return -1;
+
+ if (vdso_fixup_alt_funcs(&v32, &v64))
+ return -1;
+
+ vdso_setup_trampolines(&v32, &v64);
+
+ return 0;
+}
+
+/*
+ * Called from setup_arch to initialize the bitmap of available
+ * syscalls in the systemcfg page
+ */
+static void __init vdso_setup_syscall_map(void)
+{
+ unsigned int i;
+ extern unsigned long *sys_call_table;
+ extern unsigned long sys_ni_syscall;
+
+
+ for (i = 0; i < __NR_syscalls; i++) {
+#ifdef CONFIG_PPC64
+ if (sys_call_table[i*2] != sys_ni_syscall)
+ vdso_data->syscall_map_64[i >> 5] |=
+ 0x80000000UL >> (i & 0x1f);
+ if (sys_call_table[i*2+1] != sys_ni_syscall)
+ vdso_data->syscall_map_32[i >> 5] |=
+ 0x80000000UL >> (i & 0x1f);
+#else /* CONFIG_PPC64 */
+ if (sys_call_table[i] != sys_ni_syscall)
+ vdso_data->syscall_map_32[i >> 5] |=
+ 0x80000000UL >> (i & 0x1f);
+#endif /* CONFIG_PPC64 */
+ }
+}
+
+
+static int __init vdso_init(void)
+{
+ int i;
+
+#ifdef CONFIG_PPC64
+ /*
+ * Fill up the "systemcfg" stuff for backward compatibility
+ */
+ strcpy((char *)vdso_data->eye_catcher, "SYSTEMCFG:PPC64");
+ vdso_data->version.major = SYSTEMCFG_MAJOR;
+ vdso_data->version.minor = SYSTEMCFG_MINOR;
+ vdso_data->processor = mfspr(SPRN_PVR);
+ /*
+ * Fake the old platform number for pSeries and add
+ * in LPAR bit if necessary
+ */
+ vdso_data->platform = 0x100;
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ vdso_data->platform |= 1;
+ vdso_data->physicalMemorySize = memblock_phys_mem_size();
+ vdso_data->dcache_size = ppc64_caches.dsize;
+ vdso_data->dcache_line_size = ppc64_caches.dline_size;
+ vdso_data->icache_size = ppc64_caches.isize;
+ vdso_data->icache_line_size = ppc64_caches.iline_size;
+
+ /* XXXOJN: Blocks should be added to ppc64_caches and used instead */
+ vdso_data->dcache_block_size = ppc64_caches.dline_size;
+ vdso_data->icache_block_size = ppc64_caches.iline_size;
+ vdso_data->dcache_log_block_size = ppc64_caches.log_dline_size;
+ vdso_data->icache_log_block_size = ppc64_caches.log_iline_size;
+
+ /*
+ * Calculate the size of the 64 bits vDSO
+ */
+ vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
+ DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages);
+#else
+ vdso_data->dcache_block_size = L1_CACHE_BYTES;
+ vdso_data->dcache_log_block_size = L1_CACHE_SHIFT;
+ vdso_data->icache_block_size = L1_CACHE_BYTES;
+ vdso_data->icache_log_block_size = L1_CACHE_SHIFT;
+#endif /* CONFIG_PPC64 */
+
+
+ /*
+ * Calculate the size of the 32 bits vDSO
+ */
+ vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT;
+ DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages);
+
+
+ /*
+ * Setup the syscall map in the vDOS
+ */
+ vdso_setup_syscall_map();
+
+ /*
+ * Initialize the vDSO images in memory, that is do necessary
+ * fixups of vDSO symbols, locate trampolines, etc...
+ */
+ if (vdso_setup()) {
+ printk(KERN_ERR "vDSO setup failure, not enabled !\n");
+ vdso32_pages = 0;
+#ifdef CONFIG_PPC64
+ vdso64_pages = 0;
+#endif
+ return 0;
+ }
+
+ /* Make sure pages are in the correct state */
+ vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 2),
+ GFP_KERNEL);
+ BUG_ON(vdso32_pagelist == NULL);
+ for (i = 0; i < vdso32_pages; i++) {
+ struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
+ ClearPageReserved(pg);
+ get_page(pg);
+ vdso32_pagelist[i] = pg;
+ }
+ vdso32_pagelist[i++] = virt_to_page(vdso_data);
+ vdso32_pagelist[i] = NULL;
+
+#ifdef CONFIG_PPC64
+ vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 2),
+ GFP_KERNEL);
+ BUG_ON(vdso64_pagelist == NULL);
+ for (i = 0; i < vdso64_pages; i++) {
+ struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
+ ClearPageReserved(pg);
+ get_page(pg);
+ vdso64_pagelist[i] = pg;
+ }
+ vdso64_pagelist[i++] = virt_to_page(vdso_data);
+ vdso64_pagelist[i] = NULL;
+#endif /* CONFIG_PPC64 */
+
+ get_page(virt_to_page(vdso_data));
+
+ smp_wmb();
+ vdso_ready = 1;
+
+ return 0;
+}
+arch_initcall(vdso_init);
+
+int in_gate_area_no_mm(unsigned long addr)
+{
+ return 0;
+}
+
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
+{
+ return 0;
+}
+
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+ return NULL;
+}
+
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
new file mode 100644
index 00000000..9a7946c4
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -0,0 +1,56 @@
+
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o
+
+# Build rules
+
+ifeq ($(CONFIG_PPC32),y)
+CROSS32CC := $(CC)
+endif
+
+targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+
+GCOV_PROFILE := n
+
+ccflags-y := -shared -fno-common -fno-builtin
+ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+asflags-y := -D__VDSO32__ -s
+
+obj-y += vdso32_wrapper.o
+extra-y += vdso32.lds
+CPPFLAGS_vdso32.lds += -P -C -Upowerpc
+
+# Force dependency (incbin is bad)
+$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32)
+ $(call if_changed,vdso32ld)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+# assembly rules for the .S files
+$(obj-vdso32): %.o: %.S
+ $(call if_changed_dep,vdso32as)
+
+# actual build commands
+quiet_cmd_vdso32ld = VDSO32L $@
+ cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso32as = VDSO32A $@
+ cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso32.so: $(obj)/vdso32.so.dbg
+ @mkdir -p $(MODLIB)/vdso
+ $(call cmd,vdso_install)
+
+vdso_install: vdso32.so
diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
new file mode 100644
index 00000000..1ba6feb7
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/cacheflush.S
@@ -0,0 +1,85 @@
+/*
+ * vDSO provided cache flush routines
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+
+ .text
+
+/*
+ * Default "generic" version of __kernel_sync_dicache.
+ *
+ * void __kernel_sync_dicache(unsigned long start, unsigned long end)
+ *
+ * Flushes the data cache & invalidate the instruction cache for the
+ * provided range [start, end[
+ */
+V_FUNCTION_BEGIN(__kernel_sync_dicache)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+ mr r11,r3
+ bl __get_datapage@local
+ mtlr r12
+ mr r10,r3
+
+ lwz r7,CFG_DCACHE_BLOCKSZ(r10)
+ addi r5,r7,-1
+ andc r6,r11,r5 /* round low to line bdy */
+ subf r8,r6,r4 /* compute length */
+ add r8,r8,r5 /* ensure we get enough */
+ lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
+ srw. r8,r8,r9 /* compute line count */
+ crclr cr0*4+so
+ beqlr /* nothing to do? */
+ mtctr r8
+1: dcbst 0,r6
+ add r6,r6,r7
+ bdnz 1b
+ sync
+
+/* Now invalidate the instruction cache */
+
+ lwz r7,CFG_ICACHE_BLOCKSZ(r10)
+ addi r5,r7,-1
+ andc r6,r11,r5 /* round low to line bdy */
+ subf r8,r6,r4 /* compute length */
+ add r8,r8,r5
+ lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
+ srw. r8,r8,r9 /* compute line count */
+ crclr cr0*4+so
+ beqlr /* nothing to do? */
+ mtctr r8
+2: icbi 0,r6
+ add r6,r6,r7
+ bdnz 2b
+ isync
+ li r3,0
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_sync_dicache)
+
+
+/*
+ * POWER5 version of __kernel_sync_dicache
+ */
+V_FUNCTION_BEGIN(__kernel_sync_dicache_p5)
+ .cfi_startproc
+ crclr cr0*4+so
+ sync
+ isync
+ li r3,0
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_sync_dicache_p5)
+
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
new file mode 100644
index 00000000..dc21e891
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -0,0 +1,85 @@
+/*
+ * Access to the shared data page by the vDSO & syscall map
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+
+ .text
+V_FUNCTION_BEGIN(__get_datapage)
+ .cfi_startproc
+ /* We don't want that exposed or overridable as we want other objects
+ * to be able to bl directly to here
+ */
+ .protected __get_datapage
+ .hidden __get_datapage
+
+ mflr r0
+ .cfi_register lr,r0
+
+ bcl 20,31,1f
+ .global __kernel_datapage_offset;
+__kernel_datapage_offset:
+ .long 0
+1:
+ mflr r3
+ mtlr r0
+ lwz r0,0(r3)
+ add r3,r0,r3
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__get_datapage)
+
+/*
+ * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
+ *
+ * returns a pointer to the syscall map. the map is agnostic to the
+ * size of "long", unlike kernel bitops, it stores bits from top to
+ * bottom so that memory actually contains a linear bitmap
+ * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of
+ * 32 bits int at N >> 5.
+ */
+V_FUNCTION_BEGIN(__kernel_get_syscall_map)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+ mr r4,r3
+ bl __get_datapage@local
+ mtlr r12
+ addi r3,r3,CFG_SYSCALL_MAP32
+ cmpli cr0,r4,0
+ beqlr
+ li r0,__NR_syscalls
+ stw r0,0(r4)
+ crclr cr0*4+so
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_get_syscall_map)
+
+/*
+ * void unsigned long long __kernel_get_tbfreq(void);
+ *
+ * returns the timebase frequency in HZ
+ */
+V_FUNCTION_BEGIN(__kernel_get_tbfreq)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+ bl __get_datapage@local
+ lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
+ lwz r3,CFG_TB_TICKS_PER_SEC(r3)
+ mtlr r12
+ crclr cr0*4+so
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_get_tbfreq)
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
new file mode 100644
index 00000000..4ee09ee2
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -0,0 +1,266 @@
+/*
+ * Userland implementation of gettimeofday() for 32 bits processes in a
+ * ppc64 kernel for use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org,
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+/* Offset for the low 32-bit part of a field of long type */
+#ifdef CONFIG_PPC64
+#define LOPART 4
+#define TSPEC_TV_SEC TSPC64_TV_SEC+LOPART
+#else
+#define LOPART 0
+#define TSPEC_TV_SEC TSPC32_TV_SEC
+#endif
+
+ .text
+/*
+ * Exact prototype of gettimeofday
+ *
+ * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_gettimeofday)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+
+ mr r10,r3 /* r10 saves tv */
+ mr r11,r4 /* r11 saves tz */
+ bl __get_datapage@local /* get data page */
+ mr r9, r3 /* datapage ptr in r9 */
+ cmplwi r10,0 /* check if tv is NULL */
+ beq 3f
+ lis r7,1000000@ha /* load up USEC_PER_SEC */
+ addi r7,r7,1000000@l /* so we get microseconds in r4 */
+ bl __do_get_tspec@local /* get sec/usec from tb & kernel */
+ stw r3,TVAL32_TV_SEC(r10)
+ stw r4,TVAL32_TV_USEC(r10)
+
+3: cmplwi r11,0 /* check if tz is NULL */
+ beq 1f
+ lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */
+ lwz r5,CFG_TZ_DSTTIME(r9)
+ stw r4,TZONE_TZ_MINWEST(r11)
+ stw r5,TZONE_TZ_DSTTIME(r11)
+
+1: mtlr r12
+ crclr cr0*4+so
+ li r3,0
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_gettimeofday)
+
+/*
+ * Exact prototype of clock_gettime()
+ *
+ * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_gettime)
+ .cfi_startproc
+ /* Check for supported clock IDs */
+ cmpli cr0,r3,CLOCK_REALTIME
+ cmpli cr1,r3,CLOCK_MONOTONIC
+ cror cr0*4+eq,cr0*4+eq,cr1*4+eq
+ bne cr0,99f
+
+ mflr r12 /* r12 saves lr */
+ .cfi_register lr,r12
+ mr r11,r4 /* r11 saves tp */
+ bl __get_datapage@local /* get data page */
+ mr r9,r3 /* datapage ptr in r9 */
+ lis r7,NSEC_PER_SEC@h /* want nanoseconds */
+ ori r7,r7,NSEC_PER_SEC@l
+50: bl __do_get_tspec@local /* get sec/nsec from tb & kernel */
+ bne cr1,80f /* not monotonic -> all done */
+
+ /*
+ * CLOCK_MONOTONIC
+ */
+
+ /* now we must fixup using wall to monotonic. We need to snapshot
+ * that value and do the counter trick again. Fortunately, we still
+ * have the counter value in r8 that was returned by __do_get_xsec.
+ * At this point, r3,r4 contain our sec/nsec values, r5 and r6
+ * can be used, r7 contains NSEC_PER_SEC.
+ */
+
+ lwz r5,WTOM_CLOCK_SEC(r9)
+ lwz r6,WTOM_CLOCK_NSEC(r9)
+
+ /* We now have our offset in r5,r6. We create a fake dependency
+ * on that value and re-check the counter
+ */
+ or r0,r6,r5
+ xor r0,r0,r0
+ add r9,r9,r0
+ lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
+ cmpl cr0,r8,r0 /* check if updated */
+ bne- 50b
+
+ /* Calculate and store result. Note that this mimics the C code,
+ * which may cause funny results if nsec goes negative... is that
+ * possible at all ?
+ */
+ add r3,r3,r5
+ add r4,r4,r6
+ cmpw cr0,r4,r7
+ cmpwi cr1,r4,0
+ blt 1f
+ subf r4,r7,r4
+ addi r3,r3,1
+1: bge cr1,80f
+ addi r3,r3,-1
+ add r4,r4,r7
+
+80: stw r3,TSPC32_TV_SEC(r11)
+ stw r4,TSPC32_TV_NSEC(r11)
+
+ mtlr r12
+ crclr cr0*4+so
+ li r3,0
+ blr
+
+ /*
+ * syscall fallback
+ */
+99:
+ li r0,__NR_clock_gettime
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_clock_gettime)
+
+
+/*
+ * Exact prototype of clock_getres()
+ *
+ * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_getres)
+ .cfi_startproc
+ /* Check for supported clock IDs */
+ cmpwi cr0,r3,CLOCK_REALTIME
+ cmpwi cr1,r3,CLOCK_MONOTONIC
+ cror cr0*4+eq,cr0*4+eq,cr1*4+eq
+ bne cr0,99f
+
+ li r3,0
+ cmpli cr0,r4,0
+ crclr cr0*4+so
+ beqlr
+ lis r5,CLOCK_REALTIME_RES@h
+ ori r5,r5,CLOCK_REALTIME_RES@l
+ stw r3,TSPC32_TV_SEC(r4)
+ stw r5,TSPC32_TV_NSEC(r4)
+ blr
+
+ /*
+ * syscall fallback
+ */
+99:
+ li r0,__NR_clock_getres
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_clock_getres)
+
+
+/*
+ * This is the core of clock_gettime() and gettimeofday(),
+ * it returns the current time in r3 (seconds) and r4.
+ * On entry, r7 gives the resolution of r4, either USEC_PER_SEC
+ * or NSEC_PER_SEC, giving r4 in microseconds or nanoseconds.
+ * It expects the datapage ptr in r9 and doesn't clobber it.
+ * It clobbers r0, r5 and r6.
+ * On return, r8 contains the counter value that can be reused.
+ * This clobbers cr0 but not any other cr field.
+ */
+__do_get_tspec:
+ .cfi_startproc
+ /* Check for update count & load values. We use the low
+ * order 32 bits of the update count
+ */
+1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
+ andi. r0,r8,1 /* pending update ? loop */
+ bne- 1b
+ xor r0,r8,r8 /* create dependency */
+ add r9,r9,r0
+
+ /* Load orig stamp (offset to TB) */
+ lwz r5,CFG_TB_ORIG_STAMP(r9)
+ lwz r6,(CFG_TB_ORIG_STAMP+4)(r9)
+
+ /* Get a stable TB value */
+2: mftbu r3
+ mftbl r4
+ mftbu r0
+ cmplw cr0,r3,r0
+ bne- 2b
+
+ /* Subtract tb orig stamp and shift left 12 bits.
+ */
+ subfc r4,r6,r4
+ subfe r0,r5,r3
+ slwi r0,r0,12
+ rlwimi. r0,r4,12,20,31
+ slwi r4,r4,12
+
+ /*
+ * Load scale factor & do multiplication.
+ * We only use the high 32 bits of the tb_to_xs value.
+ * Even with a 1GHz timebase clock, the high 32 bits of
+ * tb_to_xs will be at least 4 million, so the error from
+ * ignoring the low 32 bits will be no more than 0.25ppm.
+ * The error will just make the clock run very very slightly
+ * slow until the next time the kernel updates the VDSO data,
+ * at which point the clock will catch up to the kernel's value,
+ * so there is no long-term error accumulation.
+ */
+ lwz r5,CFG_TB_TO_XS(r9) /* load values */
+ mulhwu r4,r4,r5
+ li r3,0
+
+ beq+ 4f /* skip high part computation if 0 */
+ mulhwu r3,r0,r5
+ mullw r5,r0,r5
+ addc r4,r4,r5
+ addze r3,r3
+4:
+ /* At this point, we have seconds since the xtime stamp
+ * as a 32.32 fixed-point number in r3 and r4.
+ * Load & add the xtime stamp.
+ */
+ lwz r5,STAMP_XTIME+TSPEC_TV_SEC(r9)
+ lwz r6,STAMP_SEC_FRAC(r9)
+ addc r4,r4,r6
+ adde r3,r3,r5
+
+ /* We create a fake dependency on the result in r3/r4
+ * and re-check the counter
+ */
+ or r6,r4,r3
+ xor r0,r6,r6
+ add r9,r9,r0
+ lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
+ cmplw cr0,r8,r0 /* check if updated */
+ bne- 1b
+
+ mulhwu r4,r4,r7 /* convert to micro or nanoseconds */
+
+ blr
+ .cfi_endproc
diff --git a/arch/powerpc/kernel/vdso32/note.S b/arch/powerpc/kernel/vdso32/note.S
new file mode 100644
index 00000000..d4b5be4f
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/note.S
@@ -0,0 +1,25 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+
+#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \
+ .section name, flags; \
+ .balign 4; \
+ .long 1f - 0f; /* name length */ \
+ .long 3f - 2f; /* data length */ \
+ .long type; /* note type */ \
+0: .asciz vendor; /* vendor name */ \
+1: .balign 4; \
+2:
+
+#define ASM_ELF_NOTE_END \
+3: .balign 4; /* pad out section */ \
+ .previous
+
+ ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0)
+ .long LINUX_VERSION_CODE
+ ASM_ELF_NOTE_END
diff --git a/arch/powerpc/kernel/vdso32/sigtramp.S b/arch/powerpc/kernel/vdso32/sigtramp.S
new file mode 100644
index 00000000..cf0c9c9c
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/sigtramp.S
@@ -0,0 +1,299 @@
+/*
+ * Signal trampolines for 32 bits processes in a ppc64 kernel for
+ * use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
+ * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+
+ .text
+
+/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
+ the return address to get an address in the middle of the presumed
+ call instruction. Since we don't have a call here, we artificially
+ extend the range covered by the unwind info by adding a nop before
+ the real start. */
+ nop
+V_FUNCTION_BEGIN(__kernel_sigtramp32)
+.Lsig_start = . - 4
+ li r0,__NR_sigreturn
+ sc
+.Lsig_end:
+V_FUNCTION_END(__kernel_sigtramp32)
+
+.Lsigrt_start:
+ nop
+V_FUNCTION_BEGIN(__kernel_sigtramp_rt32)
+ li r0,__NR_rt_sigreturn
+ sc
+.Lsigrt_end:
+V_FUNCTION_END(__kernel_sigtramp_rt32)
+
+ .section .eh_frame,"a",@progbits
+
+/* Register r1 can be found at offset 4 of a pt_regs structure.
+ A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
+#define cfa_save \
+ .byte 0x0f; /* DW_CFA_def_cfa_expression */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 RSIZE; /* DW_OP_plus_uconst */ \
+ .byte 0x06; /* DW_OP_deref */ \
+9:
+
+/* Register REGNO can be found at offset OFS of a pt_regs structure.
+ A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
+#define rsave(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .ifne ofs; \
+ .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
+ .endif; \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+ of the VMX reg struct. The VMX reg struct is at offset VREGS of
+ the pt_regs struct. This macro is for REGNO == 0, and contains
+ 'subroutines' that the other macros jump to. */
+#define vsave_msr0(regno) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x30 + regno; /* DW_OP_lit0 */ \
+2: \
+ .byte 0x40; /* DW_OP_lit16 */ \
+ .byte 0x1e; /* DW_OP_mul */ \
+3: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x12; /* DW_OP_dup */ \
+ .byte 0x23; /* DW_OP_plus_uconst */ \
+ .uleb128 33*RSIZE; /* msr offset */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x0c; .long 1 << 25; /* DW_OP_const4u */ \
+ .byte 0x1a; /* DW_OP_and */ \
+ .byte 0x12; /* DW_OP_dup, ret 0 if bra taken */ \
+ .byte 0x30; /* DW_OP_lit0 */ \
+ .byte 0x29; /* DW_OP_eq */ \
+ .byte 0x28; .short 0x7fff; /* DW_OP_bra to end */ \
+ .byte 0x13; /* DW_OP_drop, pop the 0 */ \
+ .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
+ .byte 0x22; /* DW_OP_plus */ \
+ .byte 0x2f; .short 0x7fff; /* DW_OP_skip to end */ \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+ of the VMX reg struct. REGNO is 1 thru 31. */
+#define vsave_msr1(regno) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x30 + regno; /* DW_OP_lit n */ \
+ .byte 0x2f; .short 2b - 9f; /* DW_OP_skip */ \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of
+ the VMX save block. */
+#define vsave_msr2(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x0a; .short ofs; /* DW_OP_const2u */ \
+ .byte 0x2f; .short 3b - 9f; /* DW_OP_skip */ \
+9:
+
+/* VMX register REGNO is at offset OFS of the VMX save area. */
+#define vsave(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
+ .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
+9:
+
+/* This is where the pt_regs pointer can be found on the stack. */
+#define PTREGS 64+28
+
+/* Size of regs. */
+#define RSIZE 4
+
+/* This is the offset of the VMX regs. */
+#define VREGS 48*RSIZE+34*8
+
+/* Describe where general purpose regs are saved. */
+#define EH_FRAME_GEN \
+ cfa_save; \
+ rsave ( 0, 0*RSIZE); \
+ rsave ( 2, 2*RSIZE); \
+ rsave ( 3, 3*RSIZE); \
+ rsave ( 4, 4*RSIZE); \
+ rsave ( 5, 5*RSIZE); \
+ rsave ( 6, 6*RSIZE); \
+ rsave ( 7, 7*RSIZE); \
+ rsave ( 8, 8*RSIZE); \
+ rsave ( 9, 9*RSIZE); \
+ rsave (10, 10*RSIZE); \
+ rsave (11, 11*RSIZE); \
+ rsave (12, 12*RSIZE); \
+ rsave (13, 13*RSIZE); \
+ rsave (14, 14*RSIZE); \
+ rsave (15, 15*RSIZE); \
+ rsave (16, 16*RSIZE); \
+ rsave (17, 17*RSIZE); \
+ rsave (18, 18*RSIZE); \
+ rsave (19, 19*RSIZE); \
+ rsave (20, 20*RSIZE); \
+ rsave (21, 21*RSIZE); \
+ rsave (22, 22*RSIZE); \
+ rsave (23, 23*RSIZE); \
+ rsave (24, 24*RSIZE); \
+ rsave (25, 25*RSIZE); \
+ rsave (26, 26*RSIZE); \
+ rsave (27, 27*RSIZE); \
+ rsave (28, 28*RSIZE); \
+ rsave (29, 29*RSIZE); \
+ rsave (30, 30*RSIZE); \
+ rsave (31, 31*RSIZE); \
+ rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \
+ rsave (65, 36*RSIZE); /* lr */ \
+ rsave (70, 38*RSIZE) /* cr */
+
+/* Describe where the FP regs are saved. */
+#define EH_FRAME_FP \
+ rsave (32, 48*RSIZE + 0*8); \
+ rsave (33, 48*RSIZE + 1*8); \
+ rsave (34, 48*RSIZE + 2*8); \
+ rsave (35, 48*RSIZE + 3*8); \
+ rsave (36, 48*RSIZE + 4*8); \
+ rsave (37, 48*RSIZE + 5*8); \
+ rsave (38, 48*RSIZE + 6*8); \
+ rsave (39, 48*RSIZE + 7*8); \
+ rsave (40, 48*RSIZE + 8*8); \
+ rsave (41, 48*RSIZE + 9*8); \
+ rsave (42, 48*RSIZE + 10*8); \
+ rsave (43, 48*RSIZE + 11*8); \
+ rsave (44, 48*RSIZE + 12*8); \
+ rsave (45, 48*RSIZE + 13*8); \
+ rsave (46, 48*RSIZE + 14*8); \
+ rsave (47, 48*RSIZE + 15*8); \
+ rsave (48, 48*RSIZE + 16*8); \
+ rsave (49, 48*RSIZE + 17*8); \
+ rsave (50, 48*RSIZE + 18*8); \
+ rsave (51, 48*RSIZE + 19*8); \
+ rsave (52, 48*RSIZE + 20*8); \
+ rsave (53, 48*RSIZE + 21*8); \
+ rsave (54, 48*RSIZE + 22*8); \
+ rsave (55, 48*RSIZE + 23*8); \
+ rsave (56, 48*RSIZE + 24*8); \
+ rsave (57, 48*RSIZE + 25*8); \
+ rsave (58, 48*RSIZE + 26*8); \
+ rsave (59, 48*RSIZE + 27*8); \
+ rsave (60, 48*RSIZE + 28*8); \
+ rsave (61, 48*RSIZE + 29*8); \
+ rsave (62, 48*RSIZE + 30*8); \
+ rsave (63, 48*RSIZE + 31*8)
+
+/* Describe where the VMX regs are saved. */
+#ifdef CONFIG_ALTIVEC
+#define EH_FRAME_VMX \
+ vsave_msr0 ( 0); \
+ vsave_msr1 ( 1); \
+ vsave_msr1 ( 2); \
+ vsave_msr1 ( 3); \
+ vsave_msr1 ( 4); \
+ vsave_msr1 ( 5); \
+ vsave_msr1 ( 6); \
+ vsave_msr1 ( 7); \
+ vsave_msr1 ( 8); \
+ vsave_msr1 ( 9); \
+ vsave_msr1 (10); \
+ vsave_msr1 (11); \
+ vsave_msr1 (12); \
+ vsave_msr1 (13); \
+ vsave_msr1 (14); \
+ vsave_msr1 (15); \
+ vsave_msr1 (16); \
+ vsave_msr1 (17); \
+ vsave_msr1 (18); \
+ vsave_msr1 (19); \
+ vsave_msr1 (20); \
+ vsave_msr1 (21); \
+ vsave_msr1 (22); \
+ vsave_msr1 (23); \
+ vsave_msr1 (24); \
+ vsave_msr1 (25); \
+ vsave_msr1 (26); \
+ vsave_msr1 (27); \
+ vsave_msr1 (28); \
+ vsave_msr1 (29); \
+ vsave_msr1 (30); \
+ vsave_msr1 (31); \
+ vsave_msr2 (33, 32*16+12); \
+ vsave (32, 32*16)
+#else
+#define EH_FRAME_VMX
+#endif
+
+.Lcie:
+ .long .Lcie_end - .Lcie_start
+.Lcie_start:
+ .long 0 /* CIE ID */
+ .byte 1 /* Version number */
+ .string "zRS" /* NUL-terminated augmentation string */
+ .uleb128 4 /* Code alignment factor */
+ .sleb128 -4 /* Data alignment factor */
+ .byte 67 /* Return address register column, ap */
+ .uleb128 1 /* Augmentation value length */
+ .byte 0x1b /* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */
+ .byte 0x0c,1,0 /* DW_CFA_def_cfa: r1 ofs 0 */
+ .balign 4
+.Lcie_end:
+
+ .long .Lfde0_end - .Lfde0_start
+.Lfde0_start:
+ .long .Lfde0_start - .Lcie /* CIE pointer. */
+ .long .Lsig_start - . /* PC start, length */
+ .long .Lsig_end - .Lsig_start
+ .uleb128 0 /* Augmentation */
+ EH_FRAME_GEN
+ EH_FRAME_FP
+ EH_FRAME_VMX
+ .balign 4
+.Lfde0_end:
+
+/* We have a different stack layout for rt_sigreturn. */
+#undef PTREGS
+#define PTREGS 64+16+128+20+28
+
+ .long .Lfde1_end - .Lfde1_start
+.Lfde1_start:
+ .long .Lfde1_start - .Lcie /* CIE pointer. */
+ .long .Lsigrt_start - . /* PC start, length */
+ .long .Lsigrt_end - .Lsigrt_start
+ .uleb128 0 /* Augmentation */
+ EH_FRAME_GEN
+ EH_FRAME_FP
+ EH_FRAME_VMX
+ .balign 4
+.Lfde1_end:
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
new file mode 100644
index 00000000..0546bcd4
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -0,0 +1,153 @@
+/*
+ * This is the infamous ld script for the 32 bits vdso
+ * library
+ */
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc")
+OUTPUT_ARCH(powerpc:common)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = VDSO32_LBASE + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ . = ALIGN(16);
+ .text : {
+ *(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*)
+ } :text
+ PROVIDE(__etext = .);
+ PROVIDE(_etext = .);
+ PROVIDE(etext = .);
+
+ . = ALIGN(8);
+ __ftr_fixup : { *(__ftr_fixup) }
+
+ . = ALIGN(8);
+ __mmu_ftr_fixup : { *(__mmu_ftr_fixup) }
+
+ . = ALIGN(8);
+ __lwsync_fixup : { *(__lwsync_fixup) }
+
+#ifdef CONFIG_PPC64
+ . = ALIGN(8);
+ __fw_ftr_fixup : { *(__fw_ftr_fixup) }
+#endif
+
+ /*
+ * Other stuff is appended to the text segment:
+ */
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .gcc_except_table : { *(.gcc_except_table) }
+ .fixup : { *(.fixup) }
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+ .got : { *(.got) } :text
+ .plt : { *(.plt) }
+
+ _end = .;
+ __end = .;
+ PROVIDE(end = .);
+
+ /*
+ * Stabs debugging sections are here too.
+ */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+
+ /*
+ * DWARF debug sections.
+ * Symbols in the DWARF debugging sections are relative to the beginning
+ * of the section so we begin them at 0.
+ */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+
+ /DISCARD/ : {
+ *(.note.GNU-stack)
+ *(.data .data.* .gnu.linkonce.d.* .sdata*)
+ *(.bss .sbss .dynbss .dynsbss)
+ }
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME 0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ /*
+ * Has to be there for the kernel to find
+ */
+ __kernel_datapage_offset;
+
+ __kernel_get_syscall_map;
+ __kernel_gettimeofday;
+ __kernel_clock_gettime;
+ __kernel_clock_getres;
+ __kernel_get_tbfreq;
+ __kernel_sync_dicache;
+ __kernel_sync_dicache_p5;
+ __kernel_sigtramp32;
+ __kernel_sigtramp_rt32;
+
+ local: *;
+ };
+}
diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
new file mode 100644
index 00000000..6e8f507e
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
@@ -0,0 +1,14 @@
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+ __PAGE_ALIGNED_DATA
+
+ .globl vdso32_start, vdso32_end
+ .balign PAGE_SIZE
+vdso32_start:
+ .incbin "arch/powerpc/kernel/vdso32/vdso32.so"
+ .balign PAGE_SIZE
+vdso32_end:
+
+ .previous
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
new file mode 100644
index 00000000..8c500d86
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -0,0 +1,51 @@
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o
+
+# Build rules
+
+targets := $(obj-vdso64) vdso64.so vdso64.so.dbg
+obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+
+GCOV_PROFILE := n
+
+ccflags-y := -shared -fno-common -fno-builtin
+ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+asflags-y := -D__VDSO64__ -s
+
+obj-y += vdso64_wrapper.o
+extra-y += vdso64.lds
+CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
+
+# Force dependency (incbin is bad)
+$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64)
+ $(call if_changed,vdso64ld)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+# assembly rules for the .S files
+$(obj-vdso64): %.o: %.S
+ $(call if_changed_dep,vdso64as)
+
+# actual build commands
+quiet_cmd_vdso64ld = VDSO64L $@
+ cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso64as = VDSO64A $@
+ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso64.so: $(obj)/vdso64.so.dbg
+ @mkdir -p $(MODLIB)/vdso
+ $(call cmd,vdso_install)
+
+vdso_install: vdso64.so
diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S
new file mode 100644
index 00000000..69c5af2b
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/cacheflush.S
@@ -0,0 +1,84 @@
+/*
+ * vDSO provided cache flush routines
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+
+ .text
+
+/*
+ * Default "generic" version of __kernel_sync_dicache.
+ *
+ * void __kernel_sync_dicache(unsigned long start, unsigned long end)
+ *
+ * Flushes the data cache & invalidate the instruction cache for the
+ * provided range [start, end[
+ */
+V_FUNCTION_BEGIN(__kernel_sync_dicache)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+ mr r11,r3
+ bl V_LOCAL_FUNC(__get_datapage)
+ mtlr r12
+ mr r10,r3
+
+ lwz r7,CFG_DCACHE_BLOCKSZ(r10)
+ addi r5,r7,-1
+ andc r6,r11,r5 /* round low to line bdy */
+ subf r8,r6,r4 /* compute length */
+ add r8,r8,r5 /* ensure we get enough */
+ lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
+ srw. r8,r8,r9 /* compute line count */
+ crclr cr0*4+so
+ beqlr /* nothing to do? */
+ mtctr r8
+1: dcbst 0,r6
+ add r6,r6,r7
+ bdnz 1b
+ sync
+
+/* Now invalidate the instruction cache */
+
+ lwz r7,CFG_ICACHE_BLOCKSZ(r10)
+ addi r5,r7,-1
+ andc r6,r11,r5 /* round low to line bdy */
+ subf r8,r6,r4 /* compute length */
+ add r8,r8,r5
+ lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
+ srw. r8,r8,r9 /* compute line count */
+ crclr cr0*4+so
+ beqlr /* nothing to do? */
+ mtctr r8
+2: icbi 0,r6
+ add r6,r6,r7
+ bdnz 2b
+ isync
+ li r3,0
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_sync_dicache)
+
+
+/*
+ * POWER5 version of __kernel_sync_dicache
+ */
+V_FUNCTION_BEGIN(__kernel_sync_dicache_p5)
+ .cfi_startproc
+ crclr cr0*4+so
+ sync
+ isync
+ li r3,0
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_sync_dicache_p5)
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
new file mode 100644
index 00000000..79796de1
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/datapage.S
@@ -0,0 +1,85 @@
+/*
+ * Access to the shared data page by the vDSO & syscall map
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+
+ .text
+V_FUNCTION_BEGIN(__get_datapage)
+ .cfi_startproc
+ /* We don't want that exposed or overridable as we want other objects
+ * to be able to bl directly to here
+ */
+ .protected __get_datapage
+ .hidden __get_datapage
+
+ mflr r0
+ .cfi_register lr,r0
+
+ bcl 20,31,1f
+ .global __kernel_datapage_offset;
+__kernel_datapage_offset:
+ .long 0
+1:
+ mflr r3
+ mtlr r0
+ lwz r0,0(r3)
+ add r3,r0,r3
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__get_datapage)
+
+/*
+ * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
+ *
+ * returns a pointer to the syscall map. the map is agnostic to the
+ * size of "long", unlike kernel bitops, it stores bits from top to
+ * bottom so that memory actually contains a linear bitmap
+ * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of
+ * 32 bits int at N >> 5.
+ */
+V_FUNCTION_BEGIN(__kernel_get_syscall_map)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+ mr r4,r3
+ bl V_LOCAL_FUNC(__get_datapage)
+ mtlr r12
+ addi r3,r3,CFG_SYSCALL_MAP64
+ cmpli cr0,r4,0
+ crclr cr0*4+so
+ beqlr
+ li r0,__NR_syscalls
+ stw r0,0(r4)
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_get_syscall_map)
+
+
+/*
+ * void unsigned long __kernel_get_tbfreq(void);
+ *
+ * returns the timebase frequency in HZ
+ */
+V_FUNCTION_BEGIN(__kernel_get_tbfreq)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+ bl V_LOCAL_FUNC(__get_datapage)
+ ld r3,CFG_TB_TICKS_PER_SEC(r3)
+ mtlr r12
+ crclr cr0*4+so
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_get_tbfreq)
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
new file mode 100644
index 00000000..e97a9a0d
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -0,0 +1,218 @@
+/*
+ * Userland implementation of gettimeofday() for 64 bits processes in a
+ * ppc64 kernel for use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+/*
+ * Exact prototype of gettimeofday
+ *
+ * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_gettimeofday)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+
+ mr r11,r3 /* r11 holds tv */
+ mr r10,r4 /* r10 holds tz */
+ bl V_LOCAL_FUNC(__get_datapage) /* get data page */
+ cmpldi r11,0 /* check if tv is NULL */
+ beq 2f
+ lis r7,1000000@ha /* load up USEC_PER_SEC */
+ addi r7,r7,1000000@l
+ bl V_LOCAL_FUNC(__do_get_tspec) /* get sec/us from tb & kernel */
+ std r4,TVAL64_TV_SEC(r11) /* store sec in tv */
+ std r5,TVAL64_TV_USEC(r11) /* store usec in tv */
+2: cmpldi r10,0 /* check if tz is NULL */
+ beq 1f
+ lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */
+ lwz r5,CFG_TZ_DSTTIME(r3)
+ stw r4,TZONE_TZ_MINWEST(r10)
+ stw r5,TZONE_TZ_DSTTIME(r10)
+1: mtlr r12
+ crclr cr0*4+so
+ li r3,0 /* always success */
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_gettimeofday)
+
+
+/*
+ * Exact prototype of clock_gettime()
+ *
+ * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_gettime)
+ .cfi_startproc
+ /* Check for supported clock IDs */
+ cmpwi cr0,r3,CLOCK_REALTIME
+ cmpwi cr1,r3,CLOCK_MONOTONIC
+ cror cr0*4+eq,cr0*4+eq,cr1*4+eq
+ bne cr0,99f
+
+ mflr r12 /* r12 saves lr */
+ .cfi_register lr,r12
+ mr r11,r4 /* r11 saves tp */
+ bl V_LOCAL_FUNC(__get_datapage) /* get data page */
+ lis r7,NSEC_PER_SEC@h /* want nanoseconds */
+ ori r7,r7,NSEC_PER_SEC@l
+50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
+ bne cr1,80f /* if not monotonic, all done */
+
+ /*
+ * CLOCK_MONOTONIC
+ */
+
+ /* now we must fixup using wall to monotonic. We need to snapshot
+ * that value and do the counter trick again. Fortunately, we still
+ * have the counter value in r8 that was returned by __do_get_tspec.
+ * At this point, r4,r5 contain our sec/nsec values.
+ */
+
+ lwa r6,WTOM_CLOCK_SEC(r3)
+ lwa r9,WTOM_CLOCK_NSEC(r3)
+
+ /* We now have our result in r6,r9. We create a fake dependency
+ * on that result and re-check the counter
+ */
+ or r0,r6,r9
+ xor r0,r0,r0
+ add r3,r3,r0
+ ld r0,CFG_TB_UPDATE_COUNT(r3)
+ cmpld cr0,r0,r8 /* check if updated */
+ bne- 50b
+
+ /* Add wall->monotonic offset and check for overflow or underflow.
+ */
+ add r4,r4,r6
+ add r5,r5,r9
+ cmpd cr0,r5,r7
+ cmpdi cr1,r5,0
+ blt 1f
+ subf r5,r7,r5
+ addi r4,r4,1
+1: bge cr1,80f
+ addi r4,r4,-1
+ add r5,r5,r7
+
+80: std r4,TSPC64_TV_SEC(r11)
+ std r5,TSPC64_TV_NSEC(r11)
+
+ mtlr r12
+ crclr cr0*4+so
+ li r3,0
+ blr
+
+ /*
+ * syscall fallback
+ */
+99:
+ li r0,__NR_clock_gettime
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_clock_gettime)
+
+
+/*
+ * Exact prototype of clock_getres()
+ *
+ * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_getres)
+ .cfi_startproc
+ /* Check for supported clock IDs */
+ cmpwi cr0,r3,CLOCK_REALTIME
+ cmpwi cr1,r3,CLOCK_MONOTONIC
+ cror cr0*4+eq,cr0*4+eq,cr1*4+eq
+ bne cr0,99f
+
+ li r3,0
+ cmpli cr0,r4,0
+ crclr cr0*4+so
+ beqlr
+ lis r5,CLOCK_REALTIME_RES@h
+ ori r5,r5,CLOCK_REALTIME_RES@l
+ std r3,TSPC64_TV_SEC(r4)
+ std r5,TSPC64_TV_NSEC(r4)
+ blr
+
+ /*
+ * syscall fallback
+ */
+99:
+ li r0,__NR_clock_getres
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_clock_getres)
+
+
+/*
+ * This is the core of clock_gettime() and gettimeofday(),
+ * it returns the current time in r4 (seconds) and r5.
+ * On entry, r7 gives the resolution of r5, either USEC_PER_SEC
+ * or NSEC_PER_SEC, giving r5 in microseconds or nanoseconds.
+ * It expects the datapage ptr in r3 and doesn't clobber it.
+ * It clobbers r0, r6 and r9.
+ * On return, r8 contains the counter value that can be reused.
+ * This clobbers cr0 but not any other cr field.
+ */
+V_FUNCTION_BEGIN(__do_get_tspec)
+ .cfi_startproc
+ /* check for update count & load values */
+1: ld r8,CFG_TB_UPDATE_COUNT(r3)
+ andi. r0,r8,1 /* pending update ? loop */
+ bne- 1b
+ xor r0,r8,r8 /* create dependency */
+ add r3,r3,r0
+
+ /* Get TB & offset it. We use the MFTB macro which will generate
+ * workaround code for Cell.
+ */
+ MFTB(r6)
+ ld r9,CFG_TB_ORIG_STAMP(r3)
+ subf r6,r9,r6
+
+ /* Scale result */
+ ld r5,CFG_TB_TO_XS(r3)
+ sldi r6,r6,12 /* compute time since stamp_xtime */
+ mulhdu r6,r6,r5 /* in units of 2^-32 seconds */
+
+ /* Add stamp since epoch */
+ ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
+ lwz r5,STAMP_SEC_FRAC(r3)
+ or r0,r4,r5
+ or r0,r0,r6
+ xor r0,r0,r0
+ add r3,r3,r0
+ ld r0,CFG_TB_UPDATE_COUNT(r3)
+ cmpld r0,r8 /* check if updated */
+ bne- 1b /* reload if so */
+
+ /* convert to seconds & nanoseconds and add to stamp */
+ add r6,r6,r5 /* add on fractional seconds of xtime */
+ mulhwu r5,r6,r7 /* compute micro or nanoseconds and */
+ srdi r6,r6,32 /* seconds since stamp_xtime */
+ clrldi r5,r5,32
+ add r4,r4,r6
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__do_get_tspec)
diff --git a/arch/powerpc/kernel/vdso64/note.S b/arch/powerpc/kernel/vdso64/note.S
new file mode 100644
index 00000000..dc2a509f
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/note.S
@@ -0,0 +1 @@
+#include "../vdso32/note.S"
diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S
new file mode 100644
index 00000000..45ea281e
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/sigtramp.S
@@ -0,0 +1,297 @@
+/*
+ * Signal trampoline for 64 bits processes in a ppc64 kernel for
+ * use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
+ * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+#include <asm/ptrace.h> /* XXX for __SIGNAL_FRAMESIZE */
+
+ .text
+
+/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
+ the return address to get an address in the middle of the presumed
+ call instruction. Since we don't have a call here, we artificially
+ extend the range covered by the unwind info by padding before the
+ real start. */
+ nop
+ .balign 8
+V_FUNCTION_BEGIN(__kernel_sigtramp_rt64)
+.Lsigrt_start = . - 4
+ addi r1, r1, __SIGNAL_FRAMESIZE
+ li r0,__NR_rt_sigreturn
+ sc
+.Lsigrt_end:
+V_FUNCTION_END(__kernel_sigtramp_rt64)
+/* The ".balign 8" above and the following zeros mimic the old stack
+ trampoline layout. The last magic value is the ucontext pointer,
+ chosen in such a way that older libgcc unwind code returns a zero
+ for a sigcontext pointer. */
+ .long 0,0,0
+ .quad 0,-21*8
+
+/* Register r1 can be found at offset 8 of a pt_regs structure.
+ A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
+#define cfa_save \
+ .byte 0x0f; /* DW_CFA_def_cfa_expression */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 RSIZE; /* DW_OP_plus_uconst */ \
+ .byte 0x06; /* DW_OP_deref */ \
+9:
+
+/* Register REGNO can be found at offset OFS of a pt_regs structure.
+ A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
+#define rsave(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .ifne ofs; \
+ .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
+ .endif; \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+ of the VMX reg struct. A pointer to the VMX reg struct is at VREGS in
+ the pt_regs struct. This macro is for REGNO == 0, and contains
+ 'subroutines' that the other macros jump to. */
+#define vsave_msr0(regno) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x30 + regno; /* DW_OP_lit0 */ \
+2: \
+ .byte 0x40; /* DW_OP_lit16 */ \
+ .byte 0x1e; /* DW_OP_mul */ \
+3: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x12; /* DW_OP_dup */ \
+ .byte 0x23; /* DW_OP_plus_uconst */ \
+ .uleb128 33*RSIZE; /* msr offset */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x0c; .long 1 << 25; /* DW_OP_const4u */ \
+ .byte 0x1a; /* DW_OP_and */ \
+ .byte 0x12; /* DW_OP_dup, ret 0 if bra taken */ \
+ .byte 0x30; /* DW_OP_lit0 */ \
+ .byte 0x29; /* DW_OP_eq */ \
+ .byte 0x28; .short 0x7fff; /* DW_OP_bra to end */ \
+ .byte 0x13; /* DW_OP_drop, pop the 0 */ \
+ .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x22; /* DW_OP_plus */ \
+ .byte 0x2f; .short 0x7fff; /* DW_OP_skip to end */ \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+ of the VMX reg struct. REGNO is 1 thru 31. */
+#define vsave_msr1(regno) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x30 + regno; /* DW_OP_lit n */ \
+ .byte 0x2f; .short 2b - 9f; /* DW_OP_skip */ \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of
+ the VMX save block. */
+#define vsave_msr2(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x0a; .short ofs; /* DW_OP_const2u */ \
+ .byte 0x2f; .short 3b - 9f; /* DW_OP_skip */ \
+9:
+
+/* VMX register REGNO is at offset OFS of the VMX save area. */
+#define vsave(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
+9:
+
+/* This is where the pt_regs pointer can be found on the stack. */
+#define PTREGS 128+168+56
+
+/* Size of regs. */
+#define RSIZE 8
+
+/* Size of CR reg in DWARF unwind info. */
+#define CRSIZE 4
+
+/* This is the offset of the VMX reg pointer. */
+#define VREGS 48*RSIZE+33*8
+
+/* Describe where general purpose regs are saved. */
+#define EH_FRAME_GEN \
+ cfa_save; \
+ rsave ( 0, 0*RSIZE); \
+ rsave ( 2, 2*RSIZE); \
+ rsave ( 3, 3*RSIZE); \
+ rsave ( 4, 4*RSIZE); \
+ rsave ( 5, 5*RSIZE); \
+ rsave ( 6, 6*RSIZE); \
+ rsave ( 7, 7*RSIZE); \
+ rsave ( 8, 8*RSIZE); \
+ rsave ( 9, 9*RSIZE); \
+ rsave (10, 10*RSIZE); \
+ rsave (11, 11*RSIZE); \
+ rsave (12, 12*RSIZE); \
+ rsave (13, 13*RSIZE); \
+ rsave (14, 14*RSIZE); \
+ rsave (15, 15*RSIZE); \
+ rsave (16, 16*RSIZE); \
+ rsave (17, 17*RSIZE); \
+ rsave (18, 18*RSIZE); \
+ rsave (19, 19*RSIZE); \
+ rsave (20, 20*RSIZE); \
+ rsave (21, 21*RSIZE); \
+ rsave (22, 22*RSIZE); \
+ rsave (23, 23*RSIZE); \
+ rsave (24, 24*RSIZE); \
+ rsave (25, 25*RSIZE); \
+ rsave (26, 26*RSIZE); \
+ rsave (27, 27*RSIZE); \
+ rsave (28, 28*RSIZE); \
+ rsave (29, 29*RSIZE); \
+ rsave (30, 30*RSIZE); \
+ rsave (31, 31*RSIZE); \
+ rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \
+ rsave (65, 36*RSIZE); /* lr */ \
+ rsave (70, 38*RSIZE + (RSIZE - CRSIZE)) /* cr */
+
+/* Describe where the FP regs are saved. */
+#define EH_FRAME_FP \
+ rsave (32, 48*RSIZE + 0*8); \
+ rsave (33, 48*RSIZE + 1*8); \
+ rsave (34, 48*RSIZE + 2*8); \
+ rsave (35, 48*RSIZE + 3*8); \
+ rsave (36, 48*RSIZE + 4*8); \
+ rsave (37, 48*RSIZE + 5*8); \
+ rsave (38, 48*RSIZE + 6*8); \
+ rsave (39, 48*RSIZE + 7*8); \
+ rsave (40, 48*RSIZE + 8*8); \
+ rsave (41, 48*RSIZE + 9*8); \
+ rsave (42, 48*RSIZE + 10*8); \
+ rsave (43, 48*RSIZE + 11*8); \
+ rsave (44, 48*RSIZE + 12*8); \
+ rsave (45, 48*RSIZE + 13*8); \
+ rsave (46, 48*RSIZE + 14*8); \
+ rsave (47, 48*RSIZE + 15*8); \
+ rsave (48, 48*RSIZE + 16*8); \
+ rsave (49, 48*RSIZE + 17*8); \
+ rsave (50, 48*RSIZE + 18*8); \
+ rsave (51, 48*RSIZE + 19*8); \
+ rsave (52, 48*RSIZE + 20*8); \
+ rsave (53, 48*RSIZE + 21*8); \
+ rsave (54, 48*RSIZE + 22*8); \
+ rsave (55, 48*RSIZE + 23*8); \
+ rsave (56, 48*RSIZE + 24*8); \
+ rsave (57, 48*RSIZE + 25*8); \
+ rsave (58, 48*RSIZE + 26*8); \
+ rsave (59, 48*RSIZE + 27*8); \
+ rsave (60, 48*RSIZE + 28*8); \
+ rsave (61, 48*RSIZE + 29*8); \
+ rsave (62, 48*RSIZE + 30*8); \
+ rsave (63, 48*RSIZE + 31*8)
+
+/* Describe where the VMX regs are saved. */
+#ifdef CONFIG_ALTIVEC
+#define EH_FRAME_VMX \
+ vsave_msr0 ( 0); \
+ vsave_msr1 ( 1); \
+ vsave_msr1 ( 2); \
+ vsave_msr1 ( 3); \
+ vsave_msr1 ( 4); \
+ vsave_msr1 ( 5); \
+ vsave_msr1 ( 6); \
+ vsave_msr1 ( 7); \
+ vsave_msr1 ( 8); \
+ vsave_msr1 ( 9); \
+ vsave_msr1 (10); \
+ vsave_msr1 (11); \
+ vsave_msr1 (12); \
+ vsave_msr1 (13); \
+ vsave_msr1 (14); \
+ vsave_msr1 (15); \
+ vsave_msr1 (16); \
+ vsave_msr1 (17); \
+ vsave_msr1 (18); \
+ vsave_msr1 (19); \
+ vsave_msr1 (20); \
+ vsave_msr1 (21); \
+ vsave_msr1 (22); \
+ vsave_msr1 (23); \
+ vsave_msr1 (24); \
+ vsave_msr1 (25); \
+ vsave_msr1 (26); \
+ vsave_msr1 (27); \
+ vsave_msr1 (28); \
+ vsave_msr1 (29); \
+ vsave_msr1 (30); \
+ vsave_msr1 (31); \
+ vsave_msr2 (33, 32*16+12); \
+ vsave (32, 33*16)
+#else
+#define EH_FRAME_VMX
+#endif
+
+ .section .eh_frame,"a",@progbits
+.Lcie:
+ .long .Lcie_end - .Lcie_start
+.Lcie_start:
+ .long 0 /* CIE ID */
+ .byte 1 /* Version number */
+ .string "zRS" /* NUL-terminated augmentation string */
+ .uleb128 4 /* Code alignment factor */
+ .sleb128 -8 /* Data alignment factor */
+ .byte 67 /* Return address register column, ap */
+ .uleb128 1 /* Augmentation value length */
+ .byte 0x14 /* DW_EH_PE_pcrel | DW_EH_PE_udata8. */
+ .byte 0x0c,1,0 /* DW_CFA_def_cfa: r1 ofs 0 */
+ .balign 8
+.Lcie_end:
+
+ .long .Lfde0_end - .Lfde0_start
+.Lfde0_start:
+ .long .Lfde0_start - .Lcie /* CIE pointer. */
+ .quad .Lsigrt_start - . /* PC start, length */
+ .quad .Lsigrt_end - .Lsigrt_start
+ .uleb128 0 /* Augmentation */
+ EH_FRAME_GEN
+ EH_FRAME_FP
+ EH_FRAME_VMX
+# Do we really need to describe the frame at this point? ie. will
+# we ever have some call chain that returns somewhere past the addi?
+# I don't think so, since gcc doesn't support async signals.
+# .byte 0x41 /* DW_CFA_advance_loc 1*4 */
+#undef PTREGS
+#define PTREGS 168+56
+# EH_FRAME_GEN
+# EH_FRAME_FP
+# EH_FRAME_VMX
+ .balign 8
+.Lfde0_end:
diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S
new file mode 100644
index 00000000..0e615404
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S
@@ -0,0 +1,152 @@
+/*
+ * This is the infamous ld script for the 64 bits vdso
+ * library
+ */
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc", "elf64-powerpc")
+OUTPUT_ARCH(powerpc:common64)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = VDSO64_LBASE + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ . = ALIGN(16);
+ .text : {
+ *(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*)
+ *(.sfpr .glink)
+ } :text
+ PROVIDE(__etext = .);
+ PROVIDE(_etext = .);
+ PROVIDE(etext = .);
+
+ . = ALIGN(8);
+ __ftr_fixup : { *(__ftr_fixup) }
+
+ . = ALIGN(8);
+ __mmu_ftr_fixup : { *(__mmu_ftr_fixup) }
+
+ . = ALIGN(8);
+ __lwsync_fixup : { *(__lwsync_fixup) }
+
+ . = ALIGN(8);
+ __fw_ftr_fixup : { *(__fw_ftr_fixup) }
+
+ /*
+ * Other stuff is appended to the text segment:
+ */
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .gcc_except_table : { *(.gcc_except_table) }
+ .rela.dyn ALIGN(8) : { *(.rela.dyn) }
+
+ .opd ALIGN(8) : { KEEP (*(.opd)) }
+ .got ALIGN(8) : { *(.got .toc) }
+
+ _end = .;
+ PROVIDE(end = .);
+
+ /*
+ * Stabs debugging sections are here too.
+ */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+
+ /*
+ * DWARF debug sections.
+ * Symbols in the DWARF debugging sections are relative to the beginning
+ * of the section so we begin them at 0.
+ */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+
+ /DISCARD/ : {
+ *(.note.GNU-stack)
+ *(.branch_lt)
+ *(.data .data.* .gnu.linkonce.d.* .sdata*)
+ *(.bss .sbss .dynbss .dynsbss)
+ }
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME 0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ /*
+ * Has to be there for the kernel to find
+ */
+ __kernel_datapage_offset;
+
+ __kernel_get_syscall_map;
+ __kernel_gettimeofday;
+ __kernel_clock_gettime;
+ __kernel_clock_getres;
+ __kernel_get_tbfreq;
+ __kernel_sync_dicache;
+ __kernel_sync_dicache_p5;
+ __kernel_sigtramp_rt64;
+
+ local: *;
+ };
+}
diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
new file mode 100644
index 00000000..b8553d62
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
@@ -0,0 +1,14 @@
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+ __PAGE_ALIGNED_DATA
+
+ .globl vdso64_start, vdso64_end
+ .balign PAGE_SIZE
+vdso64_start:
+ .incbin "arch/powerpc/kernel/vdso64/vdso64.so"
+ .balign PAGE_SIZE
+vdso64_end:
+
+ .previous
diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c
new file mode 100644
index 00000000..604d0947
--- /dev/null
+++ b/arch/powerpc/kernel/vecemu.c
@@ -0,0 +1,345 @@
+/*
+ * Routines to emulate some Altivec/VMX instructions, specifically
+ * those that can trap when given denormalized operands in Java mode.
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+
+/* Functions in vector.S */
+extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
+extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
+extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
+extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
+extern void vrefp(vector128 *dst, vector128 *src);
+extern void vrsqrtefp(vector128 *dst, vector128 *src);
+extern void vexptep(vector128 *dst, vector128 *src);
+
+static unsigned int exp2s[8] = {
+ 0x800000,
+ 0x8b95c2,
+ 0x9837f0,
+ 0xa5fed7,
+ 0xb504f3,
+ 0xc5672a,
+ 0xd744fd,
+ 0xeac0c7
+};
+
+/*
+ * Computes an estimate of 2^x. The `s' argument is the 32-bit
+ * single-precision floating-point representation of x.
+ */
+static unsigned int eexp2(unsigned int s)
+{
+ int exp, pwr;
+ unsigned int mant, frac;
+
+ /* extract exponent field from input */
+ exp = ((s >> 23) & 0xff) - 127;
+ if (exp > 7) {
+ /* check for NaN input */
+ if (exp == 128 && (s & 0x7fffff) != 0)
+ return s | 0x400000; /* return QNaN */
+ /* 2^-big = 0, 2^+big = +Inf */
+ return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */
+ }
+ if (exp < -23)
+ return 0x3f800000; /* 1.0 */
+
+ /* convert to fixed point integer in 9.23 representation */
+ pwr = (s & 0x7fffff) | 0x800000;
+ if (exp > 0)
+ pwr <<= exp;
+ else
+ pwr >>= -exp;
+ if (s & 0x80000000)
+ pwr = -pwr;
+
+ /* extract integer part, which becomes exponent part of result */
+ exp = (pwr >> 23) + 126;
+ if (exp >= 254)
+ return 0x7f800000;
+ if (exp < -23)
+ return 0;
+
+ /* table lookup on top 3 bits of fraction to get mantissa */
+ mant = exp2s[(pwr >> 20) & 7];
+
+ /* linear interpolation using remaining 20 bits of fraction */
+ asm("mulhwu %0,%1,%2" : "=r" (frac)
+ : "r" (pwr << 12), "r" (0x172b83ff));
+ asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
+ mant += frac;
+
+ if (exp >= 0)
+ return mant + (exp << 23);
+
+ /* denormalized result */
+ exp = -exp;
+ mant += 1 << (exp - 1);
+ return mant >> exp;
+}
+
+/*
+ * Computes an estimate of log_2(x). The `s' argument is the 32-bit
+ * single-precision floating-point representation of x.
+ */
+static unsigned int elog2(unsigned int s)
+{
+ int exp, mant, lz, frac;
+
+ exp = s & 0x7f800000;
+ mant = s & 0x7fffff;
+ if (exp == 0x7f800000) { /* Inf or NaN */
+ if (mant != 0)
+ s |= 0x400000; /* turn NaN into QNaN */
+ return s;
+ }
+ if ((exp | mant) == 0) /* +0 or -0 */
+ return 0xff800000; /* return -Inf */
+
+ if (exp == 0) {
+ /* denormalized */
+ asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant));
+ mant <<= lz - 8;
+ exp = (-118 - lz) << 23;
+ } else {
+ mant |= 0x800000;
+ exp -= 127 << 23;
+ }
+
+ if (mant >= 0xb504f3) { /* 2^0.5 * 2^23 */
+ exp |= 0x400000; /* 0.5 * 2^23 */
+ asm("mulhwu %0,%1,%2" : "=r" (mant)
+ : "r" (mant), "r" (0xb504f334)); /* 2^-0.5 * 2^32 */
+ }
+ if (mant >= 0x9837f0) { /* 2^0.25 * 2^23 */
+ exp |= 0x200000; /* 0.25 * 2^23 */
+ asm("mulhwu %0,%1,%2" : "=r" (mant)
+ : "r" (mant), "r" (0xd744fccb)); /* 2^-0.25 * 2^32 */
+ }
+ if (mant >= 0x8b95c2) { /* 2^0.125 * 2^23 */
+ exp |= 0x100000; /* 0.125 * 2^23 */
+ asm("mulhwu %0,%1,%2" : "=r" (mant)
+ : "r" (mant), "r" (0xeac0c6e8)); /* 2^-0.125 * 2^32 */
+ }
+ if (mant > 0x800000) { /* 1.0 * 2^23 */
+ /* calculate (mant - 1) * 1.381097463 */
+ /* 1.381097463 == 0.125 / (2^0.125 - 1) */
+ asm("mulhwu %0,%1,%2" : "=r" (frac)
+ : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a));
+ exp += frac;
+ }
+ s = exp & 0x80000000;
+ if (exp != 0) {
+ if (s)
+ exp = -exp;
+ asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp));
+ lz = 8 - lz;
+ if (lz > 0)
+ exp >>= lz;
+ else if (lz < 0)
+ exp <<= -lz;
+ s += ((lz + 126) << 23) + exp;
+ }
+ return s;
+}
+
+#define VSCR_SAT 1
+
+static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
+{
+ int exp, mant;
+
+ exp = (x >> 23) & 0xff;
+ mant = x & 0x7fffff;
+ if (exp == 255 && mant != 0)
+ return 0; /* NaN -> 0 */
+ exp = exp - 127 + scale;
+ if (exp < 0)
+ return 0; /* round towards zero */
+ if (exp >= 31) {
+ /* saturate, unless the result would be -2^31 */
+ if (x + (scale << 23) != 0xcf000000)
+ *vscrp |= VSCR_SAT;
+ return (x & 0x80000000)? 0x80000000: 0x7fffffff;
+ }
+ mant |= 0x800000;
+ mant = (mant << 7) >> (30 - exp);
+ return (x & 0x80000000)? -mant: mant;
+}
+
+static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
+{
+ int exp;
+ unsigned int mant;
+
+ exp = (x >> 23) & 0xff;
+ mant = x & 0x7fffff;
+ if (exp == 255 && mant != 0)
+ return 0; /* NaN -> 0 */
+ exp = exp - 127 + scale;
+ if (exp < 0)
+ return 0; /* round towards zero */
+ if (x & 0x80000000) {
+ /* negative => saturate to 0 */
+ *vscrp |= VSCR_SAT;
+ return 0;
+ }
+ if (exp >= 32) {
+ /* saturate */
+ *vscrp |= VSCR_SAT;
+ return 0xffffffff;
+ }
+ mant |= 0x800000;
+ mant = (mant << 8) >> (31 - exp);
+ return mant;
+}
+
+/* Round to floating integer, towards 0 */
+static unsigned int rfiz(unsigned int x)
+{
+ int exp;
+
+ exp = ((x >> 23) & 0xff) - 127;
+ if (exp == 128 && (x & 0x7fffff) != 0)
+ return x | 0x400000; /* NaN -> make it a QNaN */
+ if (exp >= 23)
+ return x; /* it's an integer already (or Inf) */
+ if (exp < 0)
+ return x & 0x80000000; /* |x| < 1.0 rounds to 0 */
+ return x & ~(0x7fffff >> exp);
+}
+
+/* Round to floating integer, towards +/- Inf */
+static unsigned int rfii(unsigned int x)
+{
+ int exp, mask;
+
+ exp = ((x >> 23) & 0xff) - 127;
+ if (exp == 128 && (x & 0x7fffff) != 0)
+ return x | 0x400000; /* NaN -> make it a QNaN */
+ if (exp >= 23)
+ return x; /* it's an integer already (or Inf) */
+ if ((x & 0x7fffffff) == 0)
+ return x; /* +/-0 -> +/-0 */
+ if (exp < 0)
+ /* 0 < |x| < 1.0 rounds to +/- 1.0 */
+ return (x & 0x80000000) | 0x3f800000;
+ mask = 0x7fffff >> exp;
+ /* mantissa overflows into exponent - that's OK,
+ it can't overflow into the sign bit */
+ return (x + mask) & ~mask;
+}
+
+/* Round to floating integer, to nearest */
+static unsigned int rfin(unsigned int x)
+{
+ int exp, half;
+
+ exp = ((x >> 23) & 0xff) - 127;
+ if (exp == 128 && (x & 0x7fffff) != 0)
+ return x | 0x400000; /* NaN -> make it a QNaN */
+ if (exp >= 23)
+ return x; /* it's an integer already (or Inf) */
+ if (exp < -1)
+ return x & 0x80000000; /* |x| < 0.5 -> +/-0 */
+ if (exp == -1)
+ /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */
+ return (x & 0x80000000) | 0x3f800000;
+ half = 0x400000 >> exp;
+ /* add 0.5 to the magnitude and chop off the fraction bits */
+ return (x + half) & ~(0x7fffff >> exp);
+}
+
+int emulate_altivec(struct pt_regs *regs)
+{
+ unsigned int instr, i;
+ unsigned int va, vb, vc, vd;
+ vector128 *vrs;
+
+ if (get_user(instr, (unsigned int __user *) regs->nip))
+ return -EFAULT;
+ if ((instr >> 26) != 4)
+ return -EINVAL; /* not an altivec instruction */
+ vd = (instr >> 21) & 0x1f;
+ va = (instr >> 16) & 0x1f;
+ vb = (instr >> 11) & 0x1f;
+ vc = (instr >> 6) & 0x1f;
+
+ vrs = current->thread.vr;
+ switch (instr & 0x3f) {
+ case 10:
+ switch (vc) {
+ case 0: /* vaddfp */
+ vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
+ break;
+ case 1: /* vsubfp */
+ vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
+ break;
+ case 4: /* vrefp */
+ vrefp(&vrs[vd], &vrs[vb]);
+ break;
+ case 5: /* vrsqrtefp */
+ vrsqrtefp(&vrs[vd], &vrs[vb]);
+ break;
+ case 6: /* vexptefp */
+ for (i = 0; i < 4; ++i)
+ vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
+ break;
+ case 7: /* vlogefp */
+ for (i = 0; i < 4; ++i)
+ vrs[vd].u[i] = elog2(vrs[vb].u[i]);
+ break;
+ case 8: /* vrfin */
+ for (i = 0; i < 4; ++i)
+ vrs[vd].u[i] = rfin(vrs[vb].u[i]);
+ break;
+ case 9: /* vrfiz */
+ for (i = 0; i < 4; ++i)
+ vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
+ break;
+ case 10: /* vrfip */
+ for (i = 0; i < 4; ++i) {
+ u32 x = vrs[vb].u[i];
+ x = (x & 0x80000000)? rfiz(x): rfii(x);
+ vrs[vd].u[i] = x;
+ }
+ break;
+ case 11: /* vrfim */
+ for (i = 0; i < 4; ++i) {
+ u32 x = vrs[vb].u[i];
+ x = (x & 0x80000000)? rfii(x): rfiz(x);
+ vrs[vd].u[i] = x;
+ }
+ break;
+ case 14: /* vctuxs */
+ for (i = 0; i < 4; ++i)
+ vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
+ &current->thread.vscr.u[3]);
+ break;
+ case 15: /* vctsxs */
+ for (i = 0; i < 4; ++i)
+ vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
+ &current->thread.vscr.u[3]);
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case 46: /* vmaddfp */
+ vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
+ break;
+ case 47: /* vnmsubfp */
+ vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
new file mode 100644
index 00000000..4d5a3edf
--- /dev/null
+++ b/arch/powerpc/kernel/vector.S
@@ -0,0 +1,407 @@
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/reg.h>
+#include <asm/asm-offsets.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+
+/*
+ * load_up_altivec(unused, unused, tsk)
+ * Disable VMX for the task which had it previously,
+ * and save its vector registers in its thread_struct.
+ * Enables the VMX for use in the kernel on return.
+ * On SMP we know the VMX is free, since we give it up every
+ * switch (ie, no lazy save of the vector registers).
+ */
+_GLOBAL(load_up_altivec)
+ mfmsr r5 /* grab the current MSR */
+ oris r5,r5,MSR_VEC@h
+ MTMSRD(r5) /* enable use of AltiVec now */
+ isync
+
+/*
+ * For SMP, we don't do lazy VMX switching because it just gets too
+ * horrendously complex, especially when a task switches from one CPU
+ * to another. Instead we call giveup_altvec in switch_to.
+ * VRSAVE isn't dealt with here, that is done in the normal context
+ * switch code. Note that we could rely on vrsave value to eventually
+ * avoid saving all of the VREGs here...
+ */
+#ifndef CONFIG_SMP
+ LOAD_REG_ADDRBASE(r3, last_task_used_altivec)
+ toreal(r3)
+ PPC_LL r4,ADDROFF(last_task_used_altivec)(r3)
+ PPC_LCMPI 0,r4,0
+ beq 1f
+
+ /* Save VMX state to last_task_used_altivec's THREAD struct */
+ toreal(r4)
+ addi r4,r4,THREAD
+ SAVE_32VRS(0,r5,r4)
+ mfvscr vr0
+ li r10,THREAD_VSCR
+ stvx vr0,r10,r4
+ /* Disable VMX for last_task_used_altivec */
+ PPC_LL r5,PT_REGS(r4)
+ toreal(r5)
+ PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ lis r10,MSR_VEC@h
+ andc r4,r4,r10
+ PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#endif /* CONFIG_SMP */
+
+ /* Hack: if we get an altivec unavailable trap with VRSAVE
+ * set to all zeros, we assume this is a broken application
+ * that fails to set it properly, and thus we switch it to
+ * all 1's
+ */
+ mfspr r4,SPRN_VRSAVE
+ cmpwi 0,r4,0
+ bne+ 1f
+ li r4,-1
+ mtspr SPRN_VRSAVE,r4
+1:
+ /* enable use of VMX after return */
+#ifdef CONFIG_PPC32
+ mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
+ oris r9,r9,MSR_VEC@h
+#else
+ ld r4,PACACURRENT(r13)
+ addi r5,r4,THREAD /* Get THREAD */
+ oris r12,r12,MSR_VEC@h
+ std r12,_MSR(r1)
+#endif
+ li r4,1
+ li r10,THREAD_VSCR
+ stw r4,THREAD_USED_VR(r5)
+ lvx vr0,r10,r5
+ mtvscr vr0
+ REST_32VRS(0,r4,r5)
+#ifndef CONFIG_SMP
+ /* Update last_task_used_altivec to 'current' */
+ subi r4,r5,THREAD /* Back to 'current' */
+ fromreal(r4)
+ PPC_STL r4,ADDROFF(last_task_used_altivec)(r3)
+#endif /* CONFIG_SMP */
+ /* restore registers and return */
+ blr
+
+/*
+ * giveup_altivec(tsk)
+ * Disable VMX for the task given as the argument,
+ * and save the vector registers in its thread_struct.
+ * Enables the VMX for use in the kernel on return.
+ */
+_GLOBAL(giveup_altivec)
+ mfmsr r5
+ oris r5,r5,MSR_VEC@h
+ SYNC
+ MTMSRD(r5) /* enable use of VMX now */
+ isync
+ PPC_LCMPI 0,r3,0
+ beqlr /* if no previous owner, done */
+ addi r3,r3,THREAD /* want THREAD of task */
+ PPC_LL r5,PT_REGS(r3)
+ PPC_LCMPI 0,r5,0
+ SAVE_32VRS(0,r4,r3)
+ mfvscr vr0
+ li r4,THREAD_VSCR
+ stvx vr0,r4,r3
+ beq 1f
+ PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ lis r3,(MSR_VEC|MSR_VSX)@h
+FTR_SECTION_ELSE
+ lis r3,MSR_VEC@h
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
+#else
+ lis r3,MSR_VEC@h
+#endif
+ andc r4,r4,r3 /* disable FP for previous task */
+ PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#ifndef CONFIG_SMP
+ li r5,0
+ LOAD_REG_ADDRBASE(r4,last_task_used_altivec)
+ PPC_STL r5,ADDROFF(last_task_used_altivec)(r4)
+#endif /* CONFIG_SMP */
+ blr
+
+#ifdef CONFIG_VSX
+
+#ifdef CONFIG_PPC32
+#error This asm code isn't ready for 32-bit kernels
+#endif
+
+/*
+ * load_up_vsx(unused, unused, tsk)
+ * Disable VSX for the task which had it previously,
+ * and save its vector registers in its thread_struct.
+ * Reuse the fp and vsx saves, but first check to see if they have
+ * been saved already.
+ */
+_GLOBAL(load_up_vsx)
+/* Load FP and VSX registers if they haven't been done yet */
+ andi. r5,r12,MSR_FP
+ beql+ load_up_fpu /* skip if already loaded */
+ andis. r5,r12,MSR_VEC@h
+ beql+ load_up_altivec /* skip if already loaded */
+
+#ifndef CONFIG_SMP
+ ld r3,last_task_used_vsx@got(r2)
+ ld r4,0(r3)
+ cmpdi 0,r4,0
+ beq 1f
+ /* Disable VSX for last_task_used_vsx */
+ addi r4,r4,THREAD
+ ld r5,PT_REGS(r4)
+ ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ lis r6,MSR_VSX@h
+ andc r6,r4,r6
+ std r6,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#endif /* CONFIG_SMP */
+ ld r4,PACACURRENT(r13)
+ addi r4,r4,THREAD /* Get THREAD */
+ li r6,1
+ stw r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
+ /* enable use of VSX after return */
+ oris r12,r12,MSR_VSX@h
+ std r12,_MSR(r1)
+#ifndef CONFIG_SMP
+ /* Update last_task_used_vsx to 'current' */
+ ld r4,PACACURRENT(r13)
+ std r4,0(r3)
+#endif /* CONFIG_SMP */
+ b fast_exception_return
+
+/*
+ * __giveup_vsx(tsk)
+ * Disable VSX for the task given as the argument.
+ * Does NOT save vsx registers.
+ * Enables the VSX for use in the kernel on return.
+ */
+_GLOBAL(__giveup_vsx)
+ mfmsr r5
+ oris r5,r5,MSR_VSX@h
+ mtmsrd r5 /* enable use of VSX now */
+ isync
+
+ cmpdi 0,r3,0
+ beqlr- /* if no previous owner, done */
+ addi r3,r3,THREAD /* want THREAD of task */
+ ld r5,PT_REGS(r3)
+ cmpdi 0,r5,0
+ beq 1f
+ ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ lis r3,MSR_VSX@h
+ andc r4,r4,r3 /* disable VSX for previous task */
+ std r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#ifndef CONFIG_SMP
+ li r5,0
+ ld r4,last_task_used_vsx@got(r2)
+ std r5,0(r4)
+#endif /* CONFIG_SMP */
+ blr
+
+#endif /* CONFIG_VSX */
+
+
+/*
+ * The routines below are in assembler so we can closely control the
+ * usage of floating-point registers. These routines must be called
+ * with preempt disabled.
+ */
+#ifdef CONFIG_PPC32
+ .data
+fpzero:
+ .long 0
+fpone:
+ .long 0x3f800000 /* 1.0 in single-precision FP */
+fphalf:
+ .long 0x3f000000 /* 0.5 in single-precision FP */
+
+#define LDCONST(fr, name) \
+ lis r11,name@ha; \
+ lfs fr,name@l(r11)
+#else
+
+ .section ".toc","aw"
+fpzero:
+ .tc FD_0_0[TC],0
+fpone:
+ .tc FD_3ff00000_0[TC],0x3ff0000000000000 /* 1.0 */
+fphalf:
+ .tc FD_3fe00000_0[TC],0x3fe0000000000000 /* 0.5 */
+
+#define LDCONST(fr, name) \
+ lfd fr,name@toc(r2)
+#endif
+
+ .text
+/*
+ * Internal routine to enable floating point and set FPSCR to 0.
+ * Don't call it from C; it doesn't use the normal calling convention.
+ */
+fpenable:
+#ifdef CONFIG_PPC32
+ stwu r1,-64(r1)
+#else
+ stdu r1,-64(r1)
+#endif
+ mfmsr r10
+ ori r11,r10,MSR_FP
+ mtmsr r11
+ isync
+ stfd fr0,24(r1)
+ stfd fr1,16(r1)
+ stfd fr31,8(r1)
+ LDCONST(fr1, fpzero)
+ mffs fr31
+ MTFSF_L(fr1)
+ blr
+
+fpdisable:
+ mtlr r12
+ MTFSF_L(fr31)
+ lfd fr31,8(r1)
+ lfd fr1,16(r1)
+ lfd fr0,24(r1)
+ mtmsr r10
+ isync
+ addi r1,r1,64
+ blr
+
+/*
+ * Vector add, floating point.
+ */
+_GLOBAL(vaddfp)
+ mflr r12
+ bl fpenable
+ li r0,4
+ mtctr r0
+ li r6,0
+1: lfsx fr0,r4,r6
+ lfsx fr1,r5,r6
+ fadds fr0,fr0,fr1
+ stfsx fr0,r3,r6
+ addi r6,r6,4
+ bdnz 1b
+ b fpdisable
+
+/*
+ * Vector subtract, floating point.
+ */
+_GLOBAL(vsubfp)
+ mflr r12
+ bl fpenable
+ li r0,4
+ mtctr r0
+ li r6,0
+1: lfsx fr0,r4,r6
+ lfsx fr1,r5,r6
+ fsubs fr0,fr0,fr1
+ stfsx fr0,r3,r6
+ addi r6,r6,4
+ bdnz 1b
+ b fpdisable
+
+/*
+ * Vector multiply and add, floating point.
+ */
+_GLOBAL(vmaddfp)
+ mflr r12
+ bl fpenable
+ stfd fr2,32(r1)
+ li r0,4
+ mtctr r0
+ li r7,0
+1: lfsx fr0,r4,r7
+ lfsx fr1,r5,r7
+ lfsx fr2,r6,r7
+ fmadds fr0,fr0,fr2,fr1
+ stfsx fr0,r3,r7
+ addi r7,r7,4
+ bdnz 1b
+ lfd fr2,32(r1)
+ b fpdisable
+
+/*
+ * Vector negative multiply and subtract, floating point.
+ */
+_GLOBAL(vnmsubfp)
+ mflr r12
+ bl fpenable
+ stfd fr2,32(r1)
+ li r0,4
+ mtctr r0
+ li r7,0
+1: lfsx fr0,r4,r7
+ lfsx fr1,r5,r7
+ lfsx fr2,r6,r7
+ fnmsubs fr0,fr0,fr2,fr1
+ stfsx fr0,r3,r7
+ addi r7,r7,4
+ bdnz 1b
+ lfd fr2,32(r1)
+ b fpdisable
+
+/*
+ * Vector reciprocal estimate. We just compute 1.0/x.
+ * r3 -> destination, r4 -> source.
+ */
+_GLOBAL(vrefp)
+ mflr r12
+ bl fpenable
+ li r0,4
+ LDCONST(fr1, fpone)
+ mtctr r0
+ li r6,0
+1: lfsx fr0,r4,r6
+ fdivs fr0,fr1,fr0
+ stfsx fr0,r3,r6
+ addi r6,r6,4
+ bdnz 1b
+ b fpdisable
+
+/*
+ * Vector reciprocal square-root estimate, floating point.
+ * We use the frsqrte instruction for the initial estimate followed
+ * by 2 iterations of Newton-Raphson to get sufficient accuracy.
+ * r3 -> destination, r4 -> source.
+ */
+_GLOBAL(vrsqrtefp)
+ mflr r12
+ bl fpenable
+ stfd fr2,32(r1)
+ stfd fr3,40(r1)
+ stfd fr4,48(r1)
+ stfd fr5,56(r1)
+ li r0,4
+ LDCONST(fr4, fpone)
+ LDCONST(fr5, fphalf)
+ mtctr r0
+ li r6,0
+1: lfsx fr0,r4,r6
+ frsqrte fr1,fr0 /* r = frsqrte(s) */
+ fmuls fr3,fr1,fr0 /* r * s */
+ fmuls fr2,fr1,fr5 /* r * 0.5 */
+ fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */
+ fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */
+ fmuls fr3,fr1,fr0 /* r * s */
+ fmuls fr2,fr1,fr5 /* r * 0.5 */
+ fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */
+ fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */
+ stfsx fr1,r3,r6
+ addi r6,r6,4
+ bdnz 1b
+ lfd fr5,56(r1)
+ lfd fr4,48(r1)
+ lfd fr3,40(r1)
+ lfd fr2,32(r1)
+ b fpdisable
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
new file mode 100644
index 00000000..a3a99901
--- /dev/null
+++ b/arch/powerpc/kernel/vio.c
@@ -0,0 +1,1467 @@
+/*
+ * IBM PowerPC Virtual I/O Infrastructure Support.
+ *
+ * Copyright (c) 2003,2008 IBM Corp.
+ * Dave Engebretsen engebret@us.ibm.com
+ * Santiago Leon santil@us.ibm.com
+ * Hollis Blanchard <hollisb@us.ibm.com>
+ * Stephen Rothwell
+ * Robert Jennings <rcjenn@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/types.h>
+#include <linux/stat.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/console.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/kobject.h>
+
+#include <asm/iommu.h>
+#include <asm/dma.h>
+#include <asm/vio.h>
+#include <asm/prom.h>
+#include <asm/firmware.h>
+#include <asm/tce.h>
+#include <asm/abs_addr.h>
+#include <asm/page.h>
+#include <asm/hvcall.h>
+
+static struct bus_type vio_bus_type;
+
+static struct vio_dev vio_bus_device = { /* fake "parent" device */
+ .name = "vio",
+ .type = "",
+ .dev.init_name = "vio",
+ .dev.bus = &vio_bus_type,
+};
+
+#ifdef CONFIG_PPC_SMLPAR
+/**
+ * vio_cmo_pool - A pool of IO memory for CMO use
+ *
+ * @size: The size of the pool in bytes
+ * @free: The amount of free memory in the pool
+ */
+struct vio_cmo_pool {
+ size_t size;
+ size_t free;
+};
+
+/* How many ms to delay queued balance work */
+#define VIO_CMO_BALANCE_DELAY 100
+
+/* Portion out IO memory to CMO devices by this chunk size */
+#define VIO_CMO_BALANCE_CHUNK 131072
+
+/**
+ * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement
+ *
+ * @vio_dev: struct vio_dev pointer
+ * @list: pointer to other devices on bus that are being tracked
+ */
+struct vio_cmo_dev_entry {
+ struct vio_dev *viodev;
+ struct list_head list;
+};
+
+/**
+ * vio_cmo - VIO bus accounting structure for CMO entitlement
+ *
+ * @lock: spinlock for entire structure
+ * @balance_q: work queue for balancing system entitlement
+ * @device_list: list of CMO-enabled devices requiring entitlement
+ * @entitled: total system entitlement in bytes
+ * @reserve: pool of memory from which devices reserve entitlement, incl. spare
+ * @excess: pool of excess entitlement not needed for device reserves or spare
+ * @spare: IO memory for device hotplug functionality
+ * @min: minimum necessary for system operation
+ * @desired: desired memory for system operation
+ * @curr: bytes currently allocated
+ * @high: high water mark for IO data usage
+ */
+struct vio_cmo {
+ spinlock_t lock;
+ struct delayed_work balance_q;
+ struct list_head device_list;
+ size_t entitled;
+ struct vio_cmo_pool reserve;
+ struct vio_cmo_pool excess;
+ size_t spare;
+ size_t min;
+ size_t desired;
+ size_t curr;
+ size_t high;
+} vio_cmo;
+
+/**
+ * vio_cmo_OF_devices - Count the number of OF devices that have DMA windows
+ */
+static int vio_cmo_num_OF_devs(void)
+{
+ struct device_node *node_vroot;
+ int count = 0;
+
+ /*
+ * Count the number of vdevice entries with an
+ * ibm,my-dma-window OF property
+ */
+ node_vroot = of_find_node_by_name(NULL, "vdevice");
+ if (node_vroot) {
+ struct device_node *of_node;
+ struct property *prop;
+
+ for_each_child_of_node(node_vroot, of_node) {
+ prop = of_find_property(of_node, "ibm,my-dma-window",
+ NULL);
+ if (prop)
+ count++;
+ }
+ }
+ of_node_put(node_vroot);
+ return count;
+}
+
+/**
+ * vio_cmo_alloc - allocate IO memory for CMO-enable devices
+ *
+ * @viodev: VIO device requesting IO memory
+ * @size: size of allocation requested
+ *
+ * Allocations come from memory reserved for the devices and any excess
+ * IO memory available to all devices. The spare pool used to service
+ * hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be
+ * made available.
+ *
+ * Return codes:
+ * 0 for successful allocation and -ENOMEM for a failure
+ */
+static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size)
+{
+ unsigned long flags;
+ size_t reserve_free = 0;
+ size_t excess_free = 0;
+ int ret = -ENOMEM;
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+
+ /* Determine the amount of free entitlement available in reserve */
+ if (viodev->cmo.entitled > viodev->cmo.allocated)
+ reserve_free = viodev->cmo.entitled - viodev->cmo.allocated;
+
+ /* If spare is not fulfilled, the excess pool can not be used. */
+ if (vio_cmo.spare >= VIO_CMO_MIN_ENT)
+ excess_free = vio_cmo.excess.free;
+
+ /* The request can be satisfied */
+ if ((reserve_free + excess_free) >= size) {
+ vio_cmo.curr += size;
+ if (vio_cmo.curr > vio_cmo.high)
+ vio_cmo.high = vio_cmo.curr;
+ viodev->cmo.allocated += size;
+ size -= min(reserve_free, size);
+ vio_cmo.excess.free -= size;
+ ret = 0;
+ }
+
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+ return ret;
+}
+
+/**
+ * vio_cmo_dealloc - deallocate IO memory from CMO-enable devices
+ * @viodev: VIO device freeing IO memory
+ * @size: size of deallocation
+ *
+ * IO memory is freed by the device back to the correct memory pools.
+ * The spare pool is replenished first from either memory pool, then
+ * the reserve pool is used to reduce device entitlement, the excess
+ * pool is used to increase the reserve pool toward the desired entitlement
+ * target, and then the remaining memory is returned to the pools.
+ *
+ */
+static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size)
+{
+ unsigned long flags;
+ size_t spare_needed = 0;
+ size_t excess_freed = 0;
+ size_t reserve_freed = size;
+ size_t tmp;
+ int balance = 0;
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+ vio_cmo.curr -= size;
+
+ /* Amount of memory freed from the excess pool */
+ if (viodev->cmo.allocated > viodev->cmo.entitled) {
+ excess_freed = min(reserve_freed, (viodev->cmo.allocated -
+ viodev->cmo.entitled));
+ reserve_freed -= excess_freed;
+ }
+
+ /* Remove allocation from device */
+ viodev->cmo.allocated -= (reserve_freed + excess_freed);
+
+ /* Spare is a subset of the reserve pool, replenish it first. */
+ spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare;
+
+ /*
+ * Replenish the spare in the reserve pool from the excess pool.
+ * This moves entitlement into the reserve pool.
+ */
+ if (spare_needed && excess_freed) {
+ tmp = min(excess_freed, spare_needed);
+ vio_cmo.excess.size -= tmp;
+ vio_cmo.reserve.size += tmp;
+ vio_cmo.spare += tmp;
+ excess_freed -= tmp;
+ spare_needed -= tmp;
+ balance = 1;
+ }
+
+ /*
+ * Replenish the spare in the reserve pool from the reserve pool.
+ * This removes entitlement from the device down to VIO_CMO_MIN_ENT,
+ * if needed, and gives it to the spare pool. The amount of used
+ * memory in this pool does not change.
+ */
+ if (spare_needed && reserve_freed) {
+ tmp = min3(spare_needed, reserve_freed, (viodev->cmo.entitled - VIO_CMO_MIN_ENT));
+
+ vio_cmo.spare += tmp;
+ viodev->cmo.entitled -= tmp;
+ reserve_freed -= tmp;
+ spare_needed -= tmp;
+ balance = 1;
+ }
+
+ /*
+ * Increase the reserve pool until the desired allocation is met.
+ * Move an allocation freed from the excess pool into the reserve
+ * pool and schedule a balance operation.
+ */
+ if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) {
+ tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size));
+
+ vio_cmo.excess.size -= tmp;
+ vio_cmo.reserve.size += tmp;
+ excess_freed -= tmp;
+ balance = 1;
+ }
+
+ /* Return memory from the excess pool to that pool */
+ if (excess_freed)
+ vio_cmo.excess.free += excess_freed;
+
+ if (balance)
+ schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY);
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+/**
+ * vio_cmo_entitlement_update - Manage system entitlement changes
+ *
+ * @new_entitlement: new system entitlement to attempt to accommodate
+ *
+ * Increases in entitlement will be used to fulfill the spare entitlement
+ * and the rest is given to the excess pool. Decreases, if they are
+ * possible, come from the excess pool and from unused device entitlement
+ *
+ * Returns: 0 on success, -ENOMEM when change can not be made
+ */
+int vio_cmo_entitlement_update(size_t new_entitlement)
+{
+ struct vio_dev *viodev;
+ struct vio_cmo_dev_entry *dev_ent;
+ unsigned long flags;
+ size_t avail, delta, tmp;
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+
+ /* Entitlement increases */
+ if (new_entitlement > vio_cmo.entitled) {
+ delta = new_entitlement - vio_cmo.entitled;
+
+ /* Fulfill spare allocation */
+ if (vio_cmo.spare < VIO_CMO_MIN_ENT) {
+ tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare));
+ vio_cmo.spare += tmp;
+ vio_cmo.reserve.size += tmp;
+ delta -= tmp;
+ }
+
+ /* Remaining new allocation goes to the excess pool */
+ vio_cmo.entitled += delta;
+ vio_cmo.excess.size += delta;
+ vio_cmo.excess.free += delta;
+
+ goto out;
+ }
+
+ /* Entitlement decreases */
+ delta = vio_cmo.entitled - new_entitlement;
+ avail = vio_cmo.excess.free;
+
+ /*
+ * Need to check how much unused entitlement each device can
+ * sacrifice to fulfill entitlement change.
+ */
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+ if (avail >= delta)
+ break;
+
+ viodev = dev_ent->viodev;
+ if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
+ (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
+ avail += viodev->cmo.entitled -
+ max_t(size_t, viodev->cmo.allocated,
+ VIO_CMO_MIN_ENT);
+ }
+
+ if (delta <= avail) {
+ vio_cmo.entitled -= delta;
+
+ /* Take entitlement from the excess pool first */
+ tmp = min(vio_cmo.excess.free, delta);
+ vio_cmo.excess.size -= tmp;
+ vio_cmo.excess.free -= tmp;
+ delta -= tmp;
+
+ /*
+ * Remove all but VIO_CMO_MIN_ENT bytes from devices
+ * until entitlement change is served
+ */
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+ if (!delta)
+ break;
+
+ viodev = dev_ent->viodev;
+ tmp = 0;
+ if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
+ (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
+ tmp = viodev->cmo.entitled -
+ max_t(size_t, viodev->cmo.allocated,
+ VIO_CMO_MIN_ENT);
+ viodev->cmo.entitled -= min(tmp, delta);
+ delta -= min(tmp, delta);
+ }
+ } else {
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+ return -ENOMEM;
+ }
+
+out:
+ schedule_delayed_work(&vio_cmo.balance_q, 0);
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+ return 0;
+}
+
+/**
+ * vio_cmo_balance - Balance entitlement among devices
+ *
+ * @work: work queue structure for this operation
+ *
+ * Any system entitlement above the minimum needed for devices, or
+ * already allocated to devices, can be distributed to the devices.
+ * The list of devices is iterated through to recalculate the desired
+ * entitlement level and to determine how much entitlement above the
+ * minimum entitlement is allocated to devices.
+ *
+ * Small chunks of the available entitlement are given to devices until
+ * their requirements are fulfilled or there is no entitlement left to give.
+ * Upon completion sizes of the reserve and excess pools are calculated.
+ *
+ * The system minimum entitlement level is also recalculated here.
+ * Entitlement will be reserved for devices even after vio_bus_remove to
+ * accommodate reloading the driver. The OF tree is walked to count the
+ * number of devices present and this will remove entitlement for devices
+ * that have actually left the system after having vio_bus_remove called.
+ */
+static void vio_cmo_balance(struct work_struct *work)
+{
+ struct vio_cmo *cmo;
+ struct vio_dev *viodev;
+ struct vio_cmo_dev_entry *dev_ent;
+ unsigned long flags;
+ size_t avail = 0, level, chunk, need;
+ int devcount = 0, fulfilled;
+
+ cmo = container_of(work, struct vio_cmo, balance_q.work);
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+
+ /* Calculate minimum entitlement and fulfill spare */
+ cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT;
+ BUG_ON(cmo->min > cmo->entitled);
+ cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min));
+ cmo->min += cmo->spare;
+ cmo->desired = cmo->min;
+
+ /*
+ * Determine how much entitlement is available and reset device
+ * entitlements
+ */
+ avail = cmo->entitled - cmo->spare;
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+ viodev = dev_ent->viodev;
+ devcount++;
+ viodev->cmo.entitled = VIO_CMO_MIN_ENT;
+ cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT);
+ avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT);
+ }
+
+ /*
+ * Having provided each device with the minimum entitlement, loop
+ * over the devices portioning out the remaining entitlement
+ * until there is nothing left.
+ */
+ level = VIO_CMO_MIN_ENT;
+ while (avail) {
+ fulfilled = 0;
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+ viodev = dev_ent->viodev;
+
+ if (viodev->cmo.desired <= level) {
+ fulfilled++;
+ continue;
+ }
+
+ /*
+ * Give the device up to VIO_CMO_BALANCE_CHUNK
+ * bytes of entitlement, but do not exceed the
+ * desired level of entitlement for the device.
+ */
+ chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK);
+ chunk = min(chunk, (viodev->cmo.desired -
+ viodev->cmo.entitled));
+ viodev->cmo.entitled += chunk;
+
+ /*
+ * If the memory for this entitlement increase was
+ * already allocated to the device it does not come
+ * from the available pool being portioned out.
+ */
+ need = max(viodev->cmo.allocated, viodev->cmo.entitled)-
+ max(viodev->cmo.allocated, level);
+ avail -= need;
+
+ }
+ if (fulfilled == devcount)
+ break;
+ level += VIO_CMO_BALANCE_CHUNK;
+ }
+
+ /* Calculate new reserve and excess pool sizes */
+ cmo->reserve.size = cmo->min;
+ cmo->excess.free = 0;
+ cmo->excess.size = 0;
+ need = 0;
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+ viodev = dev_ent->viodev;
+ /* Calculated reserve size above the minimum entitlement */
+ if (viodev->cmo.entitled)
+ cmo->reserve.size += (viodev->cmo.entitled -
+ VIO_CMO_MIN_ENT);
+ /* Calculated used excess entitlement */
+ if (viodev->cmo.allocated > viodev->cmo.entitled)
+ need += viodev->cmo.allocated - viodev->cmo.entitled;
+ }
+ cmo->excess.size = cmo->entitled - cmo->reserve.size;
+ cmo->excess.free = cmo->excess.size - need;
+
+ cancel_delayed_work(to_delayed_work(work));
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag,
+ struct dma_attrs *attrs)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ void *ret;
+
+ if (vio_cmo_alloc(viodev, roundup(size, PAGE_SIZE))) {
+ atomic_inc(&viodev->cmo.allocs_failed);
+ return NULL;
+ }
+
+ ret = dma_iommu_ops.alloc(dev, size, dma_handle, flag, attrs);
+ if (unlikely(ret == NULL)) {
+ vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
+ atomic_inc(&viodev->cmo.allocs_failed);
+ }
+
+ return ret;
+}
+
+static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+
+ dma_iommu_ops.free(dev, size, vaddr, dma_handle, attrs);
+
+ vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
+}
+
+static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ dma_addr_t ret = DMA_ERROR_CODE;
+
+ if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) {
+ atomic_inc(&viodev->cmo.allocs_failed);
+ return ret;
+ }
+
+ ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs);
+ if (unlikely(dma_mapping_error(dev, ret))) {
+ vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
+ atomic_inc(&viodev->cmo.allocs_failed);
+ }
+
+ return ret;
+}
+
+static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
+ size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+
+ dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs);
+
+ vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
+}
+
+static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ struct scatterlist *sgl;
+ int ret, count = 0;
+ size_t alloc_size = 0;
+
+ for (sgl = sglist; count < nelems; count++, sgl++)
+ alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE);
+
+ if (vio_cmo_alloc(viodev, alloc_size)) {
+ atomic_inc(&viodev->cmo.allocs_failed);
+ return 0;
+ }
+
+ ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs);
+
+ if (unlikely(!ret)) {
+ vio_cmo_dealloc(viodev, alloc_size);
+ atomic_inc(&viodev->cmo.allocs_failed);
+ return ret;
+ }
+
+ for (sgl = sglist, count = 0; count < ret; count++, sgl++)
+ alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
+ if (alloc_size)
+ vio_cmo_dealloc(viodev, alloc_size);
+
+ return ret;
+}
+
+static void vio_dma_iommu_unmap_sg(struct device *dev,
+ struct scatterlist *sglist, int nelems,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ struct scatterlist *sgl;
+ size_t alloc_size = 0;
+ int count = 0;
+
+ for (sgl = sglist; count < nelems; count++, sgl++)
+ alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
+
+ dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs);
+
+ vio_cmo_dealloc(viodev, alloc_size);
+}
+
+static int vio_dma_iommu_dma_supported(struct device *dev, u64 mask)
+{
+ return dma_iommu_ops.dma_supported(dev, mask);
+}
+
+static u64 vio_dma_get_required_mask(struct device *dev)
+{
+ return dma_iommu_ops.get_required_mask(dev);
+}
+
+struct dma_map_ops vio_dma_mapping_ops = {
+ .alloc = vio_dma_iommu_alloc_coherent,
+ .free = vio_dma_iommu_free_coherent,
+ .map_sg = vio_dma_iommu_map_sg,
+ .unmap_sg = vio_dma_iommu_unmap_sg,
+ .map_page = vio_dma_iommu_map_page,
+ .unmap_page = vio_dma_iommu_unmap_page,
+ .dma_supported = vio_dma_iommu_dma_supported,
+ .get_required_mask = vio_dma_get_required_mask,
+};
+
+/**
+ * vio_cmo_set_dev_desired - Set desired entitlement for a device
+ *
+ * @viodev: struct vio_dev for device to alter
+ * @new_desired: new desired entitlement level in bytes
+ *
+ * For use by devices to request a change to their entitlement at runtime or
+ * through sysfs. The desired entitlement level is changed and a balancing
+ * of system resources is scheduled to run in the future.
+ */
+void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired)
+{
+ unsigned long flags;
+ struct vio_cmo_dev_entry *dev_ent;
+ int found = 0;
+
+ if (!firmware_has_feature(FW_FEATURE_CMO))
+ return;
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+ if (desired < VIO_CMO_MIN_ENT)
+ desired = VIO_CMO_MIN_ENT;
+
+ /*
+ * Changes will not be made for devices not in the device list.
+ * If it is not in the device list, then no driver is loaded
+ * for the device and it can not receive entitlement.
+ */
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
+ if (viodev == dev_ent->viodev) {
+ found = 1;
+ break;
+ }
+ if (!found) {
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+ return;
+ }
+
+ /* Increase/decrease in desired device entitlement */
+ if (desired >= viodev->cmo.desired) {
+ /* Just bump the bus and device values prior to a balance*/
+ vio_cmo.desired += desired - viodev->cmo.desired;
+ viodev->cmo.desired = desired;
+ } else {
+ /* Decrease bus and device values for desired entitlement */
+ vio_cmo.desired -= viodev->cmo.desired - desired;
+ viodev->cmo.desired = desired;
+ /*
+ * If less entitlement is desired than current entitlement, move
+ * any reserve memory in the change region to the excess pool.
+ */
+ if (viodev->cmo.entitled > desired) {
+ vio_cmo.reserve.size -= viodev->cmo.entitled - desired;
+ vio_cmo.excess.size += viodev->cmo.entitled - desired;
+ /*
+ * If entitlement moving from the reserve pool to the
+ * excess pool is currently unused, add to the excess
+ * free counter.
+ */
+ if (viodev->cmo.allocated < viodev->cmo.entitled)
+ vio_cmo.excess.free += viodev->cmo.entitled -
+ max(viodev->cmo.allocated, desired);
+ viodev->cmo.entitled = desired;
+ }
+ }
+ schedule_delayed_work(&vio_cmo.balance_q, 0);
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+/**
+ * vio_cmo_bus_probe - Handle CMO specific bus probe activities
+ *
+ * @viodev - Pointer to struct vio_dev for device
+ *
+ * Determine the devices IO memory entitlement needs, attempting
+ * to satisfy the system minimum entitlement at first and scheduling
+ * a balance operation to take care of the rest at a later time.
+ *
+ * Returns: 0 on success, -EINVAL when device doesn't support CMO, and
+ * -ENOMEM when entitlement is not available for device or
+ * device entry.
+ *
+ */
+static int vio_cmo_bus_probe(struct vio_dev *viodev)
+{
+ struct vio_cmo_dev_entry *dev_ent;
+ struct device *dev = &viodev->dev;
+ struct vio_driver *viodrv = to_vio_driver(dev->driver);
+ unsigned long flags;
+ size_t size;
+
+ /*
+ * Check to see that device has a DMA window and configure
+ * entitlement for the device.
+ */
+ if (of_get_property(viodev->dev.of_node,
+ "ibm,my-dma-window", NULL)) {
+ /* Check that the driver is CMO enabled and get desired DMA */
+ if (!viodrv->get_desired_dma) {
+ dev_err(dev, "%s: device driver does not support CMO\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev));
+ if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
+ viodev->cmo.desired = VIO_CMO_MIN_ENT;
+ size = VIO_CMO_MIN_ENT;
+
+ dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry),
+ GFP_KERNEL);
+ if (!dev_ent)
+ return -ENOMEM;
+
+ dev_ent->viodev = viodev;
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+ list_add(&dev_ent->list, &vio_cmo.device_list);
+ } else {
+ viodev->cmo.desired = 0;
+ size = 0;
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+ }
+
+ /*
+ * If the needs for vio_cmo.min have not changed since they
+ * were last set, the number of devices in the OF tree has
+ * been constant and the IO memory for this is already in
+ * the reserve pool.
+ */
+ if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) *
+ VIO_CMO_MIN_ENT)) {
+ /* Updated desired entitlement if device requires it */
+ if (size)
+ vio_cmo.desired += (viodev->cmo.desired -
+ VIO_CMO_MIN_ENT);
+ } else {
+ size_t tmp;
+
+ tmp = vio_cmo.spare + vio_cmo.excess.free;
+ if (tmp < size) {
+ dev_err(dev, "%s: insufficient free "
+ "entitlement to add device. "
+ "Need %lu, have %lu\n", __func__,
+ size, (vio_cmo.spare + tmp));
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+ return -ENOMEM;
+ }
+
+ /* Use excess pool first to fulfill request */
+ tmp = min(size, vio_cmo.excess.free);
+ vio_cmo.excess.free -= tmp;
+ vio_cmo.excess.size -= tmp;
+ vio_cmo.reserve.size += tmp;
+
+ /* Use spare if excess pool was insufficient */
+ vio_cmo.spare -= size - tmp;
+
+ /* Update bus accounting */
+ vio_cmo.min += size;
+ vio_cmo.desired += viodev->cmo.desired;
+ }
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+ return 0;
+}
+
+/**
+ * vio_cmo_bus_remove - Handle CMO specific bus removal activities
+ *
+ * @viodev - Pointer to struct vio_dev for device
+ *
+ * Remove the device from the cmo device list. The minimum entitlement
+ * will be reserved for the device as long as it is in the system. The
+ * rest of the entitlement the device had been allocated will be returned
+ * to the system.
+ */
+static void vio_cmo_bus_remove(struct vio_dev *viodev)
+{
+ struct vio_cmo_dev_entry *dev_ent;
+ unsigned long flags;
+ size_t tmp;
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+ if (viodev->cmo.allocated) {
+ dev_err(&viodev->dev, "%s: device had %lu bytes of IO "
+ "allocated after remove operation.\n",
+ __func__, viodev->cmo.allocated);
+ BUG();
+ }
+
+ /*
+ * Remove the device from the device list being maintained for
+ * CMO enabled devices.
+ */
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
+ if (viodev == dev_ent->viodev) {
+ list_del(&dev_ent->list);
+ kfree(dev_ent);
+ break;
+ }
+
+ /*
+ * Devices may not require any entitlement and they do not need
+ * to be processed. Otherwise, return the device's entitlement
+ * back to the pools.
+ */
+ if (viodev->cmo.entitled) {
+ /*
+ * This device has not yet left the OF tree, it's
+ * minimum entitlement remains in vio_cmo.min and
+ * vio_cmo.desired
+ */
+ vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT);
+
+ /*
+ * Save min allocation for device in reserve as long
+ * as it exists in OF tree as determined by later
+ * balance operation
+ */
+ viodev->cmo.entitled -= VIO_CMO_MIN_ENT;
+
+ /* Replenish spare from freed reserve pool */
+ if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) {
+ tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT -
+ vio_cmo.spare));
+ vio_cmo.spare += tmp;
+ viodev->cmo.entitled -= tmp;
+ }
+
+ /* Remaining reserve goes to excess pool */
+ vio_cmo.excess.size += viodev->cmo.entitled;
+ vio_cmo.excess.free += viodev->cmo.entitled;
+ vio_cmo.reserve.size -= viodev->cmo.entitled;
+
+ /*
+ * Until the device is removed it will keep a
+ * minimum entitlement; this will guarantee that
+ * a module unload/load will result in a success.
+ */
+ viodev->cmo.entitled = VIO_CMO_MIN_ENT;
+ viodev->cmo.desired = VIO_CMO_MIN_ENT;
+ atomic_set(&viodev->cmo.allocs_failed, 0);
+ }
+
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+static void vio_cmo_set_dma_ops(struct vio_dev *viodev)
+{
+ set_dma_ops(&viodev->dev, &vio_dma_mapping_ops);
+}
+
+/**
+ * vio_cmo_bus_init - CMO entitlement initialization at bus init time
+ *
+ * Set up the reserve and excess entitlement pools based on available
+ * system entitlement and the number of devices in the OF tree that
+ * require entitlement in the reserve pool.
+ */
+static void vio_cmo_bus_init(void)
+{
+ struct hvcall_mpp_data mpp_data;
+ int err;
+
+ memset(&vio_cmo, 0, sizeof(struct vio_cmo));
+ spin_lock_init(&vio_cmo.lock);
+ INIT_LIST_HEAD(&vio_cmo.device_list);
+ INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance);
+
+ /* Get current system entitlement */
+ err = h_get_mpp(&mpp_data);
+
+ /*
+ * On failure, continue with entitlement set to 0, will panic()
+ * later when spare is reserved.
+ */
+ if (err != H_SUCCESS) {
+ printk(KERN_ERR "%s: unable to determine system IO "\
+ "entitlement. (%d)\n", __func__, err);
+ vio_cmo.entitled = 0;
+ } else {
+ vio_cmo.entitled = mpp_data.entitled_mem;
+ }
+
+ /* Set reservation and check against entitlement */
+ vio_cmo.spare = VIO_CMO_MIN_ENT;
+ vio_cmo.reserve.size = vio_cmo.spare;
+ vio_cmo.reserve.size += (vio_cmo_num_OF_devs() *
+ VIO_CMO_MIN_ENT);
+ if (vio_cmo.reserve.size > vio_cmo.entitled) {
+ printk(KERN_ERR "%s: insufficient system entitlement\n",
+ __func__);
+ panic("%s: Insufficient system entitlement", __func__);
+ }
+
+ /* Set the remaining accounting variables */
+ vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size;
+ vio_cmo.excess.free = vio_cmo.excess.size;
+ vio_cmo.min = vio_cmo.reserve.size;
+ vio_cmo.desired = vio_cmo.reserve.size;
+}
+
+/* sysfs device functions and data structures for CMO */
+
+#define viodev_cmo_rd_attr(name) \
+static ssize_t viodev_cmo_##name##_show(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name); \
+}
+
+static ssize_t viodev_cmo_allocs_failed_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed));
+}
+
+static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ atomic_set(&viodev->cmo.allocs_failed, 0);
+ return count;
+}
+
+static ssize_t viodev_cmo_desired_set(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ size_t new_desired;
+ int ret;
+
+ ret = strict_strtoul(buf, 10, &new_desired);
+ if (ret)
+ return ret;
+
+ vio_cmo_set_dev_desired(viodev, new_desired);
+ return count;
+}
+
+viodev_cmo_rd_attr(desired);
+viodev_cmo_rd_attr(entitled);
+viodev_cmo_rd_attr(allocated);
+
+static ssize_t name_show(struct device *, struct device_attribute *, char *);
+static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+ char *buf);
+static struct device_attribute vio_cmo_dev_attrs[] = {
+ __ATTR_RO(name),
+ __ATTR_RO(devspec),
+ __ATTR_RO(modalias),
+ __ATTR(cmo_desired, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
+ viodev_cmo_desired_show, viodev_cmo_desired_set),
+ __ATTR(cmo_entitled, S_IRUGO, viodev_cmo_entitled_show, NULL),
+ __ATTR(cmo_allocated, S_IRUGO, viodev_cmo_allocated_show, NULL),
+ __ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
+ viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset),
+ __ATTR_NULL
+};
+
+/* sysfs bus functions and data structures for CMO */
+
+#define viobus_cmo_rd_attr(name) \
+static ssize_t \
+viobus_cmo_##name##_show(struct bus_type *bt, char *buf) \
+{ \
+ return sprintf(buf, "%lu\n", vio_cmo.name); \
+}
+
+#define viobus_cmo_pool_rd_attr(name, var) \
+static ssize_t \
+viobus_cmo_##name##_pool_show_##var(struct bus_type *bt, char *buf) \
+{ \
+ return sprintf(buf, "%lu\n", vio_cmo.name.var); \
+}
+
+static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf,
+ size_t count)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+ vio_cmo.high = vio_cmo.curr;
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+
+ return count;
+}
+
+viobus_cmo_rd_attr(entitled);
+viobus_cmo_pool_rd_attr(reserve, size);
+viobus_cmo_pool_rd_attr(excess, size);
+viobus_cmo_pool_rd_attr(excess, free);
+viobus_cmo_rd_attr(spare);
+viobus_cmo_rd_attr(min);
+viobus_cmo_rd_attr(desired);
+viobus_cmo_rd_attr(curr);
+viobus_cmo_rd_attr(high);
+
+static struct bus_attribute vio_cmo_bus_attrs[] = {
+ __ATTR(cmo_entitled, S_IRUGO, viobus_cmo_entitled_show, NULL),
+ __ATTR(cmo_reserve_size, S_IRUGO, viobus_cmo_reserve_pool_show_size, NULL),
+ __ATTR(cmo_excess_size, S_IRUGO, viobus_cmo_excess_pool_show_size, NULL),
+ __ATTR(cmo_excess_free, S_IRUGO, viobus_cmo_excess_pool_show_free, NULL),
+ __ATTR(cmo_spare, S_IRUGO, viobus_cmo_spare_show, NULL),
+ __ATTR(cmo_min, S_IRUGO, viobus_cmo_min_show, NULL),
+ __ATTR(cmo_desired, S_IRUGO, viobus_cmo_desired_show, NULL),
+ __ATTR(cmo_curr, S_IRUGO, viobus_cmo_curr_show, NULL),
+ __ATTR(cmo_high, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
+ viobus_cmo_high_show, viobus_cmo_high_reset),
+ __ATTR_NULL
+};
+
+static void vio_cmo_sysfs_init(void)
+{
+ vio_bus_type.dev_attrs = vio_cmo_dev_attrs;
+ vio_bus_type.bus_attrs = vio_cmo_bus_attrs;
+}
+#else /* CONFIG_PPC_SMLPAR */
+int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
+void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
+static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
+static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
+static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
+static void vio_cmo_bus_init(void) {}
+static void vio_cmo_sysfs_init(void) { }
+#endif /* CONFIG_PPC_SMLPAR */
+EXPORT_SYMBOL(vio_cmo_entitlement_update);
+EXPORT_SYMBOL(vio_cmo_set_dev_desired);
+
+static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
+{
+ const unsigned char *dma_window;
+ struct iommu_table *tbl;
+ unsigned long offset, size;
+
+ dma_window = of_get_property(dev->dev.of_node,
+ "ibm,my-dma-window", NULL);
+ if (!dma_window)
+ return NULL;
+
+ tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);
+ if (tbl == NULL)
+ return NULL;
+
+ of_parse_dma_window(dev->dev.of_node, dma_window,
+ &tbl->it_index, &offset, &size);
+
+ /* TCE table size - measured in tce entries */
+ tbl->it_size = size >> IOMMU_PAGE_SHIFT;
+ /* offset for VIO should always be 0 */
+ tbl->it_offset = offset >> IOMMU_PAGE_SHIFT;
+ tbl->it_busno = 0;
+ tbl->it_type = TCE_VB;
+ tbl->it_blocksize = 16;
+
+ return iommu_init_table(tbl, -1);
+}
+
+/**
+ * vio_match_device: - Tell if a VIO device has a matching
+ * VIO device id structure.
+ * @ids: array of VIO device id structures to search in
+ * @dev: the VIO device structure to match against
+ *
+ * Used by a driver to check whether a VIO device present in the
+ * system is in its list of supported devices. Returns the matching
+ * vio_device_id structure or NULL if there is no match.
+ */
+static const struct vio_device_id *vio_match_device(
+ const struct vio_device_id *ids, const struct vio_dev *dev)
+{
+ while (ids->type[0] != '\0') {
+ if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) &&
+ of_device_is_compatible(dev->dev.of_node,
+ ids->compat))
+ return ids;
+ ids++;
+ }
+ return NULL;
+}
+
+/*
+ * Convert from struct device to struct vio_dev and pass to driver.
+ * dev->driver has already been set by generic code because vio_bus_match
+ * succeeded.
+ */
+static int vio_bus_probe(struct device *dev)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ struct vio_driver *viodrv = to_vio_driver(dev->driver);
+ const struct vio_device_id *id;
+ int error = -ENODEV;
+
+ if (!viodrv->probe)
+ return error;
+
+ id = vio_match_device(viodrv->id_table, viodev);
+ if (id) {
+ memset(&viodev->cmo, 0, sizeof(viodev->cmo));
+ if (firmware_has_feature(FW_FEATURE_CMO)) {
+ error = vio_cmo_bus_probe(viodev);
+ if (error)
+ return error;
+ }
+ error = viodrv->probe(viodev, id);
+ if (error && firmware_has_feature(FW_FEATURE_CMO))
+ vio_cmo_bus_remove(viodev);
+ }
+
+ return error;
+}
+
+/* convert from struct device to struct vio_dev and pass to driver. */
+static int vio_bus_remove(struct device *dev)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ struct vio_driver *viodrv = to_vio_driver(dev->driver);
+ struct device *devptr;
+ int ret = 1;
+
+ /*
+ * Hold a reference to the device after the remove function is called
+ * to allow for CMO accounting cleanup for the device.
+ */
+ devptr = get_device(dev);
+
+ if (viodrv->remove)
+ ret = viodrv->remove(viodev);
+
+ if (!ret && firmware_has_feature(FW_FEATURE_CMO))
+ vio_cmo_bus_remove(viodev);
+
+ put_device(devptr);
+ return ret;
+}
+
+/**
+ * vio_register_driver: - Register a new vio driver
+ * @drv: The vio_driver structure to be registered.
+ */
+int __vio_register_driver(struct vio_driver *viodrv, struct module *owner,
+ const char *mod_name)
+{
+ pr_debug("%s: driver %s registering\n", __func__, viodrv->name);
+
+ /* fill in 'struct driver' fields */
+ viodrv->driver.name = viodrv->name;
+ viodrv->driver.pm = viodrv->pm;
+ viodrv->driver.bus = &vio_bus_type;
+ viodrv->driver.owner = owner;
+ viodrv->driver.mod_name = mod_name;
+
+ return driver_register(&viodrv->driver);
+}
+EXPORT_SYMBOL(__vio_register_driver);
+
+/**
+ * vio_unregister_driver - Remove registration of vio driver.
+ * @driver: The vio_driver struct to be removed form registration
+ */
+void vio_unregister_driver(struct vio_driver *viodrv)
+{
+ driver_unregister(&viodrv->driver);
+}
+EXPORT_SYMBOL(vio_unregister_driver);
+
+/* vio_dev refcount hit 0 */
+static void __devinit vio_dev_release(struct device *dev)
+{
+ struct iommu_table *tbl = get_iommu_table_base(dev);
+
+ if (tbl)
+ iommu_free_table(tbl, dev->of_node ?
+ dev->of_node->full_name : dev_name(dev));
+ of_node_put(dev->of_node);
+ kfree(to_vio_dev(dev));
+}
+
+/**
+ * vio_register_device_node: - Register a new vio device.
+ * @of_node: The OF node for this device.
+ *
+ * Creates and initializes a vio_dev structure from the data in
+ * of_node and adds it to the list of virtual devices.
+ * Returns a pointer to the created vio_dev or NULL if node has
+ * NULL device_type or compatible fields.
+ */
+struct vio_dev *vio_register_device_node(struct device_node *of_node)
+{
+ struct vio_dev *viodev;
+ const unsigned int *unit_address;
+
+ /* we need the 'device_type' property, in order to match with drivers */
+ if (of_node->type == NULL) {
+ printk(KERN_WARNING "%s: node %s missing 'device_type'\n",
+ __func__,
+ of_node->name ? of_node->name : "<unknown>");
+ return NULL;
+ }
+
+ unit_address = of_get_property(of_node, "reg", NULL);
+ if (unit_address == NULL) {
+ printk(KERN_WARNING "%s: node %s missing 'reg'\n",
+ __func__,
+ of_node->name ? of_node->name : "<unknown>");
+ return NULL;
+ }
+
+ /* allocate a vio_dev for this node */
+ viodev = kzalloc(sizeof(struct vio_dev), GFP_KERNEL);
+ if (viodev == NULL)
+ return NULL;
+
+ viodev->irq = irq_of_parse_and_map(of_node, 0);
+
+ dev_set_name(&viodev->dev, "%x", *unit_address);
+ viodev->name = of_node->name;
+ viodev->type = of_node->type;
+ viodev->unit_address = *unit_address;
+ viodev->dev.of_node = of_node_get(of_node);
+
+ if (firmware_has_feature(FW_FEATURE_CMO))
+ vio_cmo_set_dma_ops(viodev);
+ else
+ set_dma_ops(&viodev->dev, &dma_iommu_ops);
+ set_iommu_table_base(&viodev->dev, vio_build_iommu_table(viodev));
+ set_dev_node(&viodev->dev, of_node_to_nid(of_node));
+
+ /* init generic 'struct device' fields: */
+ viodev->dev.parent = &vio_bus_device.dev;
+ viodev->dev.bus = &vio_bus_type;
+ viodev->dev.release = vio_dev_release;
+ /* needed to ensure proper operation of coherent allocations
+ * later, in case driver doesn't set it explicitly */
+ dma_set_mask(&viodev->dev, DMA_BIT_MASK(64));
+ dma_set_coherent_mask(&viodev->dev, DMA_BIT_MASK(64));
+
+ /* register with generic device framework */
+ if (device_register(&viodev->dev)) {
+ printk(KERN_ERR "%s: failed to register device %s\n",
+ __func__, dev_name(&viodev->dev));
+ put_device(&viodev->dev);
+ return NULL;
+ }
+
+ return viodev;
+}
+EXPORT_SYMBOL(vio_register_device_node);
+
+/**
+ * vio_bus_init: - Initialize the virtual IO bus
+ */
+static int __init vio_bus_init(void)
+{
+ int err;
+ struct device_node *node_vroot;
+
+ if (firmware_has_feature(FW_FEATURE_CMO))
+ vio_cmo_sysfs_init();
+
+ err = bus_register(&vio_bus_type);
+ if (err) {
+ printk(KERN_ERR "failed to register VIO bus\n");
+ return err;
+ }
+
+ /*
+ * The fake parent of all vio devices, just to give us
+ * a nice directory
+ */
+ err = device_register(&vio_bus_device.dev);
+ if (err) {
+ printk(KERN_WARNING "%s: device_register returned %i\n",
+ __func__, err);
+ return err;
+ }
+
+ if (firmware_has_feature(FW_FEATURE_CMO))
+ vio_cmo_bus_init();
+
+ node_vroot = of_find_node_by_name(NULL, "vdevice");
+ if (node_vroot) {
+ struct device_node *of_node;
+
+ /*
+ * Create struct vio_devices for each virtual device in
+ * the device tree. Drivers will associate with them later.
+ */
+ for (of_node = node_vroot->child; of_node != NULL;
+ of_node = of_node->sibling)
+ vio_register_device_node(of_node);
+ of_node_put(node_vroot);
+ }
+
+ return 0;
+}
+__initcall(vio_bus_init);
+
+static ssize_t name_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%s\n", to_vio_dev(dev)->name);
+}
+
+static ssize_t devspec_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct device_node *of_node = dev->of_node;
+
+ return sprintf(buf, "%s\n", of_node ? of_node->full_name : "none");
+}
+
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ const struct vio_dev *vio_dev = to_vio_dev(dev);
+ struct device_node *dn;
+ const char *cp;
+
+ dn = dev->of_node;
+ if (!dn)
+ return -ENODEV;
+ cp = of_get_property(dn, "compatible", NULL);
+ if (!cp)
+ return -ENODEV;
+
+ return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp);
+}
+
+static struct device_attribute vio_dev_attrs[] = {
+ __ATTR_RO(name),
+ __ATTR_RO(devspec),
+ __ATTR_RO(modalias),
+ __ATTR_NULL
+};
+
+void __devinit vio_unregister_device(struct vio_dev *viodev)
+{
+ device_unregister(&viodev->dev);
+}
+EXPORT_SYMBOL(vio_unregister_device);
+
+static int vio_bus_match(struct device *dev, struct device_driver *drv)
+{
+ const struct vio_dev *vio_dev = to_vio_dev(dev);
+ struct vio_driver *vio_drv = to_vio_driver(drv);
+ const struct vio_device_id *ids = vio_drv->id_table;
+
+ return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL);
+}
+
+static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env)
+{
+ const struct vio_dev *vio_dev = to_vio_dev(dev);
+ struct device_node *dn;
+ const char *cp;
+
+ dn = dev->of_node;
+ if (!dn)
+ return -ENODEV;
+ cp = of_get_property(dn, "compatible", NULL);
+ if (!cp)
+ return -ENODEV;
+
+ add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp);
+ return 0;
+}
+
+static struct bus_type vio_bus_type = {
+ .name = "vio",
+ .dev_attrs = vio_dev_attrs,
+ .uevent = vio_hotplug,
+ .match = vio_bus_match,
+ .probe = vio_bus_probe,
+ .remove = vio_bus_remove,
+};
+
+/**
+ * vio_get_attribute: - get attribute for virtual device
+ * @vdev: The vio device to get property.
+ * @which: The property/attribute to be extracted.
+ * @length: Pointer to length of returned data size (unused if NULL).
+ *
+ * Calls prom.c's of_get_property() to return the value of the
+ * attribute specified by @which
+*/
+const void *vio_get_attribute(struct vio_dev *vdev, char *which, int *length)
+{
+ return of_get_property(vdev->dev.of_node, which, length);
+}
+EXPORT_SYMBOL(vio_get_attribute);
+
+#ifdef CONFIG_PPC_PSERIES
+/* vio_find_name() - internal because only vio.c knows how we formatted the
+ * kobject name
+ */
+static struct vio_dev *vio_find_name(const char *name)
+{
+ struct device *found;
+
+ found = bus_find_device_by_name(&vio_bus_type, NULL, name);
+ if (!found)
+ return NULL;
+
+ return to_vio_dev(found);
+}
+
+/**
+ * vio_find_node - find an already-registered vio_dev
+ * @vnode: device_node of the virtual device we're looking for
+ */
+struct vio_dev *vio_find_node(struct device_node *vnode)
+{
+ const uint32_t *unit_address;
+ char kobj_name[20];
+
+ /* construct the kobject name from the device node */
+ unit_address = of_get_property(vnode, "reg", NULL);
+ if (!unit_address)
+ return NULL;
+ snprintf(kobj_name, sizeof(kobj_name), "%x", *unit_address);
+
+ return vio_find_name(kobj_name);
+}
+EXPORT_SYMBOL(vio_find_node);
+
+int vio_enable_interrupts(struct vio_dev *dev)
+{
+ int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE);
+ if (rc != H_SUCCESS)
+ printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc);
+ return rc;
+}
+EXPORT_SYMBOL(vio_enable_interrupts);
+
+int vio_disable_interrupts(struct vio_dev *dev)
+{
+ int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE);
+ if (rc != H_SUCCESS)
+ printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc);
+ return rc;
+}
+EXPORT_SYMBOL(vio_disable_interrupts);
+#endif /* CONFIG_PPC_PSERIES */
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
new file mode 100644
index 00000000..65d1c08c
--- /dev/null
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -0,0 +1,262 @@
+#ifdef CONFIG_PPC64
+#define PROVIDE32(x) PROVIDE(__unused__##x)
+#else
+#define PROVIDE32(x) PROVIDE(x)
+#endif
+#include <asm/page.h>
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+
+ENTRY(_stext)
+
+PHDRS {
+ kernel PT_LOAD FLAGS(7); /* RWX */
+ notes PT_NOTE FLAGS(0);
+ dummy PT_NOTE FLAGS(0);
+
+ /* binutils < 2.18 has a bug that makes it misbehave when taking an
+ ELF file with all segments at load address 0 as input. This
+ happens when running "strip" on vmlinux, because of the AT() magic
+ in this linker script. People using GCC >= 4.2 won't run into
+ this problem, because the "build-id" support will put some data
+ into the "notes" segment (at a non-zero load address).
+
+ To work around this, we force some data into both the "dummy"
+ segment and the kernel segment, so the dummy segment will get a
+ non-zero load address. It's not enough to always create the
+ "notes" segment, since if nothing gets assigned to it, its load
+ address will be zero. */
+}
+
+#ifdef CONFIG_PPC64
+OUTPUT_ARCH(powerpc:common64)
+jiffies = jiffies_64;
+#else
+OUTPUT_ARCH(powerpc:common)
+jiffies = jiffies_64 + 4;
+#endif
+SECTIONS
+{
+ . = 0;
+ reloc_start = .;
+
+ . = KERNELBASE;
+
+/*
+ * Text, read only data and other permanent read-only sections
+ */
+
+ /* Text and gots */
+ .text : AT(ADDR(.text) - LOAD_OFFSET) {
+ ALIGN_FUNCTION();
+ HEAD_TEXT
+ _text = .;
+ /* careful! __ftr_alt_* sections need to be close to .text */
+ *(.text .fixup __ftr_alt_* .ref.text)
+ SCHED_TEXT
+ LOCK_TEXT
+ KPROBES_TEXT
+ IRQENTRY_TEXT
+
+#ifdef CONFIG_PPC32
+ *(.got1)
+ __got2_start = .;
+ *(.got2)
+ __got2_end = .;
+#endif /* CONFIG_PPC32 */
+
+ } :kernel
+
+ . = ALIGN(PAGE_SIZE);
+ _etext = .;
+ PROVIDE32 (etext = .);
+
+ /* Read-only data */
+ RODATA
+
+ EXCEPTION_TABLE(0)
+
+ NOTES :kernel :notes
+
+ /* The dummy segment contents for the bug workaround mentioned above
+ near PHDRS. */
+ .dummy : AT(ADDR(.dummy) - LOAD_OFFSET) {
+ LONG(0)
+ LONG(0)
+ LONG(0)
+ } :kernel :dummy
+
+/*
+ * Init sections discarded at runtime
+ */
+ . = ALIGN(PAGE_SIZE);
+ __init_begin = .;
+ INIT_TEXT_SECTION(PAGE_SIZE) :kernel
+
+ /* .exit.text is discarded at runtime, not link time,
+ * to deal with references from __bug_table
+ */
+ .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
+ EXIT_TEXT
+ }
+
+ .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
+ INIT_DATA
+ __vtop_table_begin = .;
+ *(.vtop_fixup);
+ __vtop_table_end = .;
+ __ptov_table_begin = .;
+ *(.ptov_fixup);
+ __ptov_table_end = .;
+ }
+
+ .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
+ INIT_SETUP(16)
+ }
+
+ .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
+ INIT_CALLS
+ }
+
+ .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+ CON_INITCALL
+ }
+
+ SECURITY_INIT
+
+ . = ALIGN(8);
+ __ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) {
+ __start___ftr_fixup = .;
+ *(__ftr_fixup)
+ __stop___ftr_fixup = .;
+ }
+ . = ALIGN(8);
+ __mmu_ftr_fixup : AT(ADDR(__mmu_ftr_fixup) - LOAD_OFFSET) {
+ __start___mmu_ftr_fixup = .;
+ *(__mmu_ftr_fixup)
+ __stop___mmu_ftr_fixup = .;
+ }
+ . = ALIGN(8);
+ __lwsync_fixup : AT(ADDR(__lwsync_fixup) - LOAD_OFFSET) {
+ __start___lwsync_fixup = .;
+ *(__lwsync_fixup)
+ __stop___lwsync_fixup = .;
+ }
+#ifdef CONFIG_PPC64
+ . = ALIGN(8);
+ __fw_ftr_fixup : AT(ADDR(__fw_ftr_fixup) - LOAD_OFFSET) {
+ __start___fw_ftr_fixup = .;
+ *(__fw_ftr_fixup)
+ __stop___fw_ftr_fixup = .;
+ }
+#endif
+ .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
+ INIT_RAM_FS
+ }
+
+ PERCPU_SECTION(L1_CACHE_BYTES)
+
+ . = ALIGN(8);
+ .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) {
+ __machine_desc_start = . ;
+ *(.machine.desc)
+ __machine_desc_end = . ;
+ }
+#ifdef CONFIG_RELOCATABLE
+ . = ALIGN(8);
+ .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET)
+ {
+#ifdef CONFIG_RELOCATABLE_PPC32
+ __dynamic_symtab = .;
+#endif
+ *(.dynsym)
+ }
+ .dynstr : AT(ADDR(.dynstr) - LOAD_OFFSET) { *(.dynstr) }
+ .dynamic : AT(ADDR(.dynamic) - LOAD_OFFSET)
+ {
+ __dynamic_start = .;
+ *(.dynamic)
+ }
+ .hash : AT(ADDR(.hash) - LOAD_OFFSET) { *(.hash) }
+ .interp : AT(ADDR(.interp) - LOAD_OFFSET) { *(.interp) }
+ .rela.dyn : AT(ADDR(.rela.dyn) - LOAD_OFFSET)
+ {
+ __rela_dyn_start = .;
+ *(.rela*)
+ }
+#endif
+
+ /* freed after init ends here */
+ . = ALIGN(PAGE_SIZE);
+ __init_end = .;
+
+/*
+ * And now the various read/write data
+ */
+
+ . = ALIGN(PAGE_SIZE);
+ _sdata = .;
+
+#ifdef CONFIG_PPC32
+ .data : AT(ADDR(.data) - LOAD_OFFSET) {
+ DATA_DATA
+ *(.sdata)
+ *(.got.plt) *(.got)
+ }
+#else
+ .data : AT(ADDR(.data) - LOAD_OFFSET) {
+ DATA_DATA
+ *(.data.rel*)
+ *(.toc1)
+ *(.branch_lt)
+ }
+
+ .opd : AT(ADDR(.opd) - LOAD_OFFSET) {
+ *(.opd)
+ }
+
+ .got : AT(ADDR(.got) - LOAD_OFFSET) {
+ __toc_start = .;
+ *(.got)
+ *(.toc)
+ }
+#endif
+
+ /* The initial task and kernel stack */
+ INIT_TASK_DATA_SECTION(THREAD_SIZE)
+
+ .data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) {
+ PAGE_ALIGNED_DATA(PAGE_SIZE)
+ }
+
+ .data..cacheline_aligned : AT(ADDR(.data..cacheline_aligned) - LOAD_OFFSET) {
+ CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
+ }
+
+ .data..read_mostly : AT(ADDR(.data..read_mostly) - LOAD_OFFSET) {
+ READ_MOSTLY_DATA(L1_CACHE_BYTES)
+ }
+
+ . = ALIGN(PAGE_SIZE);
+ .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
+ NOSAVE_DATA
+ }
+
+ . = ALIGN(PAGE_SIZE);
+ _edata = .;
+ PROVIDE32 (edata = .);
+
+/*
+ * And finally the bss
+ */
+
+ BSS_SECTION(0, 0, 0)
+
+ . = ALIGN(PAGE_SIZE);
+ _end = . ;
+ PROVIDE32 (end = .);
+
+ /* Sections to be discarded. */
+ DISCARDS
+}