diff options
author | Srikant Patnaik | 2015-01-13 15:08:24 +0530 |
---|---|---|
committer | Srikant Patnaik | 2015-01-13 15:08:24 +0530 |
commit | 97327692361306d1e6259021bc425e32832fdb50 (patch) | |
tree | fe9088f3248ec61e24f404f21b9793cb644b7f01 /arch/alpha | |
parent | 2d05a8f663478a44e088d122e0d62109bbc801d0 (diff) | |
parent | a3a8b90b61e21be3dde9101c4e86c881e0f06210 (diff) | |
download | FOSSEE-netbook-kernel-source-97327692361306d1e6259021bc425e32832fdb50.tar.gz FOSSEE-netbook-kernel-source-97327692361306d1e6259021bc425e32832fdb50.tar.bz2 FOSSEE-netbook-kernel-source-97327692361306d1e6259021bc425e32832fdb50.zip |
dirty fix to merging
Diffstat (limited to 'arch/alpha')
301 files changed, 62981 insertions, 0 deletions
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig new file mode 100644 index 00000000..22e58a99 --- /dev/null +++ b/arch/alpha/Kconfig @@ -0,0 +1,670 @@ +config ALPHA + bool + default y + select HAVE_AOUT + select HAVE_IDE + select HAVE_OPROFILE + select HAVE_SYSCALL_WRAPPERS + select HAVE_IRQ_WORK + select HAVE_PCSPKR_PLATFORM + select HAVE_PERF_EVENTS + select HAVE_DMA_ATTRS + select HAVE_GENERIC_HARDIRQS + select GENERIC_IRQ_PROBE + select AUTO_IRQ_AFFINITY if SMP + select GENERIC_IRQ_SHOW + select ARCH_WANT_OPTIONAL_GPIOLIB + select ARCH_HAVE_NMI_SAFE_CMPXCHG + help + The Alpha is a 64-bit general-purpose processor designed and + marketed by the Digital Equipment Corporation of blessed memory, + now Hewlett-Packard. The Alpha Linux project has a home page at + <http://www.alphalinux.org/>. + +config 64BIT + def_bool y + +config MMU + bool + default y + +config RWSEM_GENERIC_SPINLOCK + bool + +config RWSEM_XCHGADD_ALGORITHM + bool + default y + +config ARCH_HAS_ILOG2_U32 + bool + default n + +config ARCH_HAS_ILOG2_U64 + bool + default n + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config GENERIC_CMOS_UPDATE + def_bool y + +config GENERIC_GPIO + bool + +config ZONE_DMA + bool + default y + +config ARCH_DMA_ADDR_T_64BIT + def_bool y + +config NEED_DMA_MAP_STATE + def_bool y + +config NEED_SG_DMA_LENGTH + def_bool y + +config GENERIC_ISA_DMA + bool + default y + +source "init/Kconfig" +source "kernel/Kconfig.freezer" + + +menu "System setup" + +choice + prompt "Alpha system type" + default ALPHA_GENERIC + ---help--- + This is the system type of your hardware. A "generic" kernel will + run on any supported Alpha system. However, if you configure a + kernel for your specific system, it will be faster and smaller. + + To find out what type of Alpha system you have, you may want to + check out the Linux/Alpha FAQ, accessible on the WWW from + <http://www.alphalinux.org/>. In summary: + + Alcor/Alpha-XLT AS 600, AS 500, XL-300, XL-366 + Alpha-XL XL-233, XL-266 + AlphaBook1 Alpha laptop + Avanti AS 200, AS 205, AS 250, AS 255, AS 300, AS 400 + Cabriolet AlphaPC64, AlphaPCI64 + DP264 DP264 / DS20 / ES40 / DS10 / DS10L + EB164 EB164 21164 evaluation board + EB64+ EB64+ 21064 evaluation board + EB66 EB66 21066 evaluation board + EB66+ EB66+ 21066 evaluation board + Jensen DECpc 150, DEC 2000 models 300, 500 + LX164 AlphaPC164-LX + Lynx AS 2100A + Miata Personal Workstation 433/500/600 a/au + Marvel AlphaServer ES47 / ES80 / GS1280 + Mikasa AS 1000 + Noname AXPpci33, UDB (Multia) + Noritake AS 1000A, AS 600A, AS 800 + PC164 AlphaPC164 + Rawhide AS 1200, AS 4000, AS 4100 + Ruffian RPX164-2, AlphaPC164-UX, AlphaPC164-BX + SX164 AlphaPC164-SX + Sable AS 2000, AS 2100 + Shark DS 20L + Takara Takara (OEM) + Titan AlphaServer ES45 / DS25 / DS15 + Wildfire AlphaServer GS 40/80/160/320 + + If you don't know what to do, choose "generic". + +config ALPHA_GENERIC + bool "Generic" + help + A generic kernel will run on all supported Alpha hardware. + +config ALPHA_ALCOR + bool "Alcor/Alpha-XLT" + help + For systems using the Digital ALCOR chipset: 5 chips (4, 64-bit data + slices (Data Switch, DSW) - 208-pin PQFP and 1 control (Control, I/O + Address, CIA) - a 383 pin plastic PGA). It provides a DRAM + controller (256-bit memory bus) and a PCI interface. It also does + all the work required to support an external Bcache and to maintain + memory coherence when a PCI device DMAs into (or out of) memory. + +config ALPHA_XL + bool "Alpha-XL" + help + XL-233 and XL-266-based Alpha systems. + +config ALPHA_BOOK1 + bool "AlphaBook1" + help + Dec AlphaBook1/Burns Alpha-based laptops. + +config ALPHA_AVANTI_CH + bool "Avanti" + +config ALPHA_CABRIOLET + bool "Cabriolet" + help + Cabriolet AlphaPC64, AlphaPCI64 systems. Derived from EB64+ but now + baby-AT with Flash boot ROM, no on-board SCSI or Ethernet. 3 ISA + slots, 4 PCI slots (one pair are on a shared slot), uses plug-in + Bcache SIMMs. Requires power supply with 3.3V output. + +config ALPHA_DP264 + bool "DP264" + help + Various 21264 systems with the tsunami core logic chipset. + API Networks: 264DP, UP2000(+), CS20; + Compaq: DS10(E,L), XP900, XP1000, DS20(E), ES40. + +config ALPHA_EB164 + bool "EB164" + help + EB164 21164 evaluation board from DEC. Uses 21164 and ALCOR. Has + ISA and PCI expansion (3 ISA slots, 2 64-bit PCI slots (one is + shared with an ISA slot) and 2 32-bit PCI slots. Uses plus-in + Bcache SIMMs. I/O sub-system provides SuperI/O (2S, 1P, FD), KBD, + MOUSE (PS2 style), RTC/NVRAM. Boot ROM is Flash. PC-AT-sized + motherboard. Requires power supply with 3.3V output. + +config ALPHA_EB64P_CH + bool "EB64+" + +config ALPHA_EB66 + bool "EB66" + help + A Digital DS group board. Uses 21066 or 21066A. I/O sub-system is + identical to EB64+. Baby PC-AT size. Runs from standard PC power + supply. The EB66 schematic was published as a marketing poster + advertising the 21066 as "the first microprocessor in the world with + embedded PCI". + +config ALPHA_EB66P + bool "EB66+" + help + Later variant of the EB66 board. + +config ALPHA_EIGER + bool "Eiger" + help + Apparently an obscure OEM single-board computer based on the + Typhoon/Tsunami chipset family. Information on it is scanty. + +config ALPHA_JENSEN + bool "Jensen" + help + DEC PC 150 AXP (aka Jensen): This is a very old Digital system - one + of the first-generation Alpha systems. A number of these systems + seem to be available on the second- hand market. The Jensen is a + floor-standing tower system which originally used a 150MHz 21064 It + used programmable logic to interface a 486 EISA I/O bridge to the + CPU. + +config ALPHA_LX164 + bool "LX164" + help + A technical overview of this board is available at + <http://www.unix-ag.org/Linux-Alpha/Architectures/LX164.html>. + +config ALPHA_LYNX + bool "Lynx" + help + AlphaServer 2100A-based systems. + +config ALPHA_MARVEL + bool "Marvel" + help + AlphaServer ES47 / ES80 / GS1280 based on EV7. + +config ALPHA_MIATA + bool "Miata" + help + The Digital PersonalWorkStation (PWS 433a, 433au, 500a, 500au, 600a, + or 600au). + +config ALPHA_MIKASA + bool "Mikasa" + help + AlphaServer 1000-based Alpha systems. + +config ALPHA_NAUTILUS + bool "Nautilus" + help + Alpha systems based on the AMD 751 & ALI 1543C chipsets. + +config ALPHA_NONAME_CH + bool "Noname" + +config ALPHA_NORITAKE + bool "Noritake" + help + AlphaServer 1000A, AlphaServer 600A, and AlphaServer 800-based + systems. + +config ALPHA_PC164 + bool "PC164" + +config ALPHA_P2K + bool "Platform2000" + +config ALPHA_RAWHIDE + bool "Rawhide" + help + AlphaServer 1200, AlphaServer 4000 and AlphaServer 4100 machines. + See HOWTO at + <http://www.alphalinux.org/docs/rawhide/4100_install.shtml>. + +config ALPHA_RUFFIAN + bool "Ruffian" + help + Samsung APC164UX. There is a page on known problems and workarounds + at <http://www.alphalinux.org/faq/FAQ-11.html>. + +config ALPHA_RX164 + bool "RX164" + +config ALPHA_SX164 + bool "SX164" + +config ALPHA_SABLE + bool "Sable" + help + Digital AlphaServer 2000 and 2100-based systems. + +config ALPHA_SHARK + bool "Shark" + +config ALPHA_TAKARA + bool "Takara" + help + Alpha 11164-based OEM single-board computer. + +config ALPHA_TITAN + bool "Titan" + help + AlphaServer ES45/DS25 SMP based on EV68 and Titan chipset. + +config ALPHA_WILDFIRE + bool "Wildfire" + help + AlphaServer GS 40/80/160/320 SMP based on the EV67 core. + +endchoice + +# clear all implied options (don't want default values for those): +# Most of these machines have ISA slots; not exactly sure which don't, +# and this doesn't activate hordes of code, so do it always. +config ISA + bool + default y + help + Find out whether you have ISA slots on your motherboard. ISA is the + name of a bus system, i.e. the way the CPU talks to the other stuff + inside your box. Other bus systems are PCI, EISA, MicroChannel + (MCA) or VESA. ISA is an older system, now being displaced by PCI; + newer boards don't support it. If you have ISA, say Y, otherwise N. + +config ISA_DMA_API + bool + default y + +config PCI + bool + depends on !ALPHA_JENSEN + select GENERIC_PCI_IOMAP + default y + help + Find out whether you have a PCI motherboard. PCI is the name of a + bus system, i.e. the way the CPU talks to the other stuff inside + your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or + VESA. If you have PCI, say Y, otherwise N. + +config PCI_DOMAINS + bool + default y + +config PCI_SYSCALL + def_bool PCI + +config IOMMU_HELPER + def_bool PCI + +config ALPHA_NONAME + bool + depends on ALPHA_BOOK1 || ALPHA_NONAME_CH + default y + help + The AXPpci33 (aka NoName), is based on the EB66 (includes the Multia + UDB). This design was produced by Digital's Technical OEM (TOEM) + group. It uses the 21066 processor running at 166MHz or 233MHz. It + is a baby-AT size, and runs from a standard PC power supply. It has + 5 ISA slots and 3 PCI slots (one pair are a shared slot). There are + 2 versions, with either PS/2 or large DIN connectors for the + keyboard. + +config ALPHA_EV4 + bool + depends on ALPHA_JENSEN || (ALPHA_SABLE && !ALPHA_GAMMA) || ALPHA_LYNX || ALPHA_NORITAKE && !ALPHA_PRIMO || ALPHA_MIKASA && !ALPHA_PRIMO || ALPHA_CABRIOLET || ALPHA_AVANTI_CH || ALPHA_EB64P_CH || ALPHA_XL || ALPHA_NONAME || ALPHA_EB66 || ALPHA_EB66P || ALPHA_P2K + default y if !ALPHA_LYNX + +config ALPHA_LCA + bool + depends on ALPHA_NONAME || ALPHA_EB66 || ALPHA_EB66P || ALPHA_P2K + default y + +config ALPHA_APECS + bool + depends on !ALPHA_PRIMO && (ALPHA_NORITAKE || ALPHA_MIKASA) || ALPHA_CABRIOLET || ALPHA_AVANTI_CH || ALPHA_EB64P_CH || ALPHA_XL + default y + +config ALPHA_EB64P + bool + depends on ALPHA_CABRIOLET || ALPHA_EB64P_CH + default y + help + Uses 21064 or 21064A and APECs. Has ISA and PCI expansion (3 ISA, + 2 PCI, one pair are on a shared slot). Supports 36-bit DRAM SIMs. + ISA bus generated by Intel SaturnI/O PCI-ISA bridge. On-board SCSI + (NCR 810 on PCI) Ethernet (Digital 21040), KBD, MOUSE (PS2 style), + SuperI/O (2S, 1P, FD), RTC/NVRAM. Boot ROM is EPROM. PC-AT size. + Runs from standard PC power supply. + +config ALPHA_EV5 + bool "EV5 CPU(s) (model 5/xxx)?" if ALPHA_LYNX + default y if ALPHA_RX164 || ALPHA_RAWHIDE || ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_RUFFIAN || ALPHA_SABLE && ALPHA_GAMMA || ALPHA_NORITAKE && ALPHA_PRIMO || ALPHA_MIKASA && ALPHA_PRIMO || ALPHA_PC164 || ALPHA_TAKARA || ALPHA_EB164 || ALPHA_ALCOR + +config ALPHA_EV4 + bool + default y if ALPHA_LYNX && !ALPHA_EV5 + +config ALPHA_CIA + bool + depends on ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_RUFFIAN || ALPHA_NORITAKE && ALPHA_PRIMO || ALPHA_MIKASA && ALPHA_PRIMO || ALPHA_PC164 || ALPHA_TAKARA || ALPHA_EB164 || ALPHA_ALCOR + default y + +config ALPHA_EV56 + bool "EV56 CPU (speed >= 366MHz)?" if ALPHA_ALCOR + default y if ALPHA_RX164 || ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_RUFFIAN || ALPHA_PC164 || ALPHA_TAKARA + +config ALPHA_EV56 + prompt "EV56 CPU (speed >= 333MHz)?" + depends on ALPHA_NORITAKE || ALPHA_PRIMO + +config ALPHA_EV56 + prompt "EV56 CPU (speed >= 400MHz)?" + depends on ALPHA_RAWHIDE + +config ALPHA_PRIMO + bool "EV5 CPU daughtercard (model 5/xxx)?" + depends on ALPHA_NORITAKE || ALPHA_MIKASA + help + Say Y if you have an AS 1000 5/xxx or an AS 1000A 5/xxx. + +config ALPHA_GAMMA + bool "EV5 CPU(s) (model 5/xxx)?" + depends on ALPHA_SABLE + help + Say Y if you have an AS 2000 5/xxx or an AS 2100 5/xxx. + +config ALPHA_GAMMA + bool + depends on ALPHA_LYNX + default y + +config ALPHA_T2 + bool + depends on ALPHA_SABLE || ALPHA_LYNX + default y + +config ALPHA_PYXIS + bool + depends on ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_RUFFIAN + default y + +config ALPHA_EV6 + bool + depends on ALPHA_NAUTILUS || ALPHA_WILDFIRE || ALPHA_TITAN || ALPHA_SHARK || ALPHA_DP264 || ALPHA_EIGER || ALPHA_MARVEL + default y + +config ALPHA_TSUNAMI + bool + depends on ALPHA_SHARK || ALPHA_DP264 || ALPHA_EIGER + default y + +config ALPHA_EV67 + bool "EV67 (or later) CPU (speed > 600MHz)?" if ALPHA_DP264 || ALPHA_EIGER + default y if ALPHA_NAUTILUS || ALPHA_WILDFIRE || ALPHA_TITAN || ALPHA_SHARK || ALPHA_MARVEL + help + Is this a machine based on the EV67 core? If in doubt, select N here + and the machine will be treated as an EV6. + +config ALPHA_MCPCIA + bool + depends on ALPHA_RAWHIDE + default y + +config ALPHA_POLARIS + bool + depends on ALPHA_RX164 + default y + +config ALPHA_IRONGATE + bool + depends on ALPHA_NAUTILUS + default y + +config GENERIC_HWEIGHT + bool + default y if !ALPHA_EV67 + +config ALPHA_AVANTI + bool + depends on ALPHA_XL || ALPHA_AVANTI_CH + default y + help + Avanti AS 200, AS 205, AS 250, AS 255, AS 300, and AS 400-based + Alphas. Info at + <http://www.unix-ag.org/Linux-Alpha/Architectures/Avanti.html>. + +config ALPHA_BROKEN_IRQ_MASK + bool + depends on ALPHA_GENERIC || ALPHA_PC164 + default y + +config VGA_HOSE + bool + depends on VGA_CONSOLE && (ALPHA_GENERIC || ALPHA_TITAN || ALPHA_MARVEL || ALPHA_TSUNAMI) + default y + help + Support VGA on an arbitrary hose; needed for several platforms + which always have multiple hoses, and whose consoles support it. + + +config ALPHA_SRM + bool "Use SRM as bootloader" if ALPHA_CABRIOLET || ALPHA_AVANTI_CH || ALPHA_EB64P || ALPHA_PC164 || ALPHA_TAKARA || ALPHA_EB164 || ALPHA_ALCOR || ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_NAUTILUS || ALPHA_NONAME + default y if ALPHA_JENSEN || ALPHA_MIKASA || ALPHA_SABLE || ALPHA_LYNX || ALPHA_NORITAKE || ALPHA_DP264 || ALPHA_RAWHIDE || ALPHA_EIGER || ALPHA_WILDFIRE || ALPHA_TITAN || ALPHA_SHARK || ALPHA_MARVEL + ---help--- + There are two different types of booting firmware on Alphas: SRM, + which is command line driven, and ARC, which uses menus and arrow + keys. Details about the Linux/Alpha booting process are contained in + the Linux/Alpha FAQ, accessible on the WWW from + <http://www.alphalinux.org/>. + + The usual way to load Linux on an Alpha machine is to use MILO + (a bootloader that lets you pass command line parameters to the + kernel just like lilo does for the x86 architecture) which can be + loaded either from ARC or can be installed directly as a permanent + firmware replacement from floppy (which requires changing a certain + jumper on the motherboard). If you want to do either of these, say N + here. If MILO doesn't work on your system (true for Jensen + motherboards), you can bypass it altogether and boot Linux directly + from an SRM console; say Y here in order to do that. Note that you + won't be able to boot from an IDE disk using SRM. + + If unsure, say N. + +config EISA + bool + depends on ALPHA_GENERIC || ALPHA_JENSEN || ALPHA_ALCOR || ALPHA_MIKASA || ALPHA_SABLE || ALPHA_LYNX || ALPHA_NORITAKE || ALPHA_RAWHIDE + default y + +config ARCH_MAY_HAVE_PC_FDC + def_bool y + +config SMP + bool "Symmetric multi-processing support" + depends on ALPHA_SABLE || ALPHA_LYNX || ALPHA_RAWHIDE || ALPHA_DP264 || ALPHA_WILDFIRE || ALPHA_TITAN || ALPHA_GENERIC || ALPHA_SHARK || ALPHA_MARVEL + select USE_GENERIC_SMP_HELPERS + ---help--- + This enables support for systems with more than one CPU. If you have + a system with only one CPU, like most personal computers, say N. If + you have a system with more than one CPU, say Y. + + If you say N here, the kernel will run on single and multiprocessor + machines, but will use only one CPU of a multiprocessor machine. If + you say Y here, the kernel will run on many, but not all, + singleprocessor machines. On a singleprocessor machine, the kernel + will run faster if you say N here. + + See also the SMP-HOWTO available at + <http://www.tldp.org/docs.html#howto>. + + If you don't know what to do here, say N. + +config HAVE_DEC_LOCK + bool + depends on SMP + default y + +config NR_CPUS + int "Maximum number of CPUs (2-32)" + range 2 32 + depends on SMP + default "32" if ALPHA_GENERIC || ALPHA_MARVEL + default "4" if !ALPHA_GENERIC && !ALPHA_MARVEL + help + MARVEL support can handle a maximum of 32 CPUs, all the others + with working support have a maximum of 4 CPUs. + +config ARCH_DISCONTIGMEM_ENABLE + bool "Discontiguous Memory Support (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + Say Y to support efficient handling of discontiguous physical memory, + for architectures which are either NUMA (Non-Uniform Memory Access) + or have huge holes in the physical address space for other reasons. + See <file:Documentation/vm/numa> for more. + +source "mm/Kconfig" + +config NUMA + bool "NUMA Support (EXPERIMENTAL)" + depends on DISCONTIGMEM && BROKEN + help + Say Y to compile the kernel to support NUMA (Non-Uniform Memory + Access). This option is for configuring high-end multiprocessor + server machines. If in doubt, say N. + +config NODES_SHIFT + int + default "7" + depends on NEED_MULTIPLE_NODES + +# LARGE_VMALLOC is racy, if you *really* need it then fix it first +config ALPHA_LARGE_VMALLOC + bool + ---help--- + Process creation and other aspects of virtual memory management can + be streamlined if we restrict the kernel to one PGD for all vmalloc + allocations. This equates to about 8GB. + + Under normal circumstances, this is so far and above what is needed + as to be laughable. However, there are certain applications (such + as benchmark-grade in-kernel web serving) that can make use of as + much vmalloc space as is available. + + Say N unless you know you need gobs and gobs of vmalloc space. + +config VERBOSE_MCHECK + bool "Verbose Machine Checks" + +config VERBOSE_MCHECK_ON + int "Verbose Printing Mode (0=off, 1=on, 2=all)" + depends on VERBOSE_MCHECK + default 1 + ---help--- + This option allows the default printing mode to be set, and then + possibly overridden by a boot command argument. + + For example, if one wanted the option of printing verbose + machine checks, but wanted the default to be as if verbose + machine check printing was turned off, then one would choose + the printing mode to be 0. Then, upon reboot, one could add + the boot command line "verbose_mcheck=1" to get the normal + verbose machine check printing, or "verbose_mcheck=2" to get + the maximum information available. + + Take the default (1) unless you want more control or more info. + +config HZ + int + default 1200 if ALPHA_RAWHIDE + default 1024 + +source "drivers/pci/Kconfig" +source "drivers/eisa/Kconfig" + +source "drivers/pcmcia/Kconfig" + +config SRM_ENV + tristate "SRM environment through procfs" + depends on PROC_FS + ---help--- + If you enable this option, a subdirectory inside /proc called + /proc/srm_environment will give you access to the all important + SRM environment variables (those which have a name) and also + to all others (by their internal number). + + SRM is something like a BIOS for Alpha machines. There are some + other such BIOSes, like AlphaBIOS, which this driver cannot + support (hey, that's not SRM!). + + Despite the fact that this driver doesn't work on all Alphas (but + only on those which have SRM as their firmware), it's save to + build it even if your particular machine doesn't know about SRM + (or if you intend to compile a generic kernel). It will simply + not create those subdirectory in /proc (and give you some warning, + of course). + + This driver is also available as a module and will be called + srm_env then. + +source "fs/Kconfig.binfmt" + +endmenu + +source "net/Kconfig" + +source "drivers/Kconfig" + +source "fs/Kconfig" + +source "arch/alpha/Kconfig.debug" + +# DUMMY_CONSOLE may be defined in drivers/video/console/Kconfig +# but we also need it if VGA_HOSE is set +config DUMMY_CONSOLE + bool + depends on VGA_HOSE + default y + +source "security/Kconfig" + +source "crypto/Kconfig" + +source "lib/Kconfig" + diff --git a/arch/alpha/Kconfig.debug b/arch/alpha/Kconfig.debug new file mode 100644 index 00000000..3f6265f2 --- /dev/null +++ b/arch/alpha/Kconfig.debug @@ -0,0 +1,42 @@ +menu "Kernel hacking" + +source "lib/Kconfig.debug" + +config EARLY_PRINTK + bool + depends on ALPHA_GENERIC || ALPHA_SRM + default y + +config ALPHA_LEGACY_START_ADDRESS + bool "Legacy kernel start address" + depends on ALPHA_GENERIC + default n + ---help--- + The 2.4 kernel changed the kernel start address from 0x310000 + to 0x810000 to make room for the Wildfire's larger SRM console. + Recent consoles on Titan and Marvel machines also require the + extra room. + + If you're using aboot 0.7 or later, the bootloader will examine the + ELF headers to determine where to transfer control. Unfortunately, + most older bootloaders -- APB or MILO -- hardcoded the kernel start + address rather than examining the ELF headers, and the result is a + hard lockup. + + Say Y if you have a broken bootloader. Say N if you do not, or if + you wish to run on Wildfire, Titan, or Marvel. + +config ALPHA_LEGACY_START_ADDRESS + bool + depends on !ALPHA_GENERIC && !ALPHA_TITAN && !ALPHA_MARVEL && !ALPHA_WILDFIRE + default y + +config MATHEMU + tristate "Kernel FP software completion" if DEBUG_KERNEL && !SMP + default y if !DEBUG_KERNEL || SMP + help + This option is required for IEEE compliant floating point arithmetic + on the Alpha. The only time you would ever not say Y is to say M in + order to debug the code. Say Y unless you know what you are doing. + +endmenu diff --git a/arch/alpha/Makefile b/arch/alpha/Makefile new file mode 100644 index 00000000..4759fe75 --- /dev/null +++ b/arch/alpha/Makefile @@ -0,0 +1,69 @@ +# +# alpha/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1994 by Linus Torvalds +# + +NM := $(NM) -B + +LDFLAGS_vmlinux := -static -N #-relax +CHECKFLAGS += -D__alpha__ -m64 +cflags-y := -pipe -mno-fp-regs -ffixed-8 -msmall-data +cflags-y += $(call cc-option, -fno-jump-tables) + +cpuflags-$(CONFIG_ALPHA_EV4) := -mcpu=ev4 +cpuflags-$(CONFIG_ALPHA_EV5) := -mcpu=ev5 +cpuflags-$(CONFIG_ALPHA_EV56) := -mcpu=ev56 +cpuflags-$(CONFIG_ALPHA_POLARIS) := -mcpu=pca56 +cpuflags-$(CONFIG_ALPHA_SX164) := -mcpu=pca56 +cpuflags-$(CONFIG_ALPHA_EV6) := -mcpu=ev6 +cpuflags-$(CONFIG_ALPHA_EV67) := -mcpu=ev67 +# If GENERIC, make sure to turn off any instruction set extensions that +# the host compiler might have on by default. Given that EV4 and EV5 +# have the same instruction set, prefer EV5 because an EV5 schedule is +# more likely to keep an EV4 processor busy than vice-versa. +cpuflags-$(CONFIG_ALPHA_GENERIC) := -mcpu=ev5 + +cflags-y += $(cpuflags-y) + + +# For TSUNAMI, we must have the assembler not emulate our instructions. +# The same is true for IRONGATE, POLARIS, PYXIS. +# BWX is most important, but we don't really want any emulation ever. +KBUILD_CFLAGS += $(cflags-y) -Wa,-mev6 + +head-y := arch/alpha/kernel/head.o + +core-y += arch/alpha/kernel/ arch/alpha/mm/ +core-$(CONFIG_MATHEMU) += arch/alpha/math-emu/ +drivers-$(CONFIG_OPROFILE) += arch/alpha/oprofile/ +libs-y += arch/alpha/lib/ + +# export what is needed by arch/alpha/boot/Makefile +LIBS_Y := $(patsubst %/, %/lib.a, $(libs-y)) +export LIBS_Y + +boot := arch/alpha/boot + +#Default target when executing make with no arguments +all boot: $(boot)/vmlinux.gz + +$(boot)/vmlinux.gz: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $@ + +bootimage bootpfile bootpzfile: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +archclean: + $(Q)$(MAKE) $(clean)=$(boot) + +define archhelp + echo '* boot - Compressed kernel image (arch/alpha/boot/vmlinux.gz)' + echo ' bootimage - SRM bootable image (arch/alpha/boot/bootimage)' + echo ' bootpfile - BOOTP bootable image (arch/alpha/boot/bootpfile)' + echo ' bootpzfile - compressed kernel BOOTP image (arch/alpha/boot/bootpzfile)' +endef diff --git a/arch/alpha/boot/Makefile b/arch/alpha/boot/Makefile new file mode 100644 index 00000000..cd143887 --- /dev/null +++ b/arch/alpha/boot/Makefile @@ -0,0 +1,116 @@ +# +# arch/alpha/boot/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1994 by Linus Torvalds +# + +hostprogs-y := tools/mkbb tools/objstrip +targets := vmlinux.gz vmlinux \ + vmlinux.nh tools/lxboot tools/bootlx tools/bootph \ + tools/bootpzh bootloader bootpheader bootpzheader +OBJSTRIP := $(obj)/tools/objstrip + +# SRM bootable image. Copy to offset 512 of a partition. +$(obj)/bootimage: $(addprefix $(obj)/tools/,mkbb lxboot bootlx) $(obj)/vmlinux.nh + ( cat $(obj)/tools/lxboot $(obj)/tools/bootlx $(obj)/vmlinux.nh ) > $@ + $(obj)/tools/mkbb $@ $(obj)/tools/lxboot + @echo ' Bootimage $@ is ready' + +# BOOTP bootable image. Define INITRD during make to append initrd image. +$(obj)/bootpfile: $(obj)/tools/bootph $(obj)/vmlinux.nh + cat $(obj)/tools/bootph $(obj)/vmlinux.nh > $@ +ifdef INITRD + cat $(INITRD) >> $@ +endif + +# Compressed kernel BOOTP bootable image. +# Define INITRD during make to append initrd image. +$(obj)/bootpzfile: $(obj)/tools/bootpzh $(obj)/vmlinux.nh.gz + cat $(obj)/tools/bootpzh $(obj)/vmlinux.nh.gz > $@ +ifdef INITRD + cat $(INITRD) >> $@ +endif + +# Compressed kernel image +$(obj)/vmlinux.gz: $(obj)/vmlinux FORCE + $(call if_changed,gzip) + @echo ' Kernel $@ is ready' + +$(obj)/main.o: $(obj)/ksize.h +$(obj)/bootp.o: $(obj)/ksize.h +$(obj)/bootpz.o: $(obj)/kzsize.h + +$(obj)/ksize.h: $(obj)/vmlinux.nh FORCE + echo "#define KERNEL_SIZE `ls -l $(obj)/vmlinux.nh | awk '{print $$5}'`" > $@T +ifdef INITRD + [ -f $(INITRD) ] || exit 1 + echo "#define INITRD_IMAGE_SIZE `ls -l $(INITRD) | awk '{print $$5}'`" >> $@T +endif + cmp -s $@T $@ || mv -f $@T $@ + rm -f $@T + +$(obj)/kzsize.h: $(obj)/vmlinux.nh.gz FORCE + echo "#define KERNEL_SIZE `ls -l $(obj)/vmlinux.nh | awk '{print $$5}'`" > $@T + echo "#define KERNEL_Z_SIZE `ls -l $(obj)/vmlinux.nh.gz | awk '{print $$5}'`" >> $@T +ifdef INITRD + [ -f $(INITRD) ] || exit 1 + echo "#define INITRD_IMAGE_SIZE `ls -l $(INITRD) | awk '{print $$5}'`" >> $@T +endif + cmp -s $@T $@ || mv -f $@T $@ + rm -f $@T + +quiet_cmd_strip = STRIP $@ + cmd_strip = $(STRIP) -o $@ $< + +$(obj)/vmlinux: vmlinux FORCE + $(call if_changed,strip) + +quiet_cmd_objstrip = OBJSTRIP $@ + cmd_objstrip = $(OBJSTRIP) $(OSFLAGS_$(@F)) $< $@ + +OSFLAGS_vmlinux.nh := -v +OSFLAGS_lxboot := -p +OSFLAGS_bootlx := -vb +OSFLAGS_bootph := -vb +OSFLAGS_bootpzh := -vb + +$(obj)/vmlinux.nh: vmlinux $(OBJSTRIP) FORCE + $(call if_changed,objstrip) + +$(obj)/vmlinux.nh.gz: $(obj)/vmlinux.nh FORCE + $(call if_changed,gzip) + +$(obj)/tools/lxboot: $(obj)/bootloader $(OBJSTRIP) FORCE + $(call if_changed,objstrip) + +$(obj)/tools/bootlx: $(obj)/bootloader $(OBJSTRIP) FORCE + $(call if_changed,objstrip) + +$(obj)/tools/bootph: $(obj)/bootpheader $(OBJSTRIP) FORCE + $(call if_changed,objstrip) + +$(obj)/tools/bootpzh: $(obj)/bootpzheader $(OBJSTRIP) FORCE + $(call if_changed,objstrip) + +LDFLAGS_bootloader := -static -uvsprintf -T #-N -relax +LDFLAGS_bootpheader := -static -uvsprintf -T #-N -relax +LDFLAGS_bootpzheader := -static -uvsprintf -T #-N -relax + +OBJ_bootlx := $(obj)/head.o $(obj)/main.o +OBJ_bootph := $(obj)/head.o $(obj)/bootp.o +OBJ_bootpzh := $(obj)/head.o $(obj)/bootpz.o $(obj)/misc.o + +$(obj)/bootloader: $(obj)/bootloader.lds $(OBJ_bootlx) $(LIBS_Y) FORCE + $(call if_changed,ld) + +$(obj)/bootpheader: $(obj)/bootloader.lds $(OBJ_bootph) $(LIBS_Y) FORCE + $(call if_changed,ld) + +$(obj)/bootpzheader: $(obj)/bootloader.lds $(OBJ_bootpzh) $(LIBS_Y) FORCE + $(call if_changed,ld) + +$(obj)/misc.o: lib/inflate.c diff --git a/arch/alpha/boot/bootloader.lds b/arch/alpha/boot/bootloader.lds new file mode 100644 index 00000000..31c081ce --- /dev/null +++ b/arch/alpha/boot/bootloader.lds @@ -0,0 +1,24 @@ +OUTPUT_FORMAT("elf64-alpha") +ENTRY(__start) +printk = srm_printk; +SECTIONS +{ + . = 0x20000000; + .text : { *(.text) } + _etext = .; + PROVIDE (etext = .); + .rodata : { *(.rodata) *(.rodata.*) } + .data : { *(.data) CONSTRUCTORS } + .got : { *(.got) } + .sdata : { *(.sdata) } + _edata = .; + PROVIDE (edata = .); + .sbss : { *(.sbss) *(.scommon) } + .bss : { *(.bss) *(COMMON) } + _end = . ; + PROVIDE (end = .); + + .mdebug 0 : { *(.mdebug) } + .note 0 : { *(.note) } + .comment 0 : { *(.comment) } +} diff --git a/arch/alpha/boot/bootp.c b/arch/alpha/boot/bootp.c new file mode 100644 index 00000000..2a542a50 --- /dev/null +++ b/arch/alpha/boot/bootp.c @@ -0,0 +1,214 @@ +/* + * arch/alpha/boot/bootp.c + * + * Copyright (C) 1997 Jay Estabrook + * + * This file is used for creating a bootp file for the Linux/AXP kernel + * + * based significantly on the arch/alpha/boot/main.c of Linus Torvalds + */ +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <generated/utsrelease.h> +#include <linux/mm.h> + +#include <asm/console.h> +#include <asm/hwrpb.h> +#include <asm/pgtable.h> +#include <asm/io.h> + +#include <stdarg.h> + +#include "ksize.h" + +extern unsigned long switch_to_osf_pal(unsigned long nr, + struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa, + unsigned long *vptb); + +extern void move_stack(unsigned long new_stack); + +struct hwrpb_struct *hwrpb = INIT_HWRPB; +static struct pcb_struct pcb_va[1]; + +/* + * Find a physical address of a virtual object.. + * + * This is easy using the virtual page table address. + */ + +static inline void * +find_pa(unsigned long *vptb, void *ptr) +{ + unsigned long address = (unsigned long) ptr; + unsigned long result; + + result = vptb[address >> 13]; + result >>= 32; + result <<= 13; + result |= address & 0x1fff; + return (void *) result; +} + +/* + * This function moves into OSF/1 pal-code, and has a temporary + * PCB for that. The kernel proper should replace this PCB with + * the real one as soon as possible. + * + * The page table muckery in here depends on the fact that the boot + * code has the L1 page table identity-map itself in the second PTE + * in the L1 page table. Thus the L1-page is virtually addressable + * itself (through three levels) at virtual address 0x200802000. + */ + +#define VPTB ((unsigned long *) 0x200000000) +#define L1 ((unsigned long *) 0x200802000) + +void +pal_init(void) +{ + unsigned long i, rev; + struct percpu_struct * percpu; + struct pcb_struct * pcb_pa; + + /* Create the dummy PCB. */ + pcb_va->ksp = 0; + pcb_va->usp = 0; + pcb_va->ptbr = L1[1] >> 32; + pcb_va->asn = 0; + pcb_va->pcc = 0; + pcb_va->unique = 0; + pcb_va->flags = 1; + pcb_va->res1 = 0; + pcb_va->res2 = 0; + pcb_pa = find_pa(VPTB, pcb_va); + + /* + * a0 = 2 (OSF) + * a1 = return address, but we give the asm the vaddr of the PCB + * a2 = physical addr of PCB + * a3 = new virtual page table pointer + * a4 = KSP (but the asm sets it) + */ + srm_printk("Switching to OSF PAL-code .. "); + + i = switch_to_osf_pal(2, pcb_va, pcb_pa, VPTB); + if (i) { + srm_printk("failed, code %ld\n", i); + __halt(); + } + + percpu = (struct percpu_struct *) + (INIT_HWRPB->processor_offset + (unsigned long) INIT_HWRPB); + rev = percpu->pal_revision = percpu->palcode_avail[2]; + + srm_printk("Ok (rev %lx)\n", rev); + + tbia(); /* do it directly in case we are SMP */ +} + +static inline void +load(unsigned long dst, unsigned long src, unsigned long count) +{ + memcpy((void *)dst, (void *)src, count); +} + +/* + * Start the kernel. + */ +static inline void +runkernel(void) +{ + __asm__ __volatile__( + "bis %0,%0,$27\n\t" + "jmp ($27)" + : /* no outputs: it doesn't even return */ + : "r" (START_ADDR)); +} + +extern char _end; +#define KERNEL_ORIGIN \ + ((((unsigned long)&_end) + 511) & ~511) + +void +start_kernel(void) +{ + /* + * Note that this crufty stuff with static and envval + * and envbuf is because: + * + * 1. Frequently, the stack is short, and we don't want to overrun; + * 2. Frequently the stack is where we are going to copy the kernel to; + * 3. A certain SRM console required the GET_ENV output to stack. + * ??? A comment in the aboot sources indicates that the GET_ENV + * destination must be quadword aligned. Might this explain the + * behaviour, rather than requiring output to the stack, which + * seems rather far-fetched. + */ + static long nbytes; + static char envval[256] __attribute__((aligned(8))); + static unsigned long initrd_start; + + srm_printk("Linux/AXP bootp loader for Linux " UTS_RELEASE "\n"); + if (INIT_HWRPB->pagesize != 8192) { + srm_printk("Expected 8kB pages, got %ldkB\n", + INIT_HWRPB->pagesize >> 10); + return; + } + if (INIT_HWRPB->vptb != (unsigned long) VPTB) { + srm_printk("Expected vptb at %p, got %p\n", + VPTB, (void *)INIT_HWRPB->vptb); + return; + } + pal_init(); + + /* The initrd must be page-aligned. See below for the + cause of the magic number 5. */ + initrd_start = ((START_ADDR + 5*KERNEL_SIZE + PAGE_SIZE) | + (PAGE_SIZE-1)) + 1; +#ifdef INITRD_IMAGE_SIZE + srm_printk("Initrd positioned at %#lx\n", initrd_start); +#endif + + /* + * Move the stack to a safe place to ensure it won't be + * overwritten by kernel image. + */ + move_stack(initrd_start - PAGE_SIZE); + + nbytes = callback_getenv(ENV_BOOTED_OSFLAGS, envval, sizeof(envval)); + if (nbytes < 0 || nbytes >= sizeof(envval)) { + nbytes = 0; + } + envval[nbytes] = '\0'; + srm_printk("Loading the kernel...'%s'\n", envval); + + /* NOTE: *no* callbacks or printouts from here on out!!! */ + + /* This is a hack, as some consoles seem to get virtual 20000000 (ie + * where the SRM console puts the kernel bootp image) memory + * overlapping physical memory where the kernel wants to be put, + * which causes real problems when attempting to copy the former to + * the latter... :-( + * + * So, we first move the kernel virtual-to-physical way above where + * we physically want the kernel to end up, then copy it from there + * to its final resting place... ;-} + * + * Sigh... */ + +#ifdef INITRD_IMAGE_SIZE + load(initrd_start, KERNEL_ORIGIN+KERNEL_SIZE, INITRD_IMAGE_SIZE); +#endif + load(START_ADDR+(4*KERNEL_SIZE), KERNEL_ORIGIN, KERNEL_SIZE); + load(START_ADDR, START_ADDR+(4*KERNEL_SIZE), KERNEL_SIZE); + + memset((char*)ZERO_PGE, 0, PAGE_SIZE); + strcpy((char*)ZERO_PGE, envval); +#ifdef INITRD_IMAGE_SIZE + ((long *)(ZERO_PGE+256))[0] = initrd_start; + ((long *)(ZERO_PGE+256))[1] = INITRD_IMAGE_SIZE; +#endif + + runkernel(); +} diff --git a/arch/alpha/boot/bootpz.c b/arch/alpha/boot/bootpz.c new file mode 100644 index 00000000..d6ad1916 --- /dev/null +++ b/arch/alpha/boot/bootpz.c @@ -0,0 +1,475 @@ +/* + * arch/alpha/boot/bootpz.c + * + * Copyright (C) 1997 Jay Estabrook + * + * This file is used for creating a compressed BOOTP file for the + * Linux/AXP kernel + * + * based significantly on the arch/alpha/boot/main.c of Linus Torvalds + * and the decompression code from MILO. + */ +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <generated/utsrelease.h> +#include <linux/mm.h> + +#include <asm/console.h> +#include <asm/hwrpb.h> +#include <asm/pgtable.h> +#include <asm/io.h> + +#include <stdarg.h> + +#include "kzsize.h" + +/* FIXME FIXME FIXME */ +#define MALLOC_AREA_SIZE 0x200000 /* 2MB for now */ +/* FIXME FIXME FIXME */ + + +/* + WARNING NOTE + + It is very possible that turning on additional messages may cause + kernel image corruption due to stack usage to do the printing. + +*/ + +#undef DEBUG_CHECK_RANGE +#undef DEBUG_ADDRESSES +#undef DEBUG_LAST_STEPS + +extern unsigned long switch_to_osf_pal(unsigned long nr, + struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa, + unsigned long *vptb); + +extern int decompress_kernel(void* destination, void *source, + size_t ksize, size_t kzsize); + +extern void move_stack(unsigned long new_stack); + +struct hwrpb_struct *hwrpb = INIT_HWRPB; +static struct pcb_struct pcb_va[1]; + +/* + * Find a physical address of a virtual object.. + * + * This is easy using the virtual page table address. + */ +#define VPTB ((unsigned long *) 0x200000000) + +static inline unsigned long +find_pa(unsigned long address) +{ + unsigned long result; + + result = VPTB[address >> 13]; + result >>= 32; + result <<= 13; + result |= address & 0x1fff; + return result; +} + +int +check_range(unsigned long vstart, unsigned long vend, + unsigned long kstart, unsigned long kend) +{ + unsigned long vaddr, kaddr; + +#ifdef DEBUG_CHECK_RANGE + srm_printk("check_range: V[0x%lx:0x%lx] K[0x%lx:0x%lx]\n", + vstart, vend, kstart, kend); +#endif + /* do some range checking for detecting an overlap... */ + for (vaddr = vstart; vaddr <= vend; vaddr += PAGE_SIZE) + { + kaddr = (find_pa(vaddr) | PAGE_OFFSET); + if (kaddr >= kstart && kaddr <= kend) + { +#ifdef DEBUG_CHECK_RANGE + srm_printk("OVERLAP: vaddr 0x%lx kaddr 0x%lx" + " [0x%lx:0x%lx]\n", + vaddr, kaddr, kstart, kend); +#endif + return 1; + } + } + return 0; +} + +/* + * This function moves into OSF/1 pal-code, and has a temporary + * PCB for that. The kernel proper should replace this PCB with + * the real one as soon as possible. + * + * The page table muckery in here depends on the fact that the boot + * code has the L1 page table identity-map itself in the second PTE + * in the L1 page table. Thus the L1-page is virtually addressable + * itself (through three levels) at virtual address 0x200802000. + */ + +#define L1 ((unsigned long *) 0x200802000) + +void +pal_init(void) +{ + unsigned long i, rev; + struct percpu_struct * percpu; + struct pcb_struct * pcb_pa; + + /* Create the dummy PCB. */ + pcb_va->ksp = 0; + pcb_va->usp = 0; + pcb_va->ptbr = L1[1] >> 32; + pcb_va->asn = 0; + pcb_va->pcc = 0; + pcb_va->unique = 0; + pcb_va->flags = 1; + pcb_va->res1 = 0; + pcb_va->res2 = 0; + pcb_pa = (struct pcb_struct *)find_pa((unsigned long)pcb_va); + + /* + * a0 = 2 (OSF) + * a1 = return address, but we give the asm the vaddr of the PCB + * a2 = physical addr of PCB + * a3 = new virtual page table pointer + * a4 = KSP (but the asm sets it) + */ + srm_printk("Switching to OSF PAL-code... "); + + i = switch_to_osf_pal(2, pcb_va, pcb_pa, VPTB); + if (i) { + srm_printk("failed, code %ld\n", i); + __halt(); + } + + percpu = (struct percpu_struct *) + (INIT_HWRPB->processor_offset + (unsigned long) INIT_HWRPB); + rev = percpu->pal_revision = percpu->palcode_avail[2]; + + srm_printk("OK (rev %lx)\n", rev); + + tbia(); /* do it directly in case we are SMP */ +} + +/* + * Start the kernel. + */ +static inline void +runkernel(void) +{ + __asm__ __volatile__( + "bis %0,%0,$27\n\t" + "jmp ($27)" + : /* no outputs: it doesn't even return */ + : "r" (START_ADDR)); +} + +/* Must record the SP (it is virtual) on entry, so we can make sure + not to overwrite it during movement or decompression. */ +unsigned long SP_on_entry; + +/* Calculate the kernel image address based on the end of the BOOTP + bootstrapper (ie this program). +*/ +extern char _end; +#define KERNEL_ORIGIN \ + ((((unsigned long)&_end) + 511) & ~511) + +/* Round address to next higher page boundary. */ +#define NEXT_PAGE(a) (((a) | (PAGE_SIZE - 1)) + 1) + +#ifdef INITRD_IMAGE_SIZE +# define REAL_INITRD_SIZE INITRD_IMAGE_SIZE +#else +# define REAL_INITRD_SIZE 0 +#endif + +/* Defines from include/asm-alpha/system.h + + BOOT_ADDR Virtual address at which the consoles loads + the BOOTP image. + + KERNEL_START KSEG address at which the kernel is built to run, + which includes some initial data pages before the + code. + + START_ADDR KSEG address of the entry point of kernel code. + + ZERO_PGE KSEG address of page full of zeroes, but + upon entry to kerne cvan be expected + to hold the parameter list and possible + INTRD information. + + These are used in the local defines below. +*/ + + +/* Virtual addresses for the BOOTP image. Note that this includes the + bootstrapper code as well as the compressed kernel image, and + possibly the INITRD image. + + Oh, and do NOT forget the STACK, which appears to be placed virtually + beyond the end of the loaded image. +*/ +#define V_BOOT_IMAGE_START BOOT_ADDR +#define V_BOOT_IMAGE_END SP_on_entry + +/* Virtual addresses for just the bootstrapper part of the BOOTP image. */ +#define V_BOOTSTRAPPER_START BOOT_ADDR +#define V_BOOTSTRAPPER_END KERNEL_ORIGIN + +/* Virtual addresses for just the data part of the BOOTP + image. This may also include the INITRD image, but always + includes the STACK. +*/ +#define V_DATA_START KERNEL_ORIGIN +#define V_INITRD_START (KERNEL_ORIGIN + KERNEL_Z_SIZE) +#define V_INTRD_END (V_INITRD_START + REAL_INITRD_SIZE) +#define V_DATA_END V_BOOT_IMAGE_END + +/* KSEG addresses for the uncompressed kernel. + + Note that the end address includes workspace for the decompression. + Note also that the DATA_START address is ZERO_PGE, to which we write + just before jumping to the kernel image at START_ADDR. + */ +#define K_KERNEL_DATA_START ZERO_PGE +#define K_KERNEL_IMAGE_START START_ADDR +#define K_KERNEL_IMAGE_END (START_ADDR + KERNEL_SIZE) + +/* Define to where we may have to decompress the kernel image, before + we move it to the final position, in case of overlap. This will be + above the final position of the kernel. + + Regardless of overlap, we move the INITRD image to the end of this + copy area, because there needs to be a buffer area after the kernel + for "bootmem" anyway. +*/ +#define K_COPY_IMAGE_START NEXT_PAGE(K_KERNEL_IMAGE_END) +/* Reserve one page below INITRD for the new stack. */ +#define K_INITRD_START \ + NEXT_PAGE(K_COPY_IMAGE_START + KERNEL_SIZE + PAGE_SIZE) +#define K_COPY_IMAGE_END \ + (K_INITRD_START + REAL_INITRD_SIZE + MALLOC_AREA_SIZE) +#define K_COPY_IMAGE_SIZE \ + NEXT_PAGE(K_COPY_IMAGE_END - K_COPY_IMAGE_START) + +void +start_kernel(void) +{ + int must_move = 0; + + /* Initialize these for the decompression-in-place situation, + which is the smallest amount of work and most likely to + occur when using the normal START_ADDR of the kernel + (currently set to 16MB, to clear all console code. + */ + unsigned long uncompressed_image_start = K_KERNEL_IMAGE_START; + unsigned long uncompressed_image_end = K_KERNEL_IMAGE_END; + + unsigned long initrd_image_start = K_INITRD_START; + + /* + * Note that this crufty stuff with static and envval + * and envbuf is because: + * + * 1. Frequently, the stack is short, and we don't want to overrun; + * 2. Frequently the stack is where we are going to copy the kernel to; + * 3. A certain SRM console required the GET_ENV output to stack. + * ??? A comment in the aboot sources indicates that the GET_ENV + * destination must be quadword aligned. Might this explain the + * behaviour, rather than requiring output to the stack, which + * seems rather far-fetched. + */ + static long nbytes; + static char envval[256] __attribute__((aligned(8))); + register unsigned long asm_sp asm("30"); + + SP_on_entry = asm_sp; + + srm_printk("Linux/Alpha BOOTPZ Loader for Linux " UTS_RELEASE "\n"); + + /* Validity check the HWRPB. */ + if (INIT_HWRPB->pagesize != 8192) { + srm_printk("Expected 8kB pages, got %ldkB\n", + INIT_HWRPB->pagesize >> 10); + return; + } + if (INIT_HWRPB->vptb != (unsigned long) VPTB) { + srm_printk("Expected vptb at %p, got %p\n", + VPTB, (void *)INIT_HWRPB->vptb); + return; + } + + /* PALcode (re)initialization. */ + pal_init(); + + /* Get the parameter list from the console environment variable. */ + nbytes = callback_getenv(ENV_BOOTED_OSFLAGS, envval, sizeof(envval)); + if (nbytes < 0 || nbytes >= sizeof(envval)) { + nbytes = 0; + } + envval[nbytes] = '\0'; + +#ifdef DEBUG_ADDRESSES + srm_printk("START_ADDR 0x%lx\n", START_ADDR); + srm_printk("KERNEL_ORIGIN 0x%lx\n", KERNEL_ORIGIN); + srm_printk("KERNEL_SIZE 0x%x\n", KERNEL_SIZE); + srm_printk("KERNEL_Z_SIZE 0x%x\n", KERNEL_Z_SIZE); +#endif + + /* Since all the SRM consoles load the BOOTP image at virtual + * 0x20000000, we have to ensure that the physical memory + * pages occupied by that image do NOT overlap the physical + * address range where the kernel wants to be run. This + * causes real problems when attempting to cdecompress the + * former into the latter... :-( + * + * So, we may have to decompress/move the kernel/INITRD image + * virtual-to-physical someplace else first before moving + * kernel /INITRD to their final resting places... ;-} + * + * Sigh... + */ + + /* First, check to see if the range of addresses occupied by + the bootstrapper part of the BOOTP image include any of the + physical pages into which the kernel will be placed for + execution. + + We only need check on the final kernel image range, since we + will put the INITRD someplace that we can be sure is not + in conflict. + */ + if (check_range(V_BOOTSTRAPPER_START, V_BOOTSTRAPPER_END, + K_KERNEL_DATA_START, K_KERNEL_IMAGE_END)) + { + srm_printk("FATAL ERROR: overlap of bootstrapper code\n"); + __halt(); + } + + /* Next, check to see if the range of addresses occupied by + the compressed kernel/INITRD/stack portion of the BOOTP + image include any of the physical pages into which the + decompressed kernel or the INITRD will be placed for + execution. + */ + if (check_range(V_DATA_START, V_DATA_END, + K_KERNEL_IMAGE_START, K_COPY_IMAGE_END)) + { +#ifdef DEBUG_ADDRESSES + srm_printk("OVERLAP: cannot decompress in place\n"); +#endif + uncompressed_image_start = K_COPY_IMAGE_START; + uncompressed_image_end = K_COPY_IMAGE_END; + must_move = 1; + + /* Finally, check to see if the range of addresses + occupied by the compressed kernel/INITRD part of + the BOOTP image include any of the physical pages + into which that part is to be copied for + decompression. + */ + while (check_range(V_DATA_START, V_DATA_END, + uncompressed_image_start, + uncompressed_image_end)) + { +#if 0 + uncompressed_image_start += K_COPY_IMAGE_SIZE; + uncompressed_image_end += K_COPY_IMAGE_SIZE; + initrd_image_start += K_COPY_IMAGE_SIZE; +#else + /* Keep as close as possible to end of BOOTP image. */ + uncompressed_image_start += PAGE_SIZE; + uncompressed_image_end += PAGE_SIZE; + initrd_image_start += PAGE_SIZE; +#endif + } + } + + srm_printk("Starting to load the kernel with args '%s'\n", envval); + +#ifdef DEBUG_ADDRESSES + srm_printk("Decompressing the kernel...\n" + "...from 0x%lx to 0x%lx size 0x%x\n", + V_DATA_START, + uncompressed_image_start, + KERNEL_SIZE); +#endif + decompress_kernel((void *)uncompressed_image_start, + (void *)V_DATA_START, + KERNEL_SIZE, KERNEL_Z_SIZE); + + /* + * Now, move things to their final positions, if/as required. + */ + +#ifdef INITRD_IMAGE_SIZE + + /* First, we always move the INITRD image, if present. */ +#ifdef DEBUG_ADDRESSES + srm_printk("Moving the INITRD image...\n" + " from 0x%lx to 0x%lx size 0x%x\n", + V_INITRD_START, + initrd_image_start, + INITRD_IMAGE_SIZE); +#endif + memcpy((void *)initrd_image_start, (void *)V_INITRD_START, + INITRD_IMAGE_SIZE); + +#endif /* INITRD_IMAGE_SIZE */ + + /* Next, we may have to move the uncompressed kernel to the + final destination. + */ + if (must_move) { +#ifdef DEBUG_ADDRESSES + srm_printk("Moving the uncompressed kernel...\n" + "...from 0x%lx to 0x%lx size 0x%x\n", + uncompressed_image_start, + K_KERNEL_IMAGE_START, + (unsigned)KERNEL_SIZE); +#endif + /* + * Move the stack to a safe place to ensure it won't be + * overwritten by kernel image. + */ + move_stack(initrd_image_start - PAGE_SIZE); + + memcpy((void *)K_KERNEL_IMAGE_START, + (void *)uncompressed_image_start, KERNEL_SIZE); + } + + /* Clear the zero page, then move the argument list in. */ +#ifdef DEBUG_LAST_STEPS + srm_printk("Preparing ZERO_PGE...\n"); +#endif + memset((char*)ZERO_PGE, 0, PAGE_SIZE); + strcpy((char*)ZERO_PGE, envval); + +#ifdef INITRD_IMAGE_SIZE + +#ifdef DEBUG_LAST_STEPS + srm_printk("Preparing INITRD info...\n"); +#endif + /* Finally, set the INITRD paramenters for the kernel. */ + ((long *)(ZERO_PGE+256))[0] = initrd_image_start; + ((long *)(ZERO_PGE+256))[1] = INITRD_IMAGE_SIZE; + +#endif /* INITRD_IMAGE_SIZE */ + +#ifdef DEBUG_LAST_STEPS + srm_printk("Doing 'runkernel()'...\n"); +#endif + runkernel(); +} + + /* dummy function, should never be called. */ +void *__kmalloc(size_t size, gfp_t flags) +{ + return (void *)NULL; +} diff --git a/arch/alpha/boot/head.S b/arch/alpha/boot/head.S new file mode 100644 index 00000000..b06812bc --- /dev/null +++ b/arch/alpha/boot/head.S @@ -0,0 +1,122 @@ +/* + * arch/alpha/boot/head.S + * + * initial bootloader stuff.. + */ + + + .set noreorder + .globl __start + .ent __start +__start: + br $29,2f +2: ldgp $29,0($29) + jsr $26,start_kernel + call_pal PAL_halt + .end __start + + .align 5 + .globl wrent + .ent wrent +wrent: + .prologue 0 + call_pal PAL_wrent + ret ($26) + .end wrent + + .align 5 + .globl wrkgp + .ent wrkgp +wrkgp: + .prologue 0 + call_pal PAL_wrkgp + ret ($26) + .end wrkgp + + .align 5 + .globl switch_to_osf_pal + .ent switch_to_osf_pal +switch_to_osf_pal: + subq $30,128,$30 + .frame $30,128,$26 + stq $26,0($30) + stq $1,8($30) + stq $2,16($30) + stq $3,24($30) + stq $4,32($30) + stq $5,40($30) + stq $6,48($30) + stq $7,56($30) + stq $8,64($30) + stq $9,72($30) + stq $10,80($30) + stq $11,88($30) + stq $12,96($30) + stq $13,104($30) + stq $14,112($30) + stq $15,120($30) + .prologue 0 + + stq $30,0($17) /* save KSP in PCB */ + + bis $30,$30,$20 /* a4 = KSP */ + br $17,1f + + ldq $26,0($30) + ldq $1,8($30) + ldq $2,16($30) + ldq $3,24($30) + ldq $4,32($30) + ldq $5,40($30) + ldq $6,48($30) + ldq $7,56($30) + ldq $8,64($30) + ldq $9,72($30) + ldq $10,80($30) + ldq $11,88($30) + ldq $12,96($30) + ldq $13,104($30) + ldq $14,112($30) + ldq $15,120($30) + addq $30,128,$30 + ret ($26) +1: call_pal PAL_swppal + .end switch_to_osf_pal + + .align 3 + .globl tbi + .ent tbi +tbi: + .prologue 0 + call_pal PAL_tbi + ret ($26) + .end tbi + + .align 3 + .globl halt + .ent halt +halt: + .prologue 0 + call_pal PAL_halt + .end halt + +/* $16 - new stack page */ + .align 3 + .globl move_stack + .ent move_stack +move_stack: + .prologue 0 + lda $0, 0x1fff($31) + and $0, $30, $1 /* Stack offset */ + or $1, $16, $16 /* New stack pointer */ + mov $30, $1 + mov $16, $2 +1: ldq $3, 0($1) /* Move the stack */ + addq $1, 8, $1 + stq $3, 0($2) + and $0, $1, $4 + addq $2, 8, $2 + bne $4, 1b + mov $16, $30 + ret ($26) + .end move_stack diff --git a/arch/alpha/boot/main.c b/arch/alpha/boot/main.c new file mode 100644 index 00000000..3baf2d1e --- /dev/null +++ b/arch/alpha/boot/main.c @@ -0,0 +1,191 @@ +/* + * arch/alpha/boot/main.c + * + * Copyright (C) 1994, 1995 Linus Torvalds + * + * This file is the bootloader for the Linux/AXP kernel + */ +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <generated/utsrelease.h> +#include <linux/mm.h> + +#include <asm/console.h> +#include <asm/hwrpb.h> +#include <asm/pgtable.h> + +#include <stdarg.h> + +#include "ksize.h" + +extern int vsprintf(char *, const char *, va_list); +extern unsigned long switch_to_osf_pal(unsigned long nr, + struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa, + unsigned long *vptb); +struct hwrpb_struct *hwrpb = INIT_HWRPB; +static struct pcb_struct pcb_va[1]; + +/* + * Find a physical address of a virtual object.. + * + * This is easy using the virtual page table address. + */ + +static inline void * +find_pa(unsigned long *vptb, void *ptr) +{ + unsigned long address = (unsigned long) ptr; + unsigned long result; + + result = vptb[address >> 13]; + result >>= 32; + result <<= 13; + result |= address & 0x1fff; + return (void *) result; +} + +/* + * This function moves into OSF/1 pal-code, and has a temporary + * PCB for that. The kernel proper should replace this PCB with + * the real one as soon as possible. + * + * The page table muckery in here depends on the fact that the boot + * code has the L1 page table identity-map itself in the second PTE + * in the L1 page table. Thus the L1-page is virtually addressable + * itself (through three levels) at virtual address 0x200802000. + */ + +#define VPTB ((unsigned long *) 0x200000000) +#define L1 ((unsigned long *) 0x200802000) + +void +pal_init(void) +{ + unsigned long i, rev; + struct percpu_struct * percpu; + struct pcb_struct * pcb_pa; + + /* Create the dummy PCB. */ + pcb_va->ksp = 0; + pcb_va->usp = 0; + pcb_va->ptbr = L1[1] >> 32; + pcb_va->asn = 0; + pcb_va->pcc = 0; + pcb_va->unique = 0; + pcb_va->flags = 1; + pcb_va->res1 = 0; + pcb_va->res2 = 0; + pcb_pa = find_pa(VPTB, pcb_va); + + /* + * a0 = 2 (OSF) + * a1 = return address, but we give the asm the vaddr of the PCB + * a2 = physical addr of PCB + * a3 = new virtual page table pointer + * a4 = KSP (but the asm sets it) + */ + srm_printk("Switching to OSF PAL-code .. "); + + i = switch_to_osf_pal(2, pcb_va, pcb_pa, VPTB); + if (i) { + srm_printk("failed, code %ld\n", i); + __halt(); + } + + percpu = (struct percpu_struct *) + (INIT_HWRPB->processor_offset + (unsigned long) INIT_HWRPB); + rev = percpu->pal_revision = percpu->palcode_avail[2]; + + srm_printk("Ok (rev %lx)\n", rev); + + tbia(); /* do it directly in case we are SMP */ +} + +static inline long openboot(void) +{ + char bootdev[256]; + long result; + + result = callback_getenv(ENV_BOOTED_DEV, bootdev, 255); + if (result < 0) + return result; + return callback_open(bootdev, result & 255); +} + +static inline long close(long dev) +{ + return callback_close(dev); +} + +static inline long load(long dev, unsigned long addr, unsigned long count) +{ + char bootfile[256]; + extern char _end; + long result, boot_size = &_end - (char *) BOOT_ADDR; + + result = callback_getenv(ENV_BOOTED_FILE, bootfile, 255); + if (result < 0) + return result; + result &= 255; + bootfile[result] = '\0'; + if (result) + srm_printk("Boot file specification (%s) not implemented\n", + bootfile); + return callback_read(dev, count, (void *)addr, boot_size/512 + 1); +} + +/* + * Start the kernel. + */ +static void runkernel(void) +{ + __asm__ __volatile__( + "bis %1,%1,$30\n\t" + "bis %0,%0,$26\n\t" + "ret ($26)" + : /* no outputs: it doesn't even return */ + : "r" (START_ADDR), + "r" (PAGE_SIZE + INIT_STACK)); +} + +void start_kernel(void) +{ + long i; + long dev; + int nbytes; + char envval[256]; + + srm_printk("Linux/AXP bootloader for Linux " UTS_RELEASE "\n"); + if (INIT_HWRPB->pagesize != 8192) { + srm_printk("Expected 8kB pages, got %ldkB\n", INIT_HWRPB->pagesize >> 10); + return; + } + pal_init(); + dev = openboot(); + if (dev < 0) { + srm_printk("Unable to open boot device: %016lx\n", dev); + return; + } + dev &= 0xffffffff; + srm_printk("Loading vmlinux ..."); + i = load(dev, START_ADDR, KERNEL_SIZE); + close(dev); + if (i != KERNEL_SIZE) { + srm_printk("Failed (%lx)\n", i); + return; + } + + nbytes = callback_getenv(ENV_BOOTED_OSFLAGS, envval, sizeof(envval)); + if (nbytes < 0) { + nbytes = 0; + } + envval[nbytes] = '\0'; + strcpy((char*)ZERO_PGE, envval); + + srm_printk(" Ok\nNow booting the kernel\n"); + runkernel(); + for (i = 0 ; i < 0x100000000 ; i++) + /* nothing */; + __halt(); +} diff --git a/arch/alpha/boot/misc.c b/arch/alpha/boot/misc.c new file mode 100644 index 00000000..3ff9a957 --- /dev/null +++ b/arch/alpha/boot/misc.c @@ -0,0 +1,173 @@ +/* + * misc.c + * + * This is a collection of several routines from gzip-1.0.3 + * adapted for Linux. + * + * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 + * + * Modified for ARM Linux by Russell King + * + * Nicolas Pitre <nico@visuaide.com> 1999/04/14 : + * For this code to run directly from Flash, all constant variables must + * be marked with 'const' and all other variables initialized at run-time + * only. This way all non constant variables will end up in the bss segment, + * which should point to addresses in RAM and cleared to 0 on start. + * This allows for a much quicker boot time. + * + * Modified for Alpha, from the ARM version, by Jay Estabrook 2003. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> + +#include <asm/uaccess.h> + +#define memzero(s,n) memset ((s),0,(n)) +#define puts srm_printk +extern long srm_printk(const char *, ...) + __attribute__ ((format (printf, 1, 2))); + +/* + * gzip delarations + */ +#define OF(args) args +#define STATIC static + +typedef unsigned char uch; +typedef unsigned short ush; +typedef unsigned long ulg; + +#define WSIZE 0x8000 /* Window size must be at least 32k, */ + /* and a power of two */ + +static uch *inbuf; /* input buffer */ +static uch *window; /* Sliding window buffer */ + +static unsigned insize; /* valid bytes in inbuf */ +static unsigned inptr; /* index of next byte to be processed in inbuf */ +static unsigned outcnt; /* bytes in output buffer */ + +/* gzip flag byte */ +#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ +#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */ +#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ +#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ +#define COMMENT 0x10 /* bit 4 set: file comment present */ +#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */ +#define RESERVED 0xC0 /* bit 6,7: reserved */ + +#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf()) + +/* Diagnostic functions */ +#ifdef DEBUG +# define Assert(cond,msg) {if(!(cond)) error(msg);} +# define Trace(x) fprintf x +# define Tracev(x) {if (verbose) fprintf x ;} +# define Tracevv(x) {if (verbose>1) fprintf x ;} +# define Tracec(c,x) {if (verbose && (c)) fprintf x ;} +# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;} +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + +static int fill_inbuf(void); +static void flush_window(void); +static void error(char *m); + +static char *input_data; +static int input_data_size; + +static uch *output_data; +static ulg output_ptr; +static ulg bytes_out; + +static void error(char *m); +static void gzip_mark(void **); +static void gzip_release(void **); + +extern int end; +static ulg free_mem_ptr; +static ulg free_mem_end_ptr; + +#define HEAP_SIZE 0x3000 + +#include "../../../lib/inflate.c" + +/* =========================================================================== + * Fill the input buffer. This is called only when the buffer is empty + * and at least one byte is really needed. + */ +int fill_inbuf(void) +{ + if (insize != 0) + error("ran out of input data"); + + inbuf = input_data; + insize = input_data_size; + + inptr = 1; + return inbuf[0]; +} + +/* =========================================================================== + * Write the output window window[0..outcnt-1] and update crc and bytes_out. + * (Used for the decompressed data only.) + */ +void flush_window(void) +{ + ulg c = crc; + unsigned n; + uch *in, *out, ch; + + in = window; + out = &output_data[output_ptr]; + for (n = 0; n < outcnt; n++) { + ch = *out++ = *in++; + c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); + } + crc = c; + bytes_out += (ulg)outcnt; + output_ptr += (ulg)outcnt; + outcnt = 0; +/* puts("."); */ +} + +static void error(char *x) +{ + puts("\n\n"); + puts(x); + puts("\n\n -- System halted"); + + while(1); /* Halt */ +} + +unsigned int +decompress_kernel(void *output_start, + void *input_start, + size_t ksize, + size_t kzsize) +{ + output_data = (uch *)output_start; + input_data = (uch *)input_start; + input_data_size = kzsize; /* use compressed size */ + + /* FIXME FIXME FIXME */ + free_mem_ptr = (ulg)output_start + ksize; + free_mem_end_ptr = (ulg)output_start + ksize + 0x200000; + /* FIXME FIXME FIXME */ + + /* put in temp area to reduce initial footprint */ + window = malloc(WSIZE); + + makecrc(); +/* puts("Uncompressing Linux..."); */ + gunzip(); +/* puts(" done, booting the kernel.\n"); */ + return output_ptr; +} diff --git a/arch/alpha/boot/tools/mkbb.c b/arch/alpha/boot/tools/mkbb.c new file mode 100644 index 00000000..1185778e --- /dev/null +++ b/arch/alpha/boot/tools/mkbb.c @@ -0,0 +1,152 @@ +/* This utility makes a bootblock suitable for the SRM console/miniloader */ + +/* Usage: + * mkbb <device> <lxboot> + * + * Where <device> is the name of the device to install the bootblock on, + * and <lxboot> is the name of a bootblock to merge in. This bootblock + * contains the offset and size of the bootloader. It must be exactly + * 512 bytes long. + */ + +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> + +/* Minimal definition of disklabel, so we don't have to include + * asm/disklabel.h (confuses make) + */ +#ifndef MAXPARTITIONS +#define MAXPARTITIONS 8 /* max. # of partitions */ +#endif + +#ifndef u8 +#define u8 unsigned char +#endif + +#ifndef u16 +#define u16 unsigned short +#endif + +#ifndef u32 +#define u32 unsigned int +#endif + +struct disklabel { + u32 d_magic; /* must be DISKLABELMAGIC */ + u16 d_type, d_subtype; + u8 d_typename[16]; + u8 d_packname[16]; + u32 d_secsize; + u32 d_nsectors; + u32 d_ntracks; + u32 d_ncylinders; + u32 d_secpercyl; + u32 d_secprtunit; + u16 d_sparespertrack; + u16 d_sparespercyl; + u32 d_acylinders; + u16 d_rpm, d_interleave, d_trackskew, d_cylskew; + u32 d_headswitch, d_trkseek, d_flags; + u32 d_drivedata[5]; + u32 d_spare[5]; + u32 d_magic2; /* must be DISKLABELMAGIC */ + u16 d_checksum; + u16 d_npartitions; + u32 d_bbsize, d_sbsize; + struct d_partition { + u32 p_size; + u32 p_offset; + u32 p_fsize; + u8 p_fstype; + u8 p_frag; + u16 p_cpg; + } d_partitions[MAXPARTITIONS]; +}; + + +typedef union __bootblock { + struct { + char __pad1[64]; + struct disklabel __label; + } __u1; + struct { + unsigned long __pad2[63]; + unsigned long __checksum; + } __u2; + char bootblock_bytes[512]; + unsigned long bootblock_quadwords[64]; +} bootblock; + +#define bootblock_label __u1.__label +#define bootblock_checksum __u2.__checksum + +int main(int argc, char ** argv) +{ + bootblock bootblock_from_disk; + bootblock bootloader_image; + int dev, fd; + int i; + int nread; + + /* Make sure of the arg count */ + if(argc != 3) { + fprintf(stderr, "Usage: %s device lxboot\n", argv[0]); + exit(0); + } + + /* First, open the device and make sure it's accessible */ + dev = open(argv[1], O_RDWR); + if(dev < 0) { + perror(argv[1]); + exit(0); + } + + /* Now open the lxboot and make sure it's reasonable */ + fd = open(argv[2], O_RDONLY); + if(fd < 0) { + perror(argv[2]); + close(dev); + exit(0); + } + + /* Read in the lxboot */ + nread = read(fd, &bootloader_image, sizeof(bootblock)); + if(nread != sizeof(bootblock)) { + perror("lxboot read"); + fprintf(stderr, "expected %zd, got %d\n", sizeof(bootblock), nread); + exit(0); + } + + /* Read in the bootblock from disk. */ + nread = read(dev, &bootblock_from_disk, sizeof(bootblock)); + if(nread != sizeof(bootblock)) { + perror("bootblock read"); + fprintf(stderr, "expected %zd, got %d\n", sizeof(bootblock), nread); + exit(0); + } + + /* Swap the bootblock's disklabel into the bootloader */ + bootloader_image.bootblock_label = bootblock_from_disk.bootblock_label; + + /* Calculate the bootblock checksum */ + bootloader_image.bootblock_checksum = 0; + for(i = 0; i < 63; i++) { + bootloader_image.bootblock_checksum += + bootloader_image.bootblock_quadwords[i]; + } + + /* Write the whole thing out! */ + lseek(dev, 0L, SEEK_SET); + if(write(dev, &bootloader_image, sizeof(bootblock)) != sizeof(bootblock)) { + perror("bootblock write"); + exit(0); + } + + close(fd); + close(dev); + exit(0); +} + + diff --git a/arch/alpha/boot/tools/objstrip.c b/arch/alpha/boot/tools/objstrip.c new file mode 100644 index 00000000..367d53d0 --- /dev/null +++ b/arch/alpha/boot/tools/objstrip.c @@ -0,0 +1,280 @@ +/* + * arch/alpha/boot/tools/objstrip.c + * + * Strip the object file headers/trailers from an executable (ELF or ECOFF). + * + * Copyright (C) 1996 David Mosberger-Tang. + */ +/* + * Converts an ECOFF or ELF object file into a bootable file. The + * object file must be a OMAGIC file (i.e., data and bss follow immediately + * behind the text). See DEC "Assembly Language Programmer's Guide" + * documentation for details. The SRM boot process is documented in + * the Alpha AXP Architecture Reference Manual, Second Edition by + * Richard L. Sites and Richard T. Witek. + */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> + +#include <sys/fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include <linux/a.out.h> +#include <linux/coff.h> +#include <linux/param.h> +#ifdef __ELF__ +# include <linux/elf.h> +#endif + +/* bootfile size must be multiple of BLOCK_SIZE: */ +#define BLOCK_SIZE 512 + +const char * prog_name; + + +static void +usage (void) +{ + fprintf(stderr, + "usage: %s [-v] -p file primary\n" + " %s [-vb] file [secondary]\n", prog_name, prog_name); + exit(1); +} + + +int +main (int argc, char *argv[]) +{ + size_t nwritten, tocopy, n, mem_size, fil_size, pad = 0; + int fd, ofd, i, j, verbose = 0, primary = 0; + char buf[8192], *inname; + struct exec * aout; /* includes file & aout header */ + long offset; +#ifdef __ELF__ + struct elfhdr *elf; + struct elf_phdr *elf_phdr; /* program header */ + unsigned long long e_entry; +#endif + + prog_name = argv[0]; + + for (i = 1; i < argc && argv[i][0] == '-'; ++i) { + for (j = 1; argv[i][j]; ++j) { + switch (argv[i][j]) { + case 'v': + verbose = ~verbose; + break; + + case 'b': + pad = BLOCK_SIZE; + break; + + case 'p': + primary = 1; /* make primary bootblock */ + break; + } + } + } + + if (i >= argc) { + usage(); + } + inname = argv[i++]; + + fd = open(inname, O_RDONLY); + if (fd == -1) { + perror("open"); + exit(1); + } + + ofd = 1; + if (i < argc) { + ofd = open(argv[i++], O_WRONLY | O_CREAT | O_TRUNC, 0666); + if (ofd == -1) { + perror("open"); + exit(1); + } + } + + if (primary) { + /* generate bootblock for primary loader */ + + unsigned long bb[64], sum = 0; + struct stat st; + off_t size; + int i; + + if (ofd == 1) { + usage(); + } + + if (fstat(fd, &st) == -1) { + perror("fstat"); + exit(1); + } + + size = (st.st_size + BLOCK_SIZE - 1) & ~(BLOCK_SIZE - 1); + memset(bb, 0, sizeof(bb)); + strcpy((char *) bb, "Linux SRM bootblock"); + bb[60] = size / BLOCK_SIZE; /* count */ + bb[61] = 1; /* starting sector # */ + bb[62] = 0; /* flags---must be 0 */ + for (i = 0; i < 63; ++i) { + sum += bb[i]; + } + bb[63] = sum; + if (write(ofd, bb, sizeof(bb)) != sizeof(bb)) { + perror("boot-block write"); + exit(1); + } + printf("%lu\n", size); + return 0; + } + + /* read and inspect exec header: */ + + if (read(fd, buf, sizeof(buf)) < 0) { + perror("read"); + exit(1); + } + +#ifdef __ELF__ + elf = (struct elfhdr *) buf; + + if (elf->e_ident[0] == 0x7f && strncmp((char *)elf->e_ident + 1, "ELF", 3) == 0) { + if (elf->e_type != ET_EXEC) { + fprintf(stderr, "%s: %s is not an ELF executable\n", + prog_name, inname); + exit(1); + } + if (!elf_check_arch(elf)) { + fprintf(stderr, "%s: is not for this processor (e_machine=%d)\n", + prog_name, elf->e_machine); + exit(1); + } + if (elf->e_phnum != 1) { + fprintf(stderr, + "%s: %d program headers (forgot to link with -N?)\n", + prog_name, elf->e_phnum); + } + + e_entry = elf->e_entry; + + lseek(fd, elf->e_phoff, SEEK_SET); + if (read(fd, buf, sizeof(*elf_phdr)) != sizeof(*elf_phdr)) { + perror("read"); + exit(1); + } + + elf_phdr = (struct elf_phdr *) buf; + offset = elf_phdr->p_offset; + mem_size = elf_phdr->p_memsz; + fil_size = elf_phdr->p_filesz; + + /* work around ELF bug: */ + if (elf_phdr->p_vaddr < e_entry) { + unsigned long delta = e_entry - elf_phdr->p_vaddr; + offset += delta; + mem_size -= delta; + fil_size -= delta; + elf_phdr->p_vaddr += delta; + } + + if (verbose) { + fprintf(stderr, "%s: extracting %#016lx-%#016lx (at %lx)\n", + prog_name, (long) elf_phdr->p_vaddr, + elf_phdr->p_vaddr + fil_size, offset); + } + } else +#endif + { + aout = (struct exec *) buf; + + if (!(aout->fh.f_flags & COFF_F_EXEC)) { + fprintf(stderr, "%s: %s is not in executable format\n", + prog_name, inname); + exit(1); + } + + if (aout->fh.f_opthdr != sizeof(aout->ah)) { + fprintf(stderr, "%s: %s has unexpected optional header size\n", + prog_name, inname); + exit(1); + } + + if (N_MAGIC(*aout) != OMAGIC) { + fprintf(stderr, "%s: %s is not an OMAGIC file\n", + prog_name, inname); + exit(1); + } + offset = N_TXTOFF(*aout); + fil_size = aout->ah.tsize + aout->ah.dsize; + mem_size = fil_size + aout->ah.bsize; + + if (verbose) { + fprintf(stderr, "%s: extracting %#016lx-%#016lx (at %lx)\n", + prog_name, aout->ah.text_start, + aout->ah.text_start + fil_size, offset); + } + } + + if (lseek(fd, offset, SEEK_SET) != offset) { + perror("lseek"); + exit(1); + } + + if (verbose) { + fprintf(stderr, "%s: copying %lu byte from %s\n", + prog_name, (unsigned long) fil_size, inname); + } + + tocopy = fil_size; + while (tocopy > 0) { + n = tocopy; + if (n > sizeof(buf)) { + n = sizeof(buf); + } + tocopy -= n; + if ((size_t) read(fd, buf, n) != n) { + perror("read"); + exit(1); + } + do { + nwritten = write(ofd, buf, n); + if ((ssize_t) nwritten == -1) { + perror("write"); + exit(1); + } + n -= nwritten; + } while (n > 0); + } + + if (pad) { + mem_size = ((mem_size + pad - 1) / pad) * pad; + } + + tocopy = mem_size - fil_size; + if (tocopy > 0) { + fprintf(stderr, + "%s: zero-filling bss and aligning to %lu with %lu bytes\n", + prog_name, pad, (unsigned long) tocopy); + + memset(buf, 0x00, sizeof(buf)); + do { + n = tocopy; + if (n > sizeof(buf)) { + n = sizeof(buf); + } + nwritten = write(ofd, buf, n); + if ((ssize_t) nwritten == -1) { + perror("write"); + exit(1); + } + tocopy -= nwritten; + } while (tocopy > 0); + } + return 0; +} diff --git a/arch/alpha/defconfig b/arch/alpha/defconfig new file mode 100644 index 00000000..539e8b5a --- /dev/null +++ b/arch/alpha/defconfig @@ -0,0 +1,76 @@ +CONFIG_EXPERIMENTAL=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_KALLSYMS_ALL=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_VERBOSE_MCHECK=y +CONFIG_SRM_ENV=m +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=m +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +# CONFIG_IPV6 is not set +CONFIG_NETFILTER=y +CONFIG_IP_NF_QUEUE=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_FILTER=m +CONFIG_VLAN_8021Q=m +CONFIG_PNP=y +CONFIG_ISAPNP=y +CONFIG_BLK_DEV_FD=y +CONFIG_BLK_DEV_LOOP=m +CONFIG_IDE=y +CONFIG_BLK_DEV_IDECD=y +CONFIG_IDE_GENERIC=y +CONFIG_BLK_DEV_GENERIC=y +CONFIG_BLK_DEV_ALI15X3=y +CONFIG_BLK_DEV_CMD64X=y +CONFIG_BLK_DEV_CY82C693=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_SCSI_AIC7XXX=m +CONFIG_AIC7XXX_CMDS_PER_DEVICE=253 +# CONFIG_AIC7XXX_DEBUG_ENABLE is not set +CONFIG_NETDEVICES=y +CONFIG_DUMMY=m +CONFIG_NET_ETHERNET=y +CONFIG_NET_VENDOR_3COM=y +CONFIG_VORTEX=y +CONFIG_NET_TULIP=y +CONFIG_DE2104X=m +CONFIG_TULIP=y +CONFIG_TULIP_MMIO=y +CONFIG_NET_PCI=y +CONFIG_YELLOWFIN=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_RTC=y +CONFIG_EXT2_FS=y +CONFIG_REISERFS_FS=m +CONFIG_AUTOFS_FS=m +CONFIG_ISO9660_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +CONFIG_NFSD=m +CONFIG_NFSD_V3=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_INFO=y +CONFIG_ALPHA_LEGACY_START_ADDRESS=y +CONFIG_MATHEMU=y +CONFIG_CRYPTO_HMAC=y diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild new file mode 100644 index 00000000..e423defe --- /dev/null +++ b/arch/alpha/include/asm/Kbuild @@ -0,0 +1,10 @@ +include include/asm-generic/Kbuild.asm + +header-y += compiler.h +header-y += console.h +header-y += fpu.h +header-y += gentrap.h +header-y += pal.h +header-y += reg.h +header-y += regdef.h +header-y += sysinfo.h diff --git a/arch/alpha/include/asm/a.out-core.h b/arch/alpha/include/asm/a.out-core.h new file mode 100644 index 00000000..9e33e92e --- /dev/null +++ b/arch/alpha/include/asm/a.out-core.h @@ -0,0 +1,80 @@ +/* a.out coredump register dumper + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _ASM_A_OUT_CORE_H +#define _ASM_A_OUT_CORE_H + +#ifdef __KERNEL__ + +#include <linux/user.h> + +/* + * Fill in the user structure for an ECOFF core dump. + */ +static inline void aout_dump_thread(struct pt_regs *pt, struct user *dump) +{ + /* switch stack follows right below pt_regs: */ + struct switch_stack * sw = ((struct switch_stack *) pt) - 1; + + dump->magic = CMAGIC; + dump->start_code = current->mm->start_code; + dump->start_data = current->mm->start_data; + dump->start_stack = rdusp() & ~(PAGE_SIZE - 1); + dump->u_tsize = ((current->mm->end_code - dump->start_code) + >> PAGE_SHIFT); + dump->u_dsize = ((current->mm->brk + PAGE_SIZE-1 - dump->start_data) + >> PAGE_SHIFT); + dump->u_ssize = (current->mm->start_stack - dump->start_stack + + PAGE_SIZE-1) >> PAGE_SHIFT; + + /* + * We store the registers in an order/format that is + * compatible with DEC Unix/OSF/1 as this makes life easier + * for gdb. + */ + dump->regs[EF_V0] = pt->r0; + dump->regs[EF_T0] = pt->r1; + dump->regs[EF_T1] = pt->r2; + dump->regs[EF_T2] = pt->r3; + dump->regs[EF_T3] = pt->r4; + dump->regs[EF_T4] = pt->r5; + dump->regs[EF_T5] = pt->r6; + dump->regs[EF_T6] = pt->r7; + dump->regs[EF_T7] = pt->r8; + dump->regs[EF_S0] = sw->r9; + dump->regs[EF_S1] = sw->r10; + dump->regs[EF_S2] = sw->r11; + dump->regs[EF_S3] = sw->r12; + dump->regs[EF_S4] = sw->r13; + dump->regs[EF_S5] = sw->r14; + dump->regs[EF_S6] = sw->r15; + dump->regs[EF_A3] = pt->r19; + dump->regs[EF_A4] = pt->r20; + dump->regs[EF_A5] = pt->r21; + dump->regs[EF_T8] = pt->r22; + dump->regs[EF_T9] = pt->r23; + dump->regs[EF_T10] = pt->r24; + dump->regs[EF_T11] = pt->r25; + dump->regs[EF_RA] = pt->r26; + dump->regs[EF_T12] = pt->r27; + dump->regs[EF_AT] = pt->r28; + dump->regs[EF_SP] = rdusp(); + dump->regs[EF_PS] = pt->ps; + dump->regs[EF_PC] = pt->pc; + dump->regs[EF_GP] = pt->gp; + dump->regs[EF_A0] = pt->r16; + dump->regs[EF_A1] = pt->r17; + dump->regs[EF_A2] = pt->r18; + memcpy((char *)dump->regs + EF_SIZE, sw->fp, 32 * 8); +} + +#endif /* __KERNEL__ */ +#endif /* _ASM_A_OUT_CORE_H */ diff --git a/arch/alpha/include/asm/a.out.h b/arch/alpha/include/asm/a.out.h new file mode 100644 index 00000000..acdc6812 --- /dev/null +++ b/arch/alpha/include/asm/a.out.h @@ -0,0 +1,102 @@ +#ifndef __ALPHA_A_OUT_H__ +#define __ALPHA_A_OUT_H__ + +#include <linux/types.h> + +/* + * OSF/1 ECOFF header structs. ECOFF files consist of: + * - a file header (struct filehdr), + * - an a.out header (struct aouthdr), + * - one or more section headers (struct scnhdr). + * The filhdr's "f_nscns" field contains the + * number of section headers. + */ + +struct filehdr +{ + /* OSF/1 "file" header */ + __u16 f_magic, f_nscns; + __u32 f_timdat; + __u64 f_symptr; + __u32 f_nsyms; + __u16 f_opthdr, f_flags; +}; + +struct aouthdr +{ + __u64 info; /* after that it looks quite normal.. */ + __u64 tsize; + __u64 dsize; + __u64 bsize; + __u64 entry; + __u64 text_start; /* with a few additions that actually make sense */ + __u64 data_start; + __u64 bss_start; + __u32 gprmask, fprmask; /* bitmask of general & floating point regs used in binary */ + __u64 gpvalue; +}; + +struct scnhdr +{ + char s_name[8]; + __u64 s_paddr; + __u64 s_vaddr; + __u64 s_size; + __u64 s_scnptr; + __u64 s_relptr; + __u64 s_lnnoptr; + __u16 s_nreloc; + __u16 s_nlnno; + __u32 s_flags; +}; + +struct exec +{ + /* OSF/1 "file" header */ + struct filehdr fh; + struct aouthdr ah; +}; + +/* + * Define's so that the kernel exec code can access the a.out header + * fields... + */ +#define a_info ah.info +#define a_text ah.tsize +#define a_data ah.dsize +#define a_bss ah.bsize +#define a_entry ah.entry +#define a_textstart ah.text_start +#define a_datastart ah.data_start +#define a_bssstart ah.bss_start +#define a_gprmask ah.gprmask +#define a_fprmask ah.fprmask +#define a_gpvalue ah.gpvalue + +#define N_TXTADDR(x) ((x).a_textstart) +#define N_DATADDR(x) ((x).a_datastart) +#define N_BSSADDR(x) ((x).a_bssstart) +#define N_DRSIZE(x) 0 +#define N_TRSIZE(x) 0 +#define N_SYMSIZE(x) 0 + +#define AOUTHSZ sizeof(struct aouthdr) +#define SCNHSZ sizeof(struct scnhdr) +#define SCNROUND 16 + +#define N_TXTOFF(x) \ + ((long) N_MAGIC(x) == ZMAGIC ? 0 : \ + (sizeof(struct exec) + (x).fh.f_nscns*SCNHSZ + SCNROUND - 1) & ~(SCNROUND - 1)) + +#ifdef __KERNEL__ + +/* Assume that start addresses below 4G belong to a TASO application. + Unfortunately, there is no proper bit in the exec header to check. + Worse, we have to notice the start address before swapping to use + /sbin/loader, which of course is _not_ a TASO application. */ +#define SET_AOUT_PERSONALITY(BFPM, EX) \ + set_personality (((BFPM->taso || EX.ah.entry < 0x100000000L \ + ? ADDR_LIMIT_32BIT : 0) | PER_OSF4)) + +#endif /* __KERNEL__ */ +#endif /* __A_OUT_GNU_H__ */ diff --git a/arch/alpha/include/asm/agp.h b/arch/alpha/include/asm/agp.h new file mode 100644 index 00000000..a94d48b8 --- /dev/null +++ b/arch/alpha/include/asm/agp.h @@ -0,0 +1,18 @@ +#ifndef AGP_H +#define AGP_H 1 + +#include <asm/io.h> + +/* dummy for now */ + +#define map_page_into_agp(page) +#define unmap_page_from_agp(page) +#define flush_agp_cache() mb() + +/* GATT allocation. Returns/accepts GATT kernel virtual address. */ +#define alloc_gatt_pages(order) \ + ((char *)__get_free_pages(GFP_KERNEL, (order))) +#define free_gatt_pages(table, order) \ + free_pages((unsigned long)(table), (order)) + +#endif diff --git a/arch/alpha/include/asm/agp_backend.h b/arch/alpha/include/asm/agp_backend.h new file mode 100644 index 00000000..55dd44a2 --- /dev/null +++ b/arch/alpha/include/asm/agp_backend.h @@ -0,0 +1,42 @@ +#ifndef _ALPHA_AGP_BACKEND_H +#define _ALPHA_AGP_BACKEND_H 1 + +typedef union _alpha_agp_mode { + struct { + u32 rate : 3; + u32 reserved0 : 1; + u32 fw : 1; + u32 fourgb : 1; + u32 reserved1 : 2; + u32 enable : 1; + u32 sba : 1; + u32 reserved2 : 14; + u32 rq : 8; + } bits; + u32 lw; +} alpha_agp_mode; + +typedef struct _alpha_agp_info { + struct pci_controller *hose; + struct { + dma_addr_t bus_base; + unsigned long size; + void *sysdata; + } aperture; + alpha_agp_mode capability; + alpha_agp_mode mode; + void *private; + struct alpha_agp_ops *ops; +} alpha_agp_info; + +struct alpha_agp_ops { + int (*setup)(alpha_agp_info *); + void (*cleanup)(alpha_agp_info *); + int (*configure)(alpha_agp_info *); + int (*bind)(alpha_agp_info *, off_t, struct agp_memory *); + int (*unbind)(alpha_agp_info *, off_t, struct agp_memory *); + unsigned long (*translate)(alpha_agp_info *, dma_addr_t); +}; + + +#endif /* _ALPHA_AGP_BACKEND_H */ diff --git a/arch/alpha/include/asm/asm-offsets.h b/arch/alpha/include/asm/asm-offsets.h new file mode 100644 index 00000000..d370ee36 --- /dev/null +++ b/arch/alpha/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include <generated/asm-offsets.h> diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h new file mode 100644 index 00000000..3bb7ffea --- /dev/null +++ b/arch/alpha/include/asm/atomic.h @@ -0,0 +1,258 @@ +#ifndef _ALPHA_ATOMIC_H +#define _ALPHA_ATOMIC_H + +#include <linux/types.h> +#include <asm/barrier.h> +#include <asm/cmpxchg.h> + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc... + * + * But use these as seldom as possible since they are much slower + * than regular operations. + */ + + +#define ATOMIC_INIT(i) ( (atomic_t) { (i) } ) +#define ATOMIC64_INIT(i) ( (atomic64_t) { (i) } ) + +#define atomic_read(v) (*(volatile int *)&(v)->counter) +#define atomic64_read(v) (*(volatile long *)&(v)->counter) + +#define atomic_set(v,i) ((v)->counter = (i)) +#define atomic64_set(v,i) ((v)->counter = (i)) + +/* + * To get proper branch prediction for the main line, we must branch + * forward to code at the end of this object's .text section, then + * branch back to restart the operation. + */ + +static __inline__ void atomic_add(int i, atomic_t * v) +{ + unsigned long temp; + __asm__ __volatile__( + "1: ldl_l %0,%1\n" + " addl %0,%2,%0\n" + " stl_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (v->counter) + :"Ir" (i), "m" (v->counter)); +} + +static __inline__ void atomic64_add(long i, atomic64_t * v) +{ + unsigned long temp; + __asm__ __volatile__( + "1: ldq_l %0,%1\n" + " addq %0,%2,%0\n" + " stq_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (v->counter) + :"Ir" (i), "m" (v->counter)); +} + +static __inline__ void atomic_sub(int i, atomic_t * v) +{ + unsigned long temp; + __asm__ __volatile__( + "1: ldl_l %0,%1\n" + " subl %0,%2,%0\n" + " stl_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (v->counter) + :"Ir" (i), "m" (v->counter)); +} + +static __inline__ void atomic64_sub(long i, atomic64_t * v) +{ + unsigned long temp; + __asm__ __volatile__( + "1: ldq_l %0,%1\n" + " subq %0,%2,%0\n" + " stq_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (v->counter) + :"Ir" (i), "m" (v->counter)); +} + + +/* + * Same as above, but return the result value + */ +static inline int atomic_add_return(int i, atomic_t *v) +{ + long temp, result; + smp_mb(); + __asm__ __volatile__( + "1: ldl_l %0,%1\n" + " addl %0,%3,%2\n" + " addl %0,%3,%0\n" + " stl_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (v->counter), "=&r" (result) + :"Ir" (i), "m" (v->counter) : "memory"); + smp_mb(); + return result; +} + +static __inline__ long atomic64_add_return(long i, atomic64_t * v) +{ + long temp, result; + smp_mb(); + __asm__ __volatile__( + "1: ldq_l %0,%1\n" + " addq %0,%3,%2\n" + " addq %0,%3,%0\n" + " stq_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (v->counter), "=&r" (result) + :"Ir" (i), "m" (v->counter) : "memory"); + smp_mb(); + return result; +} + +static __inline__ long atomic_sub_return(int i, atomic_t * v) +{ + long temp, result; + smp_mb(); + __asm__ __volatile__( + "1: ldl_l %0,%1\n" + " subl %0,%3,%2\n" + " subl %0,%3,%0\n" + " stl_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (v->counter), "=&r" (result) + :"Ir" (i), "m" (v->counter) : "memory"); + smp_mb(); + return result; +} + +static __inline__ long atomic64_sub_return(long i, atomic64_t * v) +{ + long temp, result; + smp_mb(); + __asm__ __volatile__( + "1: ldq_l %0,%1\n" + " subq %0,%3,%2\n" + " subq %0,%3,%0\n" + " stq_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (v->counter), "=&r" (result) + :"Ir" (i), "m" (v->counter) : "memory"); + smp_mb(); + return result; +} + +#define atomic64_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new)) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new)) +#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) + +/** + * __atomic_add_unless - add unless the number is a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns the old value of @v. + */ +static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int c, old; + c = atomic_read(v); + for (;;) { + if (unlikely(c == (u))) + break; + old = atomic_cmpxchg((v), c, c + (a)); + if (likely(old == c)) + break; + c = old; + } + return c; +} + + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns the old value of @v. + */ +static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) +{ + long c, old; + c = atomic64_read(v); + for (;;) { + if (unlikely(c == (u))) + break; + old = atomic64_cmpxchg((v), c, c + (a)); + if (likely(old == c)) + break; + c = old; + } + return c != (u); +} + +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + +#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) +#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) + +#define atomic_dec_return(v) atomic_sub_return(1,(v)) +#define atomic64_dec_return(v) atomic64_sub_return(1,(v)) + +#define atomic_inc_return(v) atomic_add_return(1,(v)) +#define atomic64_inc_return(v) atomic64_add_return(1,(v)) + +#define atomic_sub_and_test(i,v) (atomic_sub_return((i), (v)) == 0) +#define atomic64_sub_and_test(i,v) (atomic64_sub_return((i), (v)) == 0) + +#define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0) +#define atomic64_inc_and_test(v) (atomic64_add_return(1, (v)) == 0) + +#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0) +#define atomic64_dec_and_test(v) (atomic64_sub_return(1, (v)) == 0) + +#define atomic_inc(v) atomic_add(1,(v)) +#define atomic64_inc(v) atomic64_add(1,(v)) + +#define atomic_dec(v) atomic_sub(1,(v)) +#define atomic64_dec(v) atomic64_sub(1,(v)) + +#define smp_mb__before_atomic_dec() smp_mb() +#define smp_mb__after_atomic_dec() smp_mb() +#define smp_mb__before_atomic_inc() smp_mb() +#define smp_mb__after_atomic_inc() smp_mb() + +#endif /* _ALPHA_ATOMIC_H */ diff --git a/arch/alpha/include/asm/auxvec.h b/arch/alpha/include/asm/auxvec.h new file mode 100644 index 00000000..a3a579df --- /dev/null +++ b/arch/alpha/include/asm/auxvec.h @@ -0,0 +1,26 @@ +#ifndef __ASM_ALPHA_AUXVEC_H +#define __ASM_ALPHA_AUXVEC_H + +/* Reserve these numbers for any future use of a VDSO. */ +#if 0 +#define AT_SYSINFO 32 +#define AT_SYSINFO_EHDR 33 +#endif + +/* More complete cache descriptions than AT_[DIU]CACHEBSIZE. If the + value is -1, then the cache doesn't exist. Otherwise: + + bit 0-3: Cache set-associativity; 0 means fully associative. + bit 4-7: Log2 of cacheline size. + bit 8-31: Size of the entire cache >> 8. + bit 32-63: Reserved. +*/ + +#define AT_L1I_CACHESHAPE 34 +#define AT_L1D_CACHESHAPE 35 +#define AT_L2_CACHESHAPE 36 +#define AT_L3_CACHESHAPE 37 + +#define AT_VECTOR_SIZE_ARCH 4 /* entries in ARCH_DLINFO */ + +#endif /* __ASM_ALPHA_AUXVEC_H */ diff --git a/arch/alpha/include/asm/barrier.h b/arch/alpha/include/asm/barrier.h new file mode 100644 index 00000000..ce8860a0 --- /dev/null +++ b/arch/alpha/include/asm/barrier.h @@ -0,0 +1,35 @@ +#ifndef __BARRIER_H +#define __BARRIER_H + +#include <asm/compiler.h> + +#define mb() \ +__asm__ __volatile__("mb": : :"memory") + +#define rmb() \ +__asm__ __volatile__("mb": : :"memory") + +#define wmb() \ +__asm__ __volatile__("wmb": : :"memory") + +#define read_barrier_depends() \ +__asm__ __volatile__("mb": : :"memory") + +#ifdef CONFIG_SMP +#define __ASM_SMP_MB "\tmb\n" +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#define smp_read_barrier_depends() read_barrier_depends() +#else +#define __ASM_SMP_MB +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() do { } while (0) +#endif + +#define set_mb(var, value) \ +do { var = value; mb(); } while (0) + +#endif /* __BARRIER_H */ diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h new file mode 100644 index 00000000..a19ba5ef --- /dev/null +++ b/arch/alpha/include/asm/bitops.h @@ -0,0 +1,463 @@ +#ifndef _ALPHA_BITOPS_H +#define _ALPHA_BITOPS_H + +#ifndef _LINUX_BITOPS_H +#error only <linux/bitops.h> can be included directly +#endif + +#include <asm/compiler.h> +#include <asm/barrier.h> + +/* + * Copyright 1994, Linus Torvalds. + */ + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic. All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. + * + * To get proper branch prediction for the main line, we must branch + * forward to code at the end of this object's .text section, then + * branch back to restart the operation. + * + * bit 0 is the LSB of addr; bit 64 is the LSB of (addr+1). + */ + +static inline void +set_bit(unsigned long nr, volatile void * addr) +{ + unsigned long temp; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( + "1: ldl_l %0,%3\n" + " bis %0,%2,%0\n" + " stl_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (*m) + :"Ir" (1UL << (nr & 31)), "m" (*m)); +} + +/* + * WARNING: non atomic version. + */ +static inline void +__set_bit(unsigned long nr, volatile void * addr) +{ + int *m = ((int *) addr) + (nr >> 5); + + *m |= 1 << (nr & 31); +} + +#define smp_mb__before_clear_bit() smp_mb() +#define smp_mb__after_clear_bit() smp_mb() + +static inline void +clear_bit(unsigned long nr, volatile void * addr) +{ + unsigned long temp; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( + "1: ldl_l %0,%3\n" + " bic %0,%2,%0\n" + " stl_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (*m) + :"Ir" (1UL << (nr & 31)), "m" (*m)); +} + +static inline void +clear_bit_unlock(unsigned long nr, volatile void * addr) +{ + smp_mb(); + clear_bit(nr, addr); +} + +/* + * WARNING: non atomic version. + */ +static __inline__ void +__clear_bit(unsigned long nr, volatile void * addr) +{ + int *m = ((int *) addr) + (nr >> 5); + + *m &= ~(1 << (nr & 31)); +} + +static inline void +__clear_bit_unlock(unsigned long nr, volatile void * addr) +{ + smp_mb(); + __clear_bit(nr, addr); +} + +static inline void +change_bit(unsigned long nr, volatile void * addr) +{ + unsigned long temp; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( + "1: ldl_l %0,%3\n" + " xor %0,%2,%0\n" + " stl_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (*m) + :"Ir" (1UL << (nr & 31)), "m" (*m)); +} + +/* + * WARNING: non atomic version. + */ +static __inline__ void +__change_bit(unsigned long nr, volatile void * addr) +{ + int *m = ((int *) addr) + (nr >> 5); + + *m ^= 1 << (nr & 31); +} + +static inline int +test_and_set_bit(unsigned long nr, volatile void *addr) +{ + unsigned long oldbit; + unsigned long temp; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( +#ifdef CONFIG_SMP + " mb\n" +#endif + "1: ldl_l %0,%4\n" + " and %0,%3,%2\n" + " bne %2,2f\n" + " xor %0,%3,%0\n" + " stl_c %0,%1\n" + " beq %0,3f\n" + "2:\n" +#ifdef CONFIG_SMP + " mb\n" +#endif + ".subsection 2\n" + "3: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (*m), "=&r" (oldbit) + :"Ir" (1UL << (nr & 31)), "m" (*m) : "memory"); + + return oldbit != 0; +} + +static inline int +test_and_set_bit_lock(unsigned long nr, volatile void *addr) +{ + unsigned long oldbit; + unsigned long temp; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( + "1: ldl_l %0,%4\n" + " and %0,%3,%2\n" + " bne %2,2f\n" + " xor %0,%3,%0\n" + " stl_c %0,%1\n" + " beq %0,3f\n" + "2:\n" +#ifdef CONFIG_SMP + " mb\n" +#endif + ".subsection 2\n" + "3: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (*m), "=&r" (oldbit) + :"Ir" (1UL << (nr & 31)), "m" (*m) : "memory"); + + return oldbit != 0; +} + +/* + * WARNING: non atomic version. + */ +static inline int +__test_and_set_bit(unsigned long nr, volatile void * addr) +{ + unsigned long mask = 1 << (nr & 0x1f); + int *m = ((int *) addr) + (nr >> 5); + int old = *m; + + *m = old | mask; + return (old & mask) != 0; +} + +static inline int +test_and_clear_bit(unsigned long nr, volatile void * addr) +{ + unsigned long oldbit; + unsigned long temp; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( +#ifdef CONFIG_SMP + " mb\n" +#endif + "1: ldl_l %0,%4\n" + " and %0,%3,%2\n" + " beq %2,2f\n" + " xor %0,%3,%0\n" + " stl_c %0,%1\n" + " beq %0,3f\n" + "2:\n" +#ifdef CONFIG_SMP + " mb\n" +#endif + ".subsection 2\n" + "3: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (*m), "=&r" (oldbit) + :"Ir" (1UL << (nr & 31)), "m" (*m) : "memory"); + + return oldbit != 0; +} + +/* + * WARNING: non atomic version. + */ +static inline int +__test_and_clear_bit(unsigned long nr, volatile void * addr) +{ + unsigned long mask = 1 << (nr & 0x1f); + int *m = ((int *) addr) + (nr >> 5); + int old = *m; + + *m = old & ~mask; + return (old & mask) != 0; +} + +static inline int +test_and_change_bit(unsigned long nr, volatile void * addr) +{ + unsigned long oldbit; + unsigned long temp; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( +#ifdef CONFIG_SMP + " mb\n" +#endif + "1: ldl_l %0,%4\n" + " and %0,%3,%2\n" + " xor %0,%3,%0\n" + " stl_c %0,%1\n" + " beq %0,3f\n" +#ifdef CONFIG_SMP + " mb\n" +#endif + ".subsection 2\n" + "3: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (*m), "=&r" (oldbit) + :"Ir" (1UL << (nr & 31)), "m" (*m) : "memory"); + + return oldbit != 0; +} + +/* + * WARNING: non atomic version. + */ +static __inline__ int +__test_and_change_bit(unsigned long nr, volatile void * addr) +{ + unsigned long mask = 1 << (nr & 0x1f); + int *m = ((int *) addr) + (nr >> 5); + int old = *m; + + *m = old ^ mask; + return (old & mask) != 0; +} + +static inline int +test_bit(int nr, const volatile void * addr) +{ + return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; +} + +/* + * ffz = Find First Zero in word. Undefined if no zero exists, + * so code should check against ~0UL first.. + * + * Do a binary search on the bits. Due to the nature of large + * constants on the alpha, it is worthwhile to split the search. + */ +static inline unsigned long ffz_b(unsigned long x) +{ + unsigned long sum, x1, x2, x4; + + x = ~x & -~x; /* set first 0 bit, clear others */ + x1 = x & 0xAA; + x2 = x & 0xCC; + x4 = x & 0xF0; + sum = x2 ? 2 : 0; + sum += (x4 != 0) * 4; + sum += (x1 != 0); + + return sum; +} + +static inline unsigned long ffz(unsigned long word) +{ +#if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) + /* Whee. EV67 can calculate it directly. */ + return __kernel_cttz(~word); +#else + unsigned long bits, qofs, bofs; + + bits = __kernel_cmpbge(word, ~0UL); + qofs = ffz_b(bits); + bits = __kernel_extbl(word, qofs); + bofs = ffz_b(bits); + + return qofs*8 + bofs; +#endif +} + +/* + * __ffs = Find First set bit in word. Undefined if no set bit exists. + */ +static inline unsigned long __ffs(unsigned long word) +{ +#if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) + /* Whee. EV67 can calculate it directly. */ + return __kernel_cttz(word); +#else + unsigned long bits, qofs, bofs; + + bits = __kernel_cmpbge(0, word); + qofs = ffz_b(bits); + bits = __kernel_extbl(word, qofs); + bofs = ffz_b(~bits); + + return qofs*8 + bofs; +#endif +} + +#ifdef __KERNEL__ + +/* + * ffs: find first bit set. This is defined the same way as + * the libc and compiler builtin ffs routines, therefore + * differs in spirit from the above __ffs. + */ + +static inline int ffs(int word) +{ + int result = __ffs(word) + 1; + return word ? result : 0; +} + +/* + * fls: find last bit set. + */ +#if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) +static inline int fls64(unsigned long word) +{ + return 64 - __kernel_ctlz(word); +} +#else +extern const unsigned char __flsm1_tab[256]; + +static inline int fls64(unsigned long x) +{ + unsigned long t, a, r; + + t = __kernel_cmpbge (x, 0x0101010101010101UL); + a = __flsm1_tab[t]; + t = __kernel_extbl (x, a); + r = a*8 + __flsm1_tab[t] + (x != 0); + + return r; +} +#endif + +static inline unsigned long __fls(unsigned long x) +{ + return fls64(x) - 1; +} + +static inline int fls(int x) +{ + return fls64((unsigned int) x); +} + +/* + * hweightN: returns the hamming weight (i.e. the number + * of bits set) of a N-bit word + */ + +#if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) +/* Whee. EV67 can calculate it directly. */ +static inline unsigned long __arch_hweight64(unsigned long w) +{ + return __kernel_ctpop(w); +} + +static inline unsigned int __arch_hweight32(unsigned int w) +{ + return __arch_hweight64(w); +} + +static inline unsigned int __arch_hweight16(unsigned int w) +{ + return __arch_hweight64(w & 0xffff); +} + +static inline unsigned int __arch_hweight8(unsigned int w) +{ + return __arch_hweight64(w & 0xff); +} +#else +#include <asm-generic/bitops/arch_hweight.h> +#endif + +#include <asm-generic/bitops/const_hweight.h> + +#endif /* __KERNEL__ */ + +#include <asm-generic/bitops/find.h> + +#ifdef __KERNEL__ + +/* + * Every architecture must define this function. It's the fastest + * way of searching a 100-bit bitmap. It's guaranteed that at least + * one of the 100 bits is cleared. + */ +static inline unsigned long +sched_find_first_bit(const unsigned long b[2]) +{ + unsigned long b0, b1, ofs, tmp; + + b0 = b[0]; + b1 = b[1]; + ofs = (b0 ? 0 : 64); + tmp = (b0 ? b0 : b1); + + return __ffs(tmp) + ofs; +} + +#include <asm-generic/bitops/le.h> + +#include <asm-generic/bitops/ext2-atomic-setbit.h> + +#endif /* __KERNEL__ */ + +#endif /* _ALPHA_BITOPS_H */ diff --git a/arch/alpha/include/asm/bitsperlong.h b/arch/alpha/include/asm/bitsperlong.h new file mode 100644 index 00000000..ad57f786 --- /dev/null +++ b/arch/alpha/include/asm/bitsperlong.h @@ -0,0 +1,8 @@ +#ifndef __ASM_ALPHA_BITSPERLONG_H +#define __ASM_ALPHA_BITSPERLONG_H + +#define __BITS_PER_LONG 64 + +#include <asm-generic/bitsperlong.h> + +#endif /* __ASM_ALPHA_BITSPERLONG_H */ diff --git a/arch/alpha/include/asm/bug.h b/arch/alpha/include/asm/bug.h new file mode 100644 index 00000000..f091682e --- /dev/null +++ b/arch/alpha/include/asm/bug.h @@ -0,0 +1,24 @@ +#ifndef _ALPHA_BUG_H +#define _ALPHA_BUG_H + +#include <linux/linkage.h> + +#ifdef CONFIG_BUG +#include <asm/pal.h> + +/* ??? Would be nice to use .gprel32 here, but we can't be sure that the + function loaded the GP, so this could fail in modules. */ +#define BUG() do { \ + __asm__ __volatile__( \ + "call_pal %0 # bugchk\n\t" \ + ".long %1\n\t.8byte %2" \ + : : "i"(PAL_bugchk), "i"(__LINE__), "i"(__FILE__)); \ + unreachable(); \ + } while (0) + +#define HAVE_ARCH_BUG +#endif + +#include <asm-generic/bug.h> + +#endif diff --git a/arch/alpha/include/asm/bugs.h b/arch/alpha/include/asm/bugs.h new file mode 100644 index 00000000..78030d1c --- /dev/null +++ b/arch/alpha/include/asm/bugs.h @@ -0,0 +1,20 @@ +/* + * include/asm-alpha/bugs.h + * + * Copyright (C) 1994 Linus Torvalds + */ + +/* + * This is included by init/main.c to check for architecture-dependent bugs. + * + * Needs: + * void check_bugs(void); + */ + +/* + * I don't know of any alpha bugs yet.. Nice chip + */ + +static void check_bugs(void) +{ +} diff --git a/arch/alpha/include/asm/byteorder.h b/arch/alpha/include/asm/byteorder.h new file mode 100644 index 00000000..73683093 --- /dev/null +++ b/arch/alpha/include/asm/byteorder.h @@ -0,0 +1,6 @@ +#ifndef _ALPHA_BYTEORDER_H +#define _ALPHA_BYTEORDER_H + +#include <linux/byteorder/little_endian.h> + +#endif /* _ALPHA_BYTEORDER_H */ diff --git a/arch/alpha/include/asm/cache.h b/arch/alpha/include/asm/cache.h new file mode 100644 index 00000000..ad368a93 --- /dev/null +++ b/arch/alpha/include/asm/cache.h @@ -0,0 +1,22 @@ +/* + * include/asm-alpha/cache.h + */ +#ifndef __ARCH_ALPHA_CACHE_H +#define __ARCH_ALPHA_CACHE_H + + +/* Bytes per L1 (data) cache line. */ +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_EV6) +# define L1_CACHE_BYTES 64 +# define L1_CACHE_SHIFT 6 +#else +/* Both EV4 and EV5 are write-through, read-allocate, + direct-mapped, physical. +*/ +# define L1_CACHE_BYTES 32 +# define L1_CACHE_SHIFT 5 +#endif + +#define SMP_CACHE_BYTES L1_CACHE_BYTES + +#endif diff --git a/arch/alpha/include/asm/cacheflush.h b/arch/alpha/include/asm/cacheflush.h new file mode 100644 index 00000000..a9cb6aa4 --- /dev/null +++ b/arch/alpha/include/asm/cacheflush.h @@ -0,0 +1,77 @@ +#ifndef _ALPHA_CACHEFLUSH_H +#define _ALPHA_CACHEFLUSH_H + +#include <linux/mm.h> + +/* Caches aren't brain-dead on the Alpha. */ +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 +#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) + +/* Note that the following two definitions are _highly_ dependent + on the contexts in which they are used in the kernel. I personally + think it is criminal how loosely defined these macros are. */ + +/* We need to flush the kernel's icache after loading modules. The + only other use of this macro is in load_aout_interp which is not + used on Alpha. + + Note that this definition should *not* be used for userspace + icache flushing. While functional, it is _way_ overkill. The + icache is tagged with ASNs and it suffices to allocate a new ASN + for the process. */ +#ifndef CONFIG_SMP +#define flush_icache_range(start, end) imb() +#else +#define flush_icache_range(start, end) smp_imb() +extern void smp_imb(void); +#endif + +/* We need to flush the userspace icache after setting breakpoints in + ptrace. + + Instead of indiscriminately using imb, take advantage of the fact + that icache entries are tagged with the ASN and load a new mm context. */ +/* ??? Ought to use this in arch/alpha/kernel/signal.c too. */ + +#ifndef CONFIG_SMP +#include <linux/sched.h> + +extern void __load_new_mm_context(struct mm_struct *); +static inline void +flush_icache_user_range(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + if (vma->vm_flags & VM_EXEC) { + struct mm_struct *mm = vma->vm_mm; + if (current->active_mm == mm) + __load_new_mm_context(mm); + else + mm->context[smp_processor_id()] = 0; + } +} +#else +extern void flush_icache_user_range(struct vm_area_struct *vma, + struct page *page, unsigned long addr, int len); +#endif + +/* This is used only in __do_fault and do_swap_page. */ +#define flush_icache_page(vma, page) \ + flush_icache_user_range((vma), (page), 0, 0) + +#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ +do { memcpy(dst, src, len); \ + flush_icache_user_range(vma, page, vaddr, len); \ +} while (0) +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + memcpy(dst, src, len) + +#endif /* _ALPHA_CACHEFLUSH_H */ diff --git a/arch/alpha/include/asm/checksum.h b/arch/alpha/include/asm/checksum.h new file mode 100644 index 00000000..d3854bbf --- /dev/null +++ b/arch/alpha/include/asm/checksum.h @@ -0,0 +1,75 @@ +#ifndef _ALPHA_CHECKSUM_H +#define _ALPHA_CHECKSUM_H + +#include <linux/in6.h> + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + */ +extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +extern __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum); + +__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + unsigned short len, unsigned short proto, + __wsum sum); + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +extern __wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * the same as csum_partial, but copies from src while it + * checksums + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ +__wsum csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *errp); + +__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum); + + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ + +extern __sum16 ip_compute_csum(const void *buff, int len); + +/* + * Fold a partial checksum without adding pseudo headers + */ + +static inline __sum16 csum_fold(__wsum csum) +{ + u32 sum = (__force u32)csum; + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (__force __sum16)~sum; +} + +#define _HAVE_ARCH_IPV6_CSUM +extern __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, unsigned short proto, + __wsum sum); +#endif diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h new file mode 100644 index 00000000..429e8cd0 --- /dev/null +++ b/arch/alpha/include/asm/cmpxchg.h @@ -0,0 +1,71 @@ +#ifndef _ALPHA_CMPXCHG_H +#define _ALPHA_CMPXCHG_H + +/* + * Atomic exchange routines. + */ + +#define __ASM__MB +#define ____xchg(type, args...) __xchg ## type ## _local(args) +#define ____cmpxchg(type, args...) __cmpxchg ## type ## _local(args) +#include <asm/xchg.h> + +#define xchg_local(ptr, x) \ +({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_local((ptr), (unsigned long)_x_, \ + sizeof(*(ptr))); \ +}) + +#define cmpxchg_local(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, \ + sizeof(*(ptr))); \ +}) + +#define cmpxchg64_local(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg_local((ptr), (o), (n)); \ +}) + +#ifdef CONFIG_SMP +#undef __ASM__MB +#define __ASM__MB "\tmb\n" +#endif +#undef ____xchg +#undef ____cmpxchg +#define ____xchg(type, args...) __xchg ##type(args) +#define ____cmpxchg(type, args...) __cmpxchg ##type(args) +#include <asm/xchg.h> + +#define xchg(ptr, x) \ +({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, \ + sizeof(*(ptr))); \ +}) + +#define cmpxchg(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, sizeof(*(ptr)));\ +}) + +#define cmpxchg64(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg((ptr), (o), (n)); \ +}) + +#undef __ASM__MB +#undef ____cmpxchg + +#define __HAVE_ARCH_CMPXCHG 1 + +#endif /* _ALPHA_CMPXCHG_H */ diff --git a/arch/alpha/include/asm/compiler.h b/arch/alpha/include/asm/compiler.h new file mode 100644 index 00000000..da6bb199 --- /dev/null +++ b/arch/alpha/include/asm/compiler.h @@ -0,0 +1,130 @@ +#ifndef __ALPHA_COMPILER_H +#define __ALPHA_COMPILER_H + +/* + * Herein are macros we use when describing various patterns we want to GCC. + * In all cases we can get better schedules out of the compiler if we hide + * as little as possible inside inline assembly. However, we want to be + * able to know what we'll get out before giving up inline assembly. Thus + * these tests and macros. + */ + +#if __GNUC__ == 3 && __GNUC_MINOR__ >= 4 || __GNUC__ > 3 +# define __kernel_insbl(val, shift) __builtin_alpha_insbl(val, shift) +# define __kernel_inswl(val, shift) __builtin_alpha_inswl(val, shift) +# define __kernel_insql(val, shift) __builtin_alpha_insql(val, shift) +# define __kernel_inslh(val, shift) __builtin_alpha_inslh(val, shift) +# define __kernel_extbl(val, shift) __builtin_alpha_extbl(val, shift) +# define __kernel_extwl(val, shift) __builtin_alpha_extwl(val, shift) +# define __kernel_cmpbge(a, b) __builtin_alpha_cmpbge(a, b) +#else +# define __kernel_insbl(val, shift) \ + ({ unsigned long __kir; \ + __asm__("insbl %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val)); \ + __kir; }) +# define __kernel_inswl(val, shift) \ + ({ unsigned long __kir; \ + __asm__("inswl %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val)); \ + __kir; }) +# define __kernel_insql(val, shift) \ + ({ unsigned long __kir; \ + __asm__("insql %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val)); \ + __kir; }) +# define __kernel_inslh(val, shift) \ + ({ unsigned long __kir; \ + __asm__("inslh %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val)); \ + __kir; }) +# define __kernel_extbl(val, shift) \ + ({ unsigned long __kir; \ + __asm__("extbl %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val)); \ + __kir; }) +# define __kernel_extwl(val, shift) \ + ({ unsigned long __kir; \ + __asm__("extwl %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val)); \ + __kir; }) +# define __kernel_cmpbge(a, b) \ + ({ unsigned long __kir; \ + __asm__("cmpbge %r2,%1,%0" : "=r"(__kir) : "rI"(b), "rJ"(a)); \ + __kir; }) +#endif + +#ifdef __alpha_cix__ +# if __GNUC__ == 3 && __GNUC_MINOR__ >= 4 || __GNUC__ > 3 +# define __kernel_cttz(x) __builtin_ctzl(x) +# define __kernel_ctlz(x) __builtin_clzl(x) +# define __kernel_ctpop(x) __builtin_popcountl(x) +# else +# define __kernel_cttz(x) \ + ({ unsigned long __kir; \ + __asm__("cttz %1,%0" : "=r"(__kir) : "r"(x)); \ + __kir; }) +# define __kernel_ctlz(x) \ + ({ unsigned long __kir; \ + __asm__("ctlz %1,%0" : "=r"(__kir) : "r"(x)); \ + __kir; }) +# define __kernel_ctpop(x) \ + ({ unsigned long __kir; \ + __asm__("ctpop %1,%0" : "=r"(__kir) : "r"(x)); \ + __kir; }) +# endif +#else +# define __kernel_cttz(x) \ + ({ unsigned long __kir; \ + __asm__(".arch ev67; cttz %1,%0" : "=r"(__kir) : "r"(x)); \ + __kir; }) +# define __kernel_ctlz(x) \ + ({ unsigned long __kir; \ + __asm__(".arch ev67; ctlz %1,%0" : "=r"(__kir) : "r"(x)); \ + __kir; }) +# define __kernel_ctpop(x) \ + ({ unsigned long __kir; \ + __asm__(".arch ev67; ctpop %1,%0" : "=r"(__kir) : "r"(x)); \ + __kir; }) +#endif + + +/* + * Beginning with EGCS 1.1, GCC defines __alpha_bwx__ when the BWX + * extension is enabled. Previous versions did not define anything + * we could test during compilation -- too bad, so sad. + */ + +#if defined(__alpha_bwx__) +#define __kernel_ldbu(mem) (mem) +#define __kernel_ldwu(mem) (mem) +#define __kernel_stb(val,mem) ((mem) = (val)) +#define __kernel_stw(val,mem) ((mem) = (val)) +#else +#define __kernel_ldbu(mem) \ + ({ unsigned char __kir; \ + __asm__(".arch ev56; \ + ldbu %0,%1" : "=r"(__kir) : "m"(mem)); \ + __kir; }) +#define __kernel_ldwu(mem) \ + ({ unsigned short __kir; \ + __asm__(".arch ev56; \ + ldwu %0,%1" : "=r"(__kir) : "m"(mem)); \ + __kir; }) +#define __kernel_stb(val,mem) \ + __asm__(".arch ev56; \ + stb %1,%0" : "=m"(mem) : "r"(val)) +#define __kernel_stw(val,mem) \ + __asm__(".arch ev56; \ + stw %1,%0" : "=m"(mem) : "r"(val)) +#endif + +#ifdef __KERNEL__ +/* Some idiots over in <linux/compiler.h> thought inline should imply + always_inline. This breaks stuff. We'll include this file whenever + we run into such problems. */ + +#include <linux/compiler.h> +#undef inline +#undef __inline__ +#undef __inline +#undef __always_inline +#define __always_inline inline __attribute__((always_inline)) + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_COMPILER_H */ diff --git a/arch/alpha/include/asm/console.h b/arch/alpha/include/asm/console.h new file mode 100644 index 00000000..a3ce4e62 --- /dev/null +++ b/arch/alpha/include/asm/console.h @@ -0,0 +1,75 @@ +#ifndef __AXP_CONSOLE_H +#define __AXP_CONSOLE_H + +/* + * Console callback routine numbers + */ +#define CCB_GETC 0x01 +#define CCB_PUTS 0x02 +#define CCB_RESET_TERM 0x03 +#define CCB_SET_TERM_INT 0x04 +#define CCB_SET_TERM_CTL 0x05 +#define CCB_PROCESS_KEYCODE 0x06 +#define CCB_OPEN_CONSOLE 0x07 +#define CCB_CLOSE_CONSOLE 0x08 + +#define CCB_OPEN 0x10 +#define CCB_CLOSE 0x11 +#define CCB_IOCTL 0x12 +#define CCB_READ 0x13 +#define CCB_WRITE 0x14 + +#define CCB_SET_ENV 0x20 +#define CCB_RESET_ENV 0x21 +#define CCB_GET_ENV 0x22 +#define CCB_SAVE_ENV 0x23 + +#define CCB_PSWITCH 0x30 +#define CCB_BIOS_EMUL 0x32 + +/* + * Environment variable numbers + */ +#define ENV_AUTO_ACTION 0x01 +#define ENV_BOOT_DEV 0x02 +#define ENV_BOOTDEF_DEV 0x03 +#define ENV_BOOTED_DEV 0x04 +#define ENV_BOOT_FILE 0x05 +#define ENV_BOOTED_FILE 0x06 +#define ENV_BOOT_OSFLAGS 0x07 +#define ENV_BOOTED_OSFLAGS 0x08 +#define ENV_BOOT_RESET 0x09 +#define ENV_DUMP_DEV 0x0A +#define ENV_ENABLE_AUDIT 0x0B +#define ENV_LICENSE 0x0C +#define ENV_CHAR_SET 0x0D +#define ENV_LANGUAGE 0x0E +#define ENV_TTY_DEV 0x0F + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ +extern long callback_puts(long unit, const char *s, long length); +extern long callback_getc(long unit); +extern long callback_open_console(void); +extern long callback_close_console(void); +extern long callback_open(const char *device, long length); +extern long callback_close(long unit); +extern long callback_read(long channel, long count, const char *buf, long lbn); +extern long callback_getenv(long id, const char *buf, unsigned long buf_size); +extern long callback_setenv(long id, const char *buf, unsigned long buf_size); +extern long callback_save_env(void); + +extern int srm_fixup(unsigned long new_callback_addr, + unsigned long new_hwrpb_addr); +extern long srm_puts(const char *, long); +extern long srm_printk(const char *, ...) + __attribute__ ((format (printf, 1, 2))); + +struct crb_struct; +struct hwrpb_struct; +extern int callback_init_done; +extern void * callback_init(void *); +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ + +#endif /* __AXP_CONSOLE_H */ diff --git a/arch/alpha/include/asm/core_apecs.h b/arch/alpha/include/asm/core_apecs.h new file mode 100644 index 00000000..6785ff7e --- /dev/null +++ b/arch/alpha/include/asm/core_apecs.h @@ -0,0 +1,517 @@ +#ifndef __ALPHA_APECS__H__ +#define __ALPHA_APECS__H__ + +#include <linux/types.h> +#include <asm/compiler.h> + +/* + * APECS is the internal name for the 2107x chipset which provides + * memory controller and PCI access for the 21064 chip based systems. + * + * This file is based on: + * + * DECchip 21071-AA and DECchip 21072-AA Core Logic Chipsets + * Data Sheet + * + * EC-N0648-72 + * + * + * david.rusling@reo.mts.dec.com Initial Version. + * + */ + +/* + An AVANTI *might* be an XL, and an XL has only 27 bits of ISA address + that get passed through the PCI<->ISA bridge chip. So we've gotta use + both windows to max out the physical memory we can DMA to. Sigh... + + If we try a window at 0 for 1GB as a work-around, we run into conflicts + with ISA/PCI bus memory which can't be relocated, like VGA aperture and + BIOS ROMs. So we must put the windows high enough to avoid these areas. + + We put window 1 at BUS 64Mb for 64Mb, mapping physical 0 to 64Mb-1, + and window 2 at BUS 1Gb for 1Gb, mapping physical 0 to 1Gb-1. + Yes, this does map 0 to 64Mb-1 twice, but only window 1 will actually + be used for that range (via virt_to_bus()). + + Note that we actually fudge the window 1 maximum as 48Mb instead of 64Mb, + to keep virt_to_bus() from returning an address in the first window, for + a data area that goes beyond the 64Mb first DMA window. Sigh... + The fudge factor MUST match with <asm/dma.h> MAX_DMA_ADDRESS, but + we can't just use that here, because of header file looping... :-( + + Window 1 will be used for all DMA from the ISA bus; yes, that does + limit what memory an ISA floppy or sound card or Ethernet can touch, but + it's also a known limitation on other platforms as well. We use the + same technique that is used on INTEL platforms with similar limitation: + set MAX_DMA_ADDRESS and clear some pages' DMAable flags during mem_init(). + We trust that any ISA bus device drivers will *always* ask for DMAable + memory explicitly via kmalloc()/get_free_pages() flags arguments. + + Note that most PCI bus devices' drivers do *not* explicitly ask for + DMAable memory; they count on being able to DMA to any memory they + get from kmalloc()/get_free_pages(). They will also use window 1 for + any physical memory accesses below 64Mb; the rest will be handled by + window 2, maxing out at 1Gb of memory. I trust this is enough... :-) + + We hope that the area before the first window is large enough so that + there will be no overlap at the top end (64Mb). We *must* locate the + PCI cards' memory just below window 1, so that there's still the + possibility of being able to access it via SPARSE space. This is + important for cards such as the Matrox Millennium, whose Xserver + wants to access memory-mapped registers in byte and short lengths. + + Note that the XL is treated differently from the AVANTI, even though + for most other things they are identical. It didn't seem reasonable to + make the AVANTI support pay for the limitations of the XL. It is true, + however, that an XL kernel will run on an AVANTI without problems. + + %%% All of this should be obviated by the ability to route + everything through the iommu. +*/ + +/* + * 21071-DA Control and Status registers. + * These are used for PCI memory access. + */ +#define APECS_IOC_DCSR (IDENT_ADDR + 0x1A0000000UL) +#define APECS_IOC_PEAR (IDENT_ADDR + 0x1A0000020UL) +#define APECS_IOC_SEAR (IDENT_ADDR + 0x1A0000040UL) +#define APECS_IOC_DR1 (IDENT_ADDR + 0x1A0000060UL) +#define APECS_IOC_DR2 (IDENT_ADDR + 0x1A0000080UL) +#define APECS_IOC_DR3 (IDENT_ADDR + 0x1A00000A0UL) + +#define APECS_IOC_TB1R (IDENT_ADDR + 0x1A00000C0UL) +#define APECS_IOC_TB2R (IDENT_ADDR + 0x1A00000E0UL) + +#define APECS_IOC_PB1R (IDENT_ADDR + 0x1A0000100UL) +#define APECS_IOC_PB2R (IDENT_ADDR + 0x1A0000120UL) + +#define APECS_IOC_PM1R (IDENT_ADDR + 0x1A0000140UL) +#define APECS_IOC_PM2R (IDENT_ADDR + 0x1A0000160UL) + +#define APECS_IOC_HAXR0 (IDENT_ADDR + 0x1A0000180UL) +#define APECS_IOC_HAXR1 (IDENT_ADDR + 0x1A00001A0UL) +#define APECS_IOC_HAXR2 (IDENT_ADDR + 0x1A00001C0UL) + +#define APECS_IOC_PMLT (IDENT_ADDR + 0x1A00001E0UL) + +#define APECS_IOC_TLBTAG0 (IDENT_ADDR + 0x1A0000200UL) +#define APECS_IOC_TLBTAG1 (IDENT_ADDR + 0x1A0000220UL) +#define APECS_IOC_TLBTAG2 (IDENT_ADDR + 0x1A0000240UL) +#define APECS_IOC_TLBTAG3 (IDENT_ADDR + 0x1A0000260UL) +#define APECS_IOC_TLBTAG4 (IDENT_ADDR + 0x1A0000280UL) +#define APECS_IOC_TLBTAG5 (IDENT_ADDR + 0x1A00002A0UL) +#define APECS_IOC_TLBTAG6 (IDENT_ADDR + 0x1A00002C0UL) +#define APECS_IOC_TLBTAG7 (IDENT_ADDR + 0x1A00002E0UL) + +#define APECS_IOC_TLBDATA0 (IDENT_ADDR + 0x1A0000300UL) +#define APECS_IOC_TLBDATA1 (IDENT_ADDR + 0x1A0000320UL) +#define APECS_IOC_TLBDATA2 (IDENT_ADDR + 0x1A0000340UL) +#define APECS_IOC_TLBDATA3 (IDENT_ADDR + 0x1A0000360UL) +#define APECS_IOC_TLBDATA4 (IDENT_ADDR + 0x1A0000380UL) +#define APECS_IOC_TLBDATA5 (IDENT_ADDR + 0x1A00003A0UL) +#define APECS_IOC_TLBDATA6 (IDENT_ADDR + 0x1A00003C0UL) +#define APECS_IOC_TLBDATA7 (IDENT_ADDR + 0x1A00003E0UL) + +#define APECS_IOC_TBIA (IDENT_ADDR + 0x1A0000400UL) + + +/* + * 21071-CA Control and Status registers. + * These are used to program memory timing, + * configure memory and initialise the B-Cache. + */ +#define APECS_MEM_GCR (IDENT_ADDR + 0x180000000UL) +#define APECS_MEM_EDSR (IDENT_ADDR + 0x180000040UL) +#define APECS_MEM_TAR (IDENT_ADDR + 0x180000060UL) +#define APECS_MEM_ELAR (IDENT_ADDR + 0x180000080UL) +#define APECS_MEM_EHAR (IDENT_ADDR + 0x1800000a0UL) +#define APECS_MEM_SFT_RST (IDENT_ADDR + 0x1800000c0UL) +#define APECS_MEM_LDxLAR (IDENT_ADDR + 0x1800000e0UL) +#define APECS_MEM_LDxHAR (IDENT_ADDR + 0x180000100UL) +#define APECS_MEM_GTR (IDENT_ADDR + 0x180000200UL) +#define APECS_MEM_RTR (IDENT_ADDR + 0x180000220UL) +#define APECS_MEM_VFPR (IDENT_ADDR + 0x180000240UL) +#define APECS_MEM_PDLDR (IDENT_ADDR + 0x180000260UL) +#define APECS_MEM_PDhDR (IDENT_ADDR + 0x180000280UL) + +/* Bank x Base Address Register */ +#define APECS_MEM_B0BAR (IDENT_ADDR + 0x180000800UL) +#define APECS_MEM_B1BAR (IDENT_ADDR + 0x180000820UL) +#define APECS_MEM_B2BAR (IDENT_ADDR + 0x180000840UL) +#define APECS_MEM_B3BAR (IDENT_ADDR + 0x180000860UL) +#define APECS_MEM_B4BAR (IDENT_ADDR + 0x180000880UL) +#define APECS_MEM_B5BAR (IDENT_ADDR + 0x1800008A0UL) +#define APECS_MEM_B6BAR (IDENT_ADDR + 0x1800008C0UL) +#define APECS_MEM_B7BAR (IDENT_ADDR + 0x1800008E0UL) +#define APECS_MEM_B8BAR (IDENT_ADDR + 0x180000900UL) + +/* Bank x Configuration Register */ +#define APECS_MEM_B0BCR (IDENT_ADDR + 0x180000A00UL) +#define APECS_MEM_B1BCR (IDENT_ADDR + 0x180000A20UL) +#define APECS_MEM_B2BCR (IDENT_ADDR + 0x180000A40UL) +#define APECS_MEM_B3BCR (IDENT_ADDR + 0x180000A60UL) +#define APECS_MEM_B4BCR (IDENT_ADDR + 0x180000A80UL) +#define APECS_MEM_B5BCR (IDENT_ADDR + 0x180000AA0UL) +#define APECS_MEM_B6BCR (IDENT_ADDR + 0x180000AC0UL) +#define APECS_MEM_B7BCR (IDENT_ADDR + 0x180000AE0UL) +#define APECS_MEM_B8BCR (IDENT_ADDR + 0x180000B00UL) + +/* Bank x Timing Register A */ +#define APECS_MEM_B0TRA (IDENT_ADDR + 0x180000C00UL) +#define APECS_MEM_B1TRA (IDENT_ADDR + 0x180000C20UL) +#define APECS_MEM_B2TRA (IDENT_ADDR + 0x180000C40UL) +#define APECS_MEM_B3TRA (IDENT_ADDR + 0x180000C60UL) +#define APECS_MEM_B4TRA (IDENT_ADDR + 0x180000C80UL) +#define APECS_MEM_B5TRA (IDENT_ADDR + 0x180000CA0UL) +#define APECS_MEM_B6TRA (IDENT_ADDR + 0x180000CC0UL) +#define APECS_MEM_B7TRA (IDENT_ADDR + 0x180000CE0UL) +#define APECS_MEM_B8TRA (IDENT_ADDR + 0x180000D00UL) + +/* Bank x Timing Register B */ +#define APECS_MEM_B0TRB (IDENT_ADDR + 0x180000E00UL) +#define APECS_MEM_B1TRB (IDENT_ADDR + 0x180000E20UL) +#define APECS_MEM_B2TRB (IDENT_ADDR + 0x180000E40UL) +#define APECS_MEM_B3TRB (IDENT_ADDR + 0x180000E60UL) +#define APECS_MEM_B4TRB (IDENT_ADDR + 0x180000E80UL) +#define APECS_MEM_B5TRB (IDENT_ADDR + 0x180000EA0UL) +#define APECS_MEM_B6TRB (IDENT_ADDR + 0x180000EC0UL) +#define APECS_MEM_B7TRB (IDENT_ADDR + 0x180000EE0UL) +#define APECS_MEM_B8TRB (IDENT_ADDR + 0x180000F00UL) + + +/* + * Memory spaces: + */ +#define APECS_IACK_SC (IDENT_ADDR + 0x1b0000000UL) +#define APECS_CONF (IDENT_ADDR + 0x1e0000000UL) +#define APECS_IO (IDENT_ADDR + 0x1c0000000UL) +#define APECS_SPARSE_MEM (IDENT_ADDR + 0x200000000UL) +#define APECS_DENSE_MEM (IDENT_ADDR + 0x300000000UL) + + +/* + * Bit definitions for I/O Controller status register 0: + */ +#define APECS_IOC_STAT0_CMD 0xf +#define APECS_IOC_STAT0_ERR (1<<4) +#define APECS_IOC_STAT0_LOST (1<<5) +#define APECS_IOC_STAT0_THIT (1<<6) +#define APECS_IOC_STAT0_TREF (1<<7) +#define APECS_IOC_STAT0_CODE_SHIFT 8 +#define APECS_IOC_STAT0_CODE_MASK 0x7 +#define APECS_IOC_STAT0_P_NBR_SHIFT 13 +#define APECS_IOC_STAT0_P_NBR_MASK 0x7ffff + +#define APECS_HAE_ADDRESS APECS_IOC_HAXR1 + + +/* + * Data structure for handling APECS machine checks: + */ + +struct el_apecs_mikasa_sysdata_mcheck +{ + unsigned long coma_gcr; + unsigned long coma_edsr; + unsigned long coma_ter; + unsigned long coma_elar; + unsigned long coma_ehar; + unsigned long coma_ldlr; + unsigned long coma_ldhr; + unsigned long coma_base0; + unsigned long coma_base1; + unsigned long coma_base2; + unsigned long coma_base3; + unsigned long coma_cnfg0; + unsigned long coma_cnfg1; + unsigned long coma_cnfg2; + unsigned long coma_cnfg3; + unsigned long epic_dcsr; + unsigned long epic_pear; + unsigned long epic_sear; + unsigned long epic_tbr1; + unsigned long epic_tbr2; + unsigned long epic_pbr1; + unsigned long epic_pbr2; + unsigned long epic_pmr1; + unsigned long epic_pmr2; + unsigned long epic_harx1; + unsigned long epic_harx2; + unsigned long epic_pmlt; + unsigned long epic_tag0; + unsigned long epic_tag1; + unsigned long epic_tag2; + unsigned long epic_tag3; + unsigned long epic_tag4; + unsigned long epic_tag5; + unsigned long epic_tag6; + unsigned long epic_tag7; + unsigned long epic_data0; + unsigned long epic_data1; + unsigned long epic_data2; + unsigned long epic_data3; + unsigned long epic_data4; + unsigned long epic_data5; + unsigned long epic_data6; + unsigned long epic_data7; + + unsigned long pceb_vid; + unsigned long pceb_did; + unsigned long pceb_revision; + unsigned long pceb_command; + unsigned long pceb_status; + unsigned long pceb_latency; + unsigned long pceb_control; + unsigned long pceb_arbcon; + unsigned long pceb_arbpri; + + unsigned long esc_id; + unsigned long esc_revision; + unsigned long esc_int0; + unsigned long esc_int1; + unsigned long esc_elcr0; + unsigned long esc_elcr1; + unsigned long esc_last_eisa; + unsigned long esc_nmi_stat; + + unsigned long pci_ir; + unsigned long pci_imr; + unsigned long svr_mgr; +}; + +/* This for the normal APECS machines. */ +struct el_apecs_sysdata_mcheck +{ + unsigned long coma_gcr; + unsigned long coma_edsr; + unsigned long coma_ter; + unsigned long coma_elar; + unsigned long coma_ehar; + unsigned long coma_ldlr; + unsigned long coma_ldhr; + unsigned long coma_base0; + unsigned long coma_base1; + unsigned long coma_base2; + unsigned long coma_cnfg0; + unsigned long coma_cnfg1; + unsigned long coma_cnfg2; + unsigned long epic_dcsr; + unsigned long epic_pear; + unsigned long epic_sear; + unsigned long epic_tbr1; + unsigned long epic_tbr2; + unsigned long epic_pbr1; + unsigned long epic_pbr2; + unsigned long epic_pmr1; + unsigned long epic_pmr2; + unsigned long epic_harx1; + unsigned long epic_harx2; + unsigned long epic_pmlt; + unsigned long epic_tag0; + unsigned long epic_tag1; + unsigned long epic_tag2; + unsigned long epic_tag3; + unsigned long epic_tag4; + unsigned long epic_tag5; + unsigned long epic_tag6; + unsigned long epic_tag7; + unsigned long epic_data0; + unsigned long epic_data1; + unsigned long epic_data2; + unsigned long epic_data3; + unsigned long epic_data4; + unsigned long epic_data5; + unsigned long epic_data6; + unsigned long epic_data7; +}; + +struct el_apecs_procdata +{ + unsigned long paltemp[32]; /* PAL TEMP REGS. */ + /* EV4-specific fields */ + unsigned long exc_addr; /* Address of excepting instruction. */ + unsigned long exc_sum; /* Summary of arithmetic traps. */ + unsigned long exc_mask; /* Exception mask (from exc_sum). */ + unsigned long iccsr; /* IBox hardware enables. */ + unsigned long pal_base; /* Base address for PALcode. */ + unsigned long hier; /* Hardware Interrupt Enable. */ + unsigned long hirr; /* Hardware Interrupt Request. */ + unsigned long csr; /* D-stream fault info. */ + unsigned long dc_stat; /* D-cache status (ECC/Parity Err). */ + unsigned long dc_addr; /* EV3 Phys Addr for ECC/DPERR. */ + unsigned long abox_ctl; /* ABox Control Register. */ + unsigned long biu_stat; /* BIU Status. */ + unsigned long biu_addr; /* BUI Address. */ + unsigned long biu_ctl; /* BIU Control. */ + unsigned long fill_syndrome;/* For correcting ECC errors. */ + unsigned long fill_addr; /* Cache block which was being read */ + unsigned long va; /* Effective VA of fault or miss. */ + unsigned long bc_tag; /* Backup Cache Tag Probe Results.*/ +}; + + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions: + * + * Unlike Jensen, the APECS machines have no concept of local + * I/O---everything goes over the PCI bus. + * + * There is plenty room for optimization here. In particular, + * the Alpha's insb/insw/extb/extw should be useful in moving + * data to/from the right byte-lanes. + */ + +#define vip volatile int __force * +#define vuip volatile unsigned int __force * +#define vulp volatile unsigned long __force * + +#define APECS_SET_HAE \ + do { \ + if (addr >= (1UL << 24)) { \ + unsigned long msb = addr & 0xf8000000; \ + addr -= msb; \ + set_hae(msb); \ + } \ + } while (0) + +__EXTERN_INLINE unsigned int apecs_ioread8(void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long result, base_and_type; + + if (addr >= APECS_DENSE_MEM) { + addr -= APECS_DENSE_MEM; + APECS_SET_HAE; + base_and_type = APECS_SPARSE_MEM + 0x00; + } else { + addr -= APECS_IO; + base_and_type = APECS_IO + 0x00; + } + + result = *(vip) ((addr << 5) + base_and_type); + return __kernel_extbl(result, addr & 3); +} + +__EXTERN_INLINE void apecs_iowrite8(u8 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long w, base_and_type; + + if (addr >= APECS_DENSE_MEM) { + addr -= APECS_DENSE_MEM; + APECS_SET_HAE; + base_and_type = APECS_SPARSE_MEM + 0x00; + } else { + addr -= APECS_IO; + base_and_type = APECS_IO + 0x00; + } + + w = __kernel_insbl(b, addr & 3); + *(vuip) ((addr << 5) + base_and_type) = w; +} + +__EXTERN_INLINE unsigned int apecs_ioread16(void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long result, base_and_type; + + if (addr >= APECS_DENSE_MEM) { + addr -= APECS_DENSE_MEM; + APECS_SET_HAE; + base_and_type = APECS_SPARSE_MEM + 0x08; + } else { + addr -= APECS_IO; + base_and_type = APECS_IO + 0x08; + } + + result = *(vip) ((addr << 5) + base_and_type); + return __kernel_extwl(result, addr & 3); +} + +__EXTERN_INLINE void apecs_iowrite16(u16 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long w, base_and_type; + + if (addr >= APECS_DENSE_MEM) { + addr -= APECS_DENSE_MEM; + APECS_SET_HAE; + base_and_type = APECS_SPARSE_MEM + 0x08; + } else { + addr -= APECS_IO; + base_and_type = APECS_IO + 0x08; + } + + w = __kernel_inswl(b, addr & 3); + *(vuip) ((addr << 5) + base_and_type) = w; +} + +__EXTERN_INLINE unsigned int apecs_ioread32(void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (addr < APECS_DENSE_MEM) + addr = ((addr - APECS_IO) << 5) + APECS_IO + 0x18; + return *(vuip)addr; +} + +__EXTERN_INLINE void apecs_iowrite32(u32 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (addr < APECS_DENSE_MEM) + addr = ((addr - APECS_IO) << 5) + APECS_IO + 0x18; + *(vuip)addr = b; +} + +__EXTERN_INLINE void __iomem *apecs_ioportmap(unsigned long addr) +{ + return (void __iomem *)(addr + APECS_IO); +} + +__EXTERN_INLINE void __iomem *apecs_ioremap(unsigned long addr, + unsigned long size) +{ + return (void __iomem *)(addr + APECS_DENSE_MEM); +} + +__EXTERN_INLINE int apecs_is_ioaddr(unsigned long addr) +{ + return addr >= IDENT_ADDR + 0x180000000UL; +} + +__EXTERN_INLINE int apecs_is_mmio(const volatile void __iomem *addr) +{ + return (unsigned long)addr >= APECS_DENSE_MEM; +} + +#undef APECS_SET_HAE + +#undef vip +#undef vuip +#undef vulp + +#undef __IO_PREFIX +#define __IO_PREFIX apecs +#define apecs_trivial_io_bw 0 +#define apecs_trivial_io_lq 0 +#define apecs_trivial_rw_bw 2 +#define apecs_trivial_rw_lq 1 +#define apecs_trivial_iounmap 1 +#include <asm/io_trivial.h> + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_APECS__H__ */ diff --git a/arch/alpha/include/asm/core_cia.h b/arch/alpha/include/asm/core_cia.h new file mode 100644 index 00000000..9e0516c0 --- /dev/null +++ b/arch/alpha/include/asm/core_cia.h @@ -0,0 +1,500 @@ +#ifndef __ALPHA_CIA__H__ +#define __ALPHA_CIA__H__ + +/* Define to experiment with fitting everything into one 512MB HAE window. */ +#define CIA_ONE_HAE_WINDOW 1 + +#include <linux/types.h> +#include <asm/compiler.h> + +/* + * CIA is the internal name for the 21171 chipset which provides + * memory controller and PCI access for the 21164 chip based systems. + * Also supported here is the 21172 (CIA-2) and 21174 (PYXIS). + * + * The lineage is a bit confused, since the 21174 was reportedly started + * from the 21171 Pass 1 mask, and so is missing bug fixes that appear + * in 21171 Pass 2 and 21172, but it also contains additional features. + * + * This file is based on: + * + * DECchip 21171 Core Logic Chipset + * Technical Reference Manual + * + * EC-QE18B-TE + * + * david.rusling@reo.mts.dec.com Initial Version. + * + */ + +/* + * CIA ADDRESS BIT DEFINITIONS + * + * 3333 3333 3322 2222 2222 1111 1111 11 + * 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210 + * ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- + * 1 000 + * ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- + * | |\| + * | Byte Enable --+ | + * | Transfer Length --+ + * +-- IO space, not cached + * + * Byte Transfer + * Enable Length Transfer Byte Address + * adr<6:5> adr<4:3> Length Enable Adder + * --------------------------------------------- + * 00 00 Byte 1110 0x000 + * 01 00 Byte 1101 0x020 + * 10 00 Byte 1011 0x040 + * 11 00 Byte 0111 0x060 + * + * 00 01 Word 1100 0x008 + * 01 01 Word 1001 0x028 <= Not supported in this code. + * 10 01 Word 0011 0x048 + * + * 00 10 Tribyte 1000 0x010 + * 01 10 Tribyte 0001 0x030 + * + * 10 11 Longword 0000 0x058 + * + * Note that byte enables are asserted low. + * + */ + +#define CIA_MEM_R1_MASK 0x1fffffff /* SPARSE Mem region 1 mask is 29 bits */ +#define CIA_MEM_R2_MASK 0x07ffffff /* SPARSE Mem region 2 mask is 27 bits */ +#define CIA_MEM_R3_MASK 0x03ffffff /* SPARSE Mem region 3 mask is 26 bits */ + +/* + * 21171-CA Control and Status Registers + */ +#define CIA_IOC_CIA_REV (IDENT_ADDR + 0x8740000080UL) +# define CIA_REV_MASK 0xff +#define CIA_IOC_PCI_LAT (IDENT_ADDR + 0x87400000C0UL) +#define CIA_IOC_CIA_CTRL (IDENT_ADDR + 0x8740000100UL) +# define CIA_CTRL_PCI_EN (1 << 0) +# define CIA_CTRL_PCI_LOCK_EN (1 << 1) +# define CIA_CTRL_PCI_LOOP_EN (1 << 2) +# define CIA_CTRL_FST_BB_EN (1 << 3) +# define CIA_CTRL_PCI_MST_EN (1 << 4) +# define CIA_CTRL_PCI_MEM_EN (1 << 5) +# define CIA_CTRL_PCI_REQ64_EN (1 << 6) +# define CIA_CTRL_PCI_ACK64_EN (1 << 7) +# define CIA_CTRL_ADDR_PE_EN (1 << 8) +# define CIA_CTRL_PERR_EN (1 << 9) +# define CIA_CTRL_FILL_ERR_EN (1 << 10) +# define CIA_CTRL_MCHK_ERR_EN (1 << 11) +# define CIA_CTRL_ECC_CHK_EN (1 << 12) +# define CIA_CTRL_ASSERT_IDLE_BC (1 << 13) +# define CIA_CTRL_COM_IDLE_BC (1 << 14) +# define CIA_CTRL_CSR_IOA_BYPASS (1 << 15) +# define CIA_CTRL_IO_FLUSHREQ_EN (1 << 16) +# define CIA_CTRL_CPU_FLUSHREQ_EN (1 << 17) +# define CIA_CTRL_ARB_CPU_EN (1 << 18) +# define CIA_CTRL_EN_ARB_LINK (1 << 19) +# define CIA_CTRL_RD_TYPE_SHIFT 20 +# define CIA_CTRL_RL_TYPE_SHIFT 24 +# define CIA_CTRL_RM_TYPE_SHIFT 28 +# define CIA_CTRL_EN_DMA_RD_PERF (1 << 31) +#define CIA_IOC_CIA_CNFG (IDENT_ADDR + 0x8740000140UL) +# define CIA_CNFG_IOA_BWEN (1 << 0) +# define CIA_CNFG_PCI_MWEN (1 << 4) +# define CIA_CNFG_PCI_DWEN (1 << 5) +# define CIA_CNFG_PCI_WLEN (1 << 8) +#define CIA_IOC_FLASH_CTRL (IDENT_ADDR + 0x8740000200UL) +#define CIA_IOC_HAE_MEM (IDENT_ADDR + 0x8740000400UL) +#define CIA_IOC_HAE_IO (IDENT_ADDR + 0x8740000440UL) +#define CIA_IOC_CFG (IDENT_ADDR + 0x8740000480UL) +#define CIA_IOC_CACK_EN (IDENT_ADDR + 0x8740000600UL) +# define CIA_CACK_EN_LOCK_EN (1 << 0) +# define CIA_CACK_EN_MB_EN (1 << 1) +# define CIA_CACK_EN_SET_DIRTY_EN (1 << 2) +# define CIA_CACK_EN_BC_VICTIM_EN (1 << 3) + + +/* + * 21171-CA Diagnostic Registers + */ +#define CIA_IOC_CIA_DIAG (IDENT_ADDR + 0x8740002000UL) +#define CIA_IOC_DIAG_CHECK (IDENT_ADDR + 0x8740003000UL) + +/* + * 21171-CA Performance Monitor registers + */ +#define CIA_IOC_PERF_MONITOR (IDENT_ADDR + 0x8740004000UL) +#define CIA_IOC_PERF_CONTROL (IDENT_ADDR + 0x8740004040UL) + +/* + * 21171-CA Error registers + */ +#define CIA_IOC_CPU_ERR0 (IDENT_ADDR + 0x8740008000UL) +#define CIA_IOC_CPU_ERR1 (IDENT_ADDR + 0x8740008040UL) +#define CIA_IOC_CIA_ERR (IDENT_ADDR + 0x8740008200UL) +# define CIA_ERR_COR_ERR (1 << 0) +# define CIA_ERR_UN_COR_ERR (1 << 1) +# define CIA_ERR_CPU_PE (1 << 2) +# define CIA_ERR_MEM_NEM (1 << 3) +# define CIA_ERR_PCI_SERR (1 << 4) +# define CIA_ERR_PERR (1 << 5) +# define CIA_ERR_PCI_ADDR_PE (1 << 6) +# define CIA_ERR_RCVD_MAS_ABT (1 << 7) +# define CIA_ERR_RCVD_TAR_ABT (1 << 8) +# define CIA_ERR_PA_PTE_INV (1 << 9) +# define CIA_ERR_FROM_WRT_ERR (1 << 10) +# define CIA_ERR_IOA_TIMEOUT (1 << 11) +# define CIA_ERR_LOST_CORR_ERR (1 << 16) +# define CIA_ERR_LOST_UN_CORR_ERR (1 << 17) +# define CIA_ERR_LOST_CPU_PE (1 << 18) +# define CIA_ERR_LOST_MEM_NEM (1 << 19) +# define CIA_ERR_LOST_PERR (1 << 21) +# define CIA_ERR_LOST_PCI_ADDR_PE (1 << 22) +# define CIA_ERR_LOST_RCVD_MAS_ABT (1 << 23) +# define CIA_ERR_LOST_RCVD_TAR_ABT (1 << 24) +# define CIA_ERR_LOST_PA_PTE_INV (1 << 25) +# define CIA_ERR_LOST_FROM_WRT_ERR (1 << 26) +# define CIA_ERR_LOST_IOA_TIMEOUT (1 << 27) +# define CIA_ERR_VALID (1 << 31) +#define CIA_IOC_CIA_STAT (IDENT_ADDR + 0x8740008240UL) +#define CIA_IOC_ERR_MASK (IDENT_ADDR + 0x8740008280UL) +#define CIA_IOC_CIA_SYN (IDENT_ADDR + 0x8740008300UL) +#define CIA_IOC_MEM_ERR0 (IDENT_ADDR + 0x8740008400UL) +#define CIA_IOC_MEM_ERR1 (IDENT_ADDR + 0x8740008440UL) +#define CIA_IOC_PCI_ERR0 (IDENT_ADDR + 0x8740008800UL) +#define CIA_IOC_PCI_ERR1 (IDENT_ADDR + 0x8740008840UL) +#define CIA_IOC_PCI_ERR3 (IDENT_ADDR + 0x8740008880UL) + +/* + * 21171-CA System configuration registers + */ +#define CIA_IOC_MCR (IDENT_ADDR + 0x8750000000UL) +#define CIA_IOC_MBA0 (IDENT_ADDR + 0x8750000600UL) +#define CIA_IOC_MBA2 (IDENT_ADDR + 0x8750000680UL) +#define CIA_IOC_MBA4 (IDENT_ADDR + 0x8750000700UL) +#define CIA_IOC_MBA6 (IDENT_ADDR + 0x8750000780UL) +#define CIA_IOC_MBA8 (IDENT_ADDR + 0x8750000800UL) +#define CIA_IOC_MBAA (IDENT_ADDR + 0x8750000880UL) +#define CIA_IOC_MBAC (IDENT_ADDR + 0x8750000900UL) +#define CIA_IOC_MBAE (IDENT_ADDR + 0x8750000980UL) +#define CIA_IOC_TMG0 (IDENT_ADDR + 0x8750000B00UL) +#define CIA_IOC_TMG1 (IDENT_ADDR + 0x8750000B40UL) +#define CIA_IOC_TMG2 (IDENT_ADDR + 0x8750000B80UL) + +/* + * 2117A-CA PCI Address and Scatter-Gather Registers. + */ +#define CIA_IOC_PCI_TBIA (IDENT_ADDR + 0x8760000100UL) + +#define CIA_IOC_PCI_W0_BASE (IDENT_ADDR + 0x8760000400UL) +#define CIA_IOC_PCI_W0_MASK (IDENT_ADDR + 0x8760000440UL) +#define CIA_IOC_PCI_T0_BASE (IDENT_ADDR + 0x8760000480UL) + +#define CIA_IOC_PCI_W1_BASE (IDENT_ADDR + 0x8760000500UL) +#define CIA_IOC_PCI_W1_MASK (IDENT_ADDR + 0x8760000540UL) +#define CIA_IOC_PCI_T1_BASE (IDENT_ADDR + 0x8760000580UL) + +#define CIA_IOC_PCI_W2_BASE (IDENT_ADDR + 0x8760000600UL) +#define CIA_IOC_PCI_W2_MASK (IDENT_ADDR + 0x8760000640UL) +#define CIA_IOC_PCI_T2_BASE (IDENT_ADDR + 0x8760000680UL) + +#define CIA_IOC_PCI_W3_BASE (IDENT_ADDR + 0x8760000700UL) +#define CIA_IOC_PCI_W3_MASK (IDENT_ADDR + 0x8760000740UL) +#define CIA_IOC_PCI_T3_BASE (IDENT_ADDR + 0x8760000780UL) + +#define CIA_IOC_PCI_Wn_BASE(N) (IDENT_ADDR + 0x8760000400UL + (N)*0x100) +#define CIA_IOC_PCI_Wn_MASK(N) (IDENT_ADDR + 0x8760000440UL + (N)*0x100) +#define CIA_IOC_PCI_Tn_BASE(N) (IDENT_ADDR + 0x8760000480UL + (N)*0x100) + +#define CIA_IOC_PCI_W_DAC (IDENT_ADDR + 0x87600007C0UL) + +/* + * 2117A-CA Address Translation Registers. + */ + +/* 8 tag registers, the first 4 of which are lockable. */ +#define CIA_IOC_TB_TAGn(n) \ + (IDENT_ADDR + 0x8760000800UL + (n)*0x40) + +/* 4 page registers per tag register. */ +#define CIA_IOC_TBn_PAGEm(n,m) \ + (IDENT_ADDR + 0x8760001000UL + (n)*0x100 + (m)*0x40) + +/* + * Memory spaces: + */ +#define CIA_IACK_SC (IDENT_ADDR + 0x8720000000UL) +#define CIA_CONF (IDENT_ADDR + 0x8700000000UL) +#define CIA_IO (IDENT_ADDR + 0x8580000000UL) +#define CIA_SPARSE_MEM (IDENT_ADDR + 0x8000000000UL) +#define CIA_SPARSE_MEM_R2 (IDENT_ADDR + 0x8400000000UL) +#define CIA_SPARSE_MEM_R3 (IDENT_ADDR + 0x8500000000UL) +#define CIA_DENSE_MEM (IDENT_ADDR + 0x8600000000UL) +#define CIA_BW_MEM (IDENT_ADDR + 0x8800000000UL) +#define CIA_BW_IO (IDENT_ADDR + 0x8900000000UL) +#define CIA_BW_CFG_0 (IDENT_ADDR + 0x8a00000000UL) +#define CIA_BW_CFG_1 (IDENT_ADDR + 0x8b00000000UL) + +/* + * ALCOR's GRU ASIC registers + */ +#define GRU_INT_REQ (IDENT_ADDR + 0x8780000000UL) +#define GRU_INT_MASK (IDENT_ADDR + 0x8780000040UL) +#define GRU_INT_EDGE (IDENT_ADDR + 0x8780000080UL) +#define GRU_INT_HILO (IDENT_ADDR + 0x87800000C0UL) +#define GRU_INT_CLEAR (IDENT_ADDR + 0x8780000100UL) + +#define GRU_CACHE_CNFG (IDENT_ADDR + 0x8780000200UL) +#define GRU_SCR (IDENT_ADDR + 0x8780000300UL) +#define GRU_LED (IDENT_ADDR + 0x8780000800UL) +#define GRU_RESET (IDENT_ADDR + 0x8780000900UL) + +#define ALCOR_GRU_INT_REQ_BITS 0x800fffffUL +#define XLT_GRU_INT_REQ_BITS 0x80003fffUL +#define GRU_INT_REQ_BITS (alpha_mv.sys.cia.gru_int_req_bits+0) + +/* + * PYXIS interrupt control registers + */ +#define PYXIS_INT_REQ (IDENT_ADDR + 0x87A0000000UL) +#define PYXIS_INT_MASK (IDENT_ADDR + 0x87A0000040UL) +#define PYXIS_INT_HILO (IDENT_ADDR + 0x87A00000C0UL) +#define PYXIS_INT_ROUTE (IDENT_ADDR + 0x87A0000140UL) +#define PYXIS_GPO (IDENT_ADDR + 0x87A0000180UL) +#define PYXIS_INT_CNFG (IDENT_ADDR + 0x87A00001C0UL) +#define PYXIS_RT_COUNT (IDENT_ADDR + 0x87A0000200UL) +#define PYXIS_INT_TIME (IDENT_ADDR + 0x87A0000240UL) +#define PYXIS_IIC_CTRL (IDENT_ADDR + 0x87A00002C0UL) +#define PYXIS_RESET (IDENT_ADDR + 0x8780000900UL) + +/* Offset between ram physical addresses and pci64 DAC bus addresses. */ +#define PYXIS_DAC_OFFSET (1UL << 40) + +/* + * Data structure for handling CIA machine checks. + */ + +/* System-specific info. */ +struct el_CIA_sysdata_mcheck { + unsigned long cpu_err0; + unsigned long cpu_err1; + unsigned long cia_err; + unsigned long cia_stat; + unsigned long err_mask; + unsigned long cia_syn; + unsigned long mem_err0; + unsigned long mem_err1; + unsigned long pci_err0; + unsigned long pci_err1; + unsigned long pci_err2; +}; + + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +/* Do not touch, this should *NOT* be static inline */ +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions: + * + * CIA (the 2117x PCI/memory support chipset for the EV5 (21164) + * series of processors uses a sparse address mapping scheme to + * get at PCI memory and I/O. + */ + +/* + * Memory functions. 64-bit and 32-bit accesses are done through + * dense memory space, everything else through sparse space. + * + * For reading and writing 8 and 16 bit quantities we need to + * go through one of the three sparse address mapping regions + * and use the HAE_MEM CSR to provide some bits of the address. + * The following few routines use only sparse address region 1 + * which gives 1Gbyte of accessible space which relates exactly + * to the amount of PCI memory mapping *into* system address space. + * See p 6-17 of the specification but it looks something like this: + * + * 21164 Address: + * + * 3 2 1 + * 9876543210987654321098765432109876543210 + * 1ZZZZ0.PCI.QW.Address............BBLL + * + * ZZ = SBZ + * BB = Byte offset + * LL = Transfer length + * + * PCI Address: + * + * 3 2 1 + * 10987654321098765432109876543210 + * HHH....PCI.QW.Address........ 00 + * + * HHH = 31:29 HAE_MEM CSR + * + */ + +#define vip volatile int __force * +#define vuip volatile unsigned int __force * +#define vulp volatile unsigned long __force * + +__EXTERN_INLINE unsigned int cia_ioread8(void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long result, base_and_type; + + if (addr >= CIA_DENSE_MEM) + base_and_type = CIA_SPARSE_MEM + 0x00; + else + base_and_type = CIA_IO + 0x00; + + /* We can use CIA_MEM_R1_MASK for io ports too, since it is large + enough to cover all io ports, and smaller than CIA_IO. */ + addr &= CIA_MEM_R1_MASK; + result = *(vip) ((addr << 5) + base_and_type); + return __kernel_extbl(result, addr & 3); +} + +__EXTERN_INLINE void cia_iowrite8(u8 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long w, base_and_type; + + if (addr >= CIA_DENSE_MEM) + base_and_type = CIA_SPARSE_MEM + 0x00; + else + base_and_type = CIA_IO + 0x00; + + addr &= CIA_MEM_R1_MASK; + w = __kernel_insbl(b, addr & 3); + *(vuip) ((addr << 5) + base_and_type) = w; +} + +__EXTERN_INLINE unsigned int cia_ioread16(void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long result, base_and_type; + + if (addr >= CIA_DENSE_MEM) + base_and_type = CIA_SPARSE_MEM + 0x08; + else + base_and_type = CIA_IO + 0x08; + + addr &= CIA_MEM_R1_MASK; + result = *(vip) ((addr << 5) + base_and_type); + return __kernel_extwl(result, addr & 3); +} + +__EXTERN_INLINE void cia_iowrite16(u16 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long w, base_and_type; + + if (addr >= CIA_DENSE_MEM) + base_and_type = CIA_SPARSE_MEM + 0x08; + else + base_and_type = CIA_IO + 0x08; + + addr &= CIA_MEM_R1_MASK; + w = __kernel_inswl(b, addr & 3); + *(vuip) ((addr << 5) + base_and_type) = w; +} + +__EXTERN_INLINE unsigned int cia_ioread32(void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (addr < CIA_DENSE_MEM) + addr = ((addr - CIA_IO) << 5) + CIA_IO + 0x18; + return *(vuip)addr; +} + +__EXTERN_INLINE void cia_iowrite32(u32 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (addr < CIA_DENSE_MEM) + addr = ((addr - CIA_IO) << 5) + CIA_IO + 0x18; + *(vuip)addr = b; +} + +__EXTERN_INLINE void __iomem *cia_ioportmap(unsigned long addr) +{ + return (void __iomem *)(addr + CIA_IO); +} + +__EXTERN_INLINE void __iomem *cia_ioremap(unsigned long addr, + unsigned long size) +{ + return (void __iomem *)(addr + CIA_DENSE_MEM); +} + +__EXTERN_INLINE int cia_is_ioaddr(unsigned long addr) +{ + return addr >= IDENT_ADDR + 0x8000000000UL; +} + +__EXTERN_INLINE int cia_is_mmio(const volatile void __iomem *addr) +{ + return (unsigned long)addr >= CIA_DENSE_MEM; +} + +__EXTERN_INLINE void __iomem *cia_bwx_ioportmap(unsigned long addr) +{ + return (void __iomem *)(addr + CIA_BW_IO); +} + +__EXTERN_INLINE void __iomem *cia_bwx_ioremap(unsigned long addr, + unsigned long size) +{ + return (void __iomem *)(addr + CIA_BW_MEM); +} + +__EXTERN_INLINE int cia_bwx_is_ioaddr(unsigned long addr) +{ + return addr >= IDENT_ADDR + 0x8000000000UL; +} + +__EXTERN_INLINE int cia_bwx_is_mmio(const volatile void __iomem *addr) +{ + return (unsigned long)addr < CIA_BW_IO; +} + +#undef vip +#undef vuip +#undef vulp + +#undef __IO_PREFIX +#define __IO_PREFIX cia +#define cia_trivial_rw_bw 2 +#define cia_trivial_rw_lq 1 +#define cia_trivial_io_bw 0 +#define cia_trivial_io_lq 0 +#define cia_trivial_iounmap 1 +#include <asm/io_trivial.h> + +#undef __IO_PREFIX +#define __IO_PREFIX cia_bwx +#define cia_bwx_trivial_rw_bw 1 +#define cia_bwx_trivial_rw_lq 1 +#define cia_bwx_trivial_io_bw 1 +#define cia_bwx_trivial_io_lq 1 +#define cia_bwx_trivial_iounmap 1 +#include <asm/io_trivial.h> + +#undef __IO_PREFIX +#ifdef CONFIG_ALPHA_PYXIS +#define __IO_PREFIX cia_bwx +#else +#define __IO_PREFIX cia +#endif + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_CIA__H__ */ diff --git a/arch/alpha/include/asm/core_irongate.h b/arch/alpha/include/asm/core_irongate.h new file mode 100644 index 00000000..24b2db54 --- /dev/null +++ b/arch/alpha/include/asm/core_irongate.h @@ -0,0 +1,232 @@ +#ifndef __ALPHA_IRONGATE__H__ +#define __ALPHA_IRONGATE__H__ + +#include <linux/types.h> +#include <asm/compiler.h> + +/* + * IRONGATE is the internal name for the AMD-751 K7 core logic chipset + * which provides memory controller and PCI access for NAUTILUS-based + * EV6 (21264) systems. + * + * This file is based on: + * + * IronGate management library, (c) 1999 Alpha Processor, Inc. + * Copyright (C) 1999 Alpha Processor, Inc., + * (David Daniel, Stig Telfer, Soohoon Lee) + */ + +/* + * The 21264 supports, and internally recognizes, a 44-bit physical + * address space that is divided equally between memory address space + * and I/O address space. Memory address space resides in the lower + * half of the physical address space (PA[43]=0) and I/O address space + * resides in the upper half of the physical address space (PA[43]=1). + */ + +/* + * Irongate CSR map. Some of the CSRs are 8 or 16 bits, but all access + * through the routines given is 32-bit. + * + * The first 0x40 bytes are standard as per the PCI spec. + */ + +typedef volatile __u32 igcsr32; + +typedef struct { + igcsr32 dev_vendor; /* 0x00 - device ID, vendor ID */ + igcsr32 stat_cmd; /* 0x04 - status, command */ + igcsr32 class; /* 0x08 - class code, rev ID */ + igcsr32 latency; /* 0x0C - header type, PCI latency */ + igcsr32 bar0; /* 0x10 - BAR0 - AGP */ + igcsr32 bar1; /* 0x14 - BAR1 - GART */ + igcsr32 bar2; /* 0x18 - Power Management reg block */ + + igcsr32 rsrvd0[6]; /* 0x1C-0x33 reserved */ + + igcsr32 capptr; /* 0x34 - Capabilities pointer */ + + igcsr32 rsrvd1[2]; /* 0x38-0x3F reserved */ + + igcsr32 bacsr10; /* 0x40 - base address chip selects */ + igcsr32 bacsr32; /* 0x44 - base address chip selects */ + igcsr32 bacsr54_eccms761; /* 0x48 - 751: base addr. chip selects + 761: ECC, mode/status */ + + igcsr32 rsrvd2[1]; /* 0x4C-0x4F reserved */ + + igcsr32 drammap; /* 0x50 - address mapping control */ + igcsr32 dramtm; /* 0x54 - timing, driver strength */ + igcsr32 dramms; /* 0x58 - DRAM mode/status */ + + igcsr32 rsrvd3[1]; /* 0x5C-0x5F reserved */ + + igcsr32 biu0; /* 0x60 - bus interface unit */ + igcsr32 biusip; /* 0x64 - Serial initialisation pkt */ + + igcsr32 rsrvd4[2]; /* 0x68-0x6F reserved */ + + igcsr32 mro; /* 0x70 - memory request optimiser */ + + igcsr32 rsrvd5[3]; /* 0x74-0x7F reserved */ + + igcsr32 whami; /* 0x80 - who am I */ + igcsr32 pciarb; /* 0x84 - PCI arbitration control */ + igcsr32 pcicfg; /* 0x88 - PCI config status */ + + igcsr32 rsrvd6[4]; /* 0x8C-0x9B reserved */ + + igcsr32 pci_mem; /* 0x9C - PCI top of memory, + 761 only */ + + /* AGP (bus 1) control registers */ + igcsr32 agpcap; /* 0xA0 - AGP Capability Identifier */ + igcsr32 agpstat; /* 0xA4 - AGP status register */ + igcsr32 agpcmd; /* 0xA8 - AGP control register */ + igcsr32 agpva; /* 0xAC - AGP Virtual Address Space */ + igcsr32 agpmode; /* 0xB0 - AGP/GART mode control */ +} Irongate0; + + +typedef struct { + + igcsr32 dev_vendor; /* 0x00 - Device and Vendor IDs */ + igcsr32 stat_cmd; /* 0x04 - Status and Command regs */ + igcsr32 class; /* 0x08 - subclass, baseclass etc */ + igcsr32 htype; /* 0x0C - header type (at 0x0E) */ + igcsr32 rsrvd0[2]; /* 0x10-0x17 reserved */ + igcsr32 busnos; /* 0x18 - Primary, secondary bus nos */ + igcsr32 io_baselim_regs; /* 0x1C - IO base, IO lim, AGP status */ + igcsr32 mem_baselim; /* 0x20 - memory base, memory lim */ + igcsr32 pfmem_baselim; /* 0x24 - prefetchable base, lim */ + igcsr32 rsrvd1[2]; /* 0x28-0x2F reserved */ + igcsr32 io_baselim; /* 0x30 - IO base, IO limit */ + igcsr32 rsrvd2[2]; /* 0x34-0x3B - reserved */ + igcsr32 interrupt; /* 0x3C - interrupt, PCI bridge ctrl */ + +} Irongate1; + +extern igcsr32 *IronECC; + +/* + * Memory spaces: + */ + +/* Irongate is consistent with a subset of the Tsunami memory map */ +#ifdef USE_48_BIT_KSEG +#define IRONGATE_BIAS 0x80000000000UL +#else +#define IRONGATE_BIAS 0x10000000000UL +#endif + + +#define IRONGATE_MEM (IDENT_ADDR | IRONGATE_BIAS | 0x000000000UL) +#define IRONGATE_IACK_SC (IDENT_ADDR | IRONGATE_BIAS | 0x1F8000000UL) +#define IRONGATE_IO (IDENT_ADDR | IRONGATE_BIAS | 0x1FC000000UL) +#define IRONGATE_CONF (IDENT_ADDR | IRONGATE_BIAS | 0x1FE000000UL) + +/* + * PCI Configuration space accesses are formed like so: + * + * 0x1FE << 24 | : 2 2 2 2 1 1 1 1 : 1 1 1 1 1 1 0 0 : 0 0 0 0 0 0 0 0 : + * : 3 2 1 0 9 8 7 6 : 5 4 3 2 1 0 9 8 : 7 6 5 4 3 2 1 0 : + * ---bus numer--- -device-- -fun- ---register---- + */ + +#define IGCSR(dev,fun,reg) ( IRONGATE_CONF | \ + ((dev)<<11) | \ + ((fun)<<8) | \ + (reg) ) + +#define IRONGATE0 ((Irongate0 *) IGCSR(0, 0, 0)) +#define IRONGATE1 ((Irongate1 *) IGCSR(1, 0, 0)) + +/* + * Data structure for handling IRONGATE machine checks: + * This is the standard OSF logout frame + */ + +#define SCB_Q_SYSERR 0x620 /* OSF definitions */ +#define SCB_Q_PROCERR 0x630 +#define SCB_Q_SYSMCHK 0x660 +#define SCB_Q_PROCMCHK 0x670 + +struct el_IRONGATE_sysdata_mcheck { + __u32 FrameSize; /* Bytes, including this field */ + __u32 FrameFlags; /* <31> = Retry, <30> = Second Error */ + __u32 CpuOffset; /* Offset to CPU-specific into */ + __u32 SystemOffset; /* Offset to system-specific info */ + __u32 MCHK_Code; + __u32 MCHK_Frame_Rev; + __u64 I_STAT; + __u64 DC_STAT; + __u64 C_ADDR; + __u64 DC1_SYNDROME; + __u64 DC0_SYNDROME; + __u64 C_STAT; + __u64 C_STS; + __u64 RESERVED0; + __u64 EXC_ADDR; + __u64 IER_CM; + __u64 ISUM; + __u64 MM_STAT; + __u64 PAL_BASE; + __u64 I_CTL; + __u64 PCTX; +}; + + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions: + * + * IRONGATE (AMD-751) PCI/memory support chip for the EV6 (21264) and + * K7 can only use linear accesses to get at PCI memory and I/O spaces. + */ + +/* + * Memory functions. All accesses are done through linear space. + */ + +__EXTERN_INLINE void __iomem *irongate_ioportmap(unsigned long addr) +{ + return (void __iomem *)(addr + IRONGATE_IO); +} + +extern void __iomem *irongate_ioremap(unsigned long addr, unsigned long size); +extern void irongate_iounmap(volatile void __iomem *addr); + +__EXTERN_INLINE int irongate_is_ioaddr(unsigned long addr) +{ + return addr >= IRONGATE_MEM; +} + +__EXTERN_INLINE int irongate_is_mmio(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long)xaddr; + return addr < IRONGATE_IO || addr >= IRONGATE_CONF; +} + +#undef __IO_PREFIX +#define __IO_PREFIX irongate +#define irongate_trivial_rw_bw 1 +#define irongate_trivial_rw_lq 1 +#define irongate_trivial_io_bw 1 +#define irongate_trivial_io_lq 1 +#define irongate_trivial_iounmap 0 +#include <asm/io_trivial.h> + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_IRONGATE__H__ */ diff --git a/arch/alpha/include/asm/core_lca.h b/arch/alpha/include/asm/core_lca.h new file mode 100644 index 00000000..8ee6c516 --- /dev/null +++ b/arch/alpha/include/asm/core_lca.h @@ -0,0 +1,361 @@ +#ifndef __ALPHA_LCA__H__ +#define __ALPHA_LCA__H__ + +#include <asm/compiler.h> +#include <asm/mce.h> + +/* + * Low Cost Alpha (LCA) definitions (these apply to 21066 and 21068, + * for example). + * + * This file is based on: + * + * DECchip 21066 and DECchip 21068 Alpha AXP Microprocessors + * Hardware Reference Manual; Digital Equipment Corp.; May 1994; + * Maynard, MA; Order Number: EC-N2681-71. + */ + +/* + * NOTE: The LCA uses a Host Address Extension (HAE) register to access + * PCI addresses that are beyond the first 27 bits of address + * space. Updating the HAE requires an external cycle (and + * a memory barrier), which tends to be slow. Instead of updating + * it on each sparse memory access, we keep the current HAE value + * cached in variable cache_hae. Only if the cached HAE differs + * from the desired HAE value do we actually updated HAE register. + * The HAE register is preserved by the interrupt handler entry/exit + * code, so this scheme works even in the presence of interrupts. + * + * Dense memory space doesn't require the HAE, but is restricted to + * aligned 32 and 64 bit accesses. Special Cycle and Interrupt + * Acknowledge cycles may also require the use of the HAE. The LCA + * limits I/O address space to the bottom 24 bits of address space, + * but this easily covers the 16 bit ISA I/O address space. + */ + +/* + * NOTE 2! The memory operations do not set any memory barriers, as + * it's not needed for cases like a frame buffer that is essentially + * memory-like. You need to do them by hand if the operations depend + * on ordering. + * + * Similarly, the port I/O operations do a "mb" only after a write + * operation: if an mb is needed before (as in the case of doing + * memory mapped I/O first, and then a port I/O operation to the same + * device), it needs to be done by hand. + * + * After the above has bitten me 100 times, I'll give up and just do + * the mb all the time, but right now I'm hoping this will work out. + * Avoiding mb's may potentially be a noticeable speed improvement, + * but I can't honestly say I've tested it. + * + * Handling interrupts that need to do mb's to synchronize to + * non-interrupts is another fun race area. Don't do it (because if + * you do, I'll have to do *everything* with interrupts disabled, + * ugh). + */ + +/* + * Memory Controller registers: + */ +#define LCA_MEM_BCR0 (IDENT_ADDR + 0x120000000UL) +#define LCA_MEM_BCR1 (IDENT_ADDR + 0x120000008UL) +#define LCA_MEM_BCR2 (IDENT_ADDR + 0x120000010UL) +#define LCA_MEM_BCR3 (IDENT_ADDR + 0x120000018UL) +#define LCA_MEM_BMR0 (IDENT_ADDR + 0x120000020UL) +#define LCA_MEM_BMR1 (IDENT_ADDR + 0x120000028UL) +#define LCA_MEM_BMR2 (IDENT_ADDR + 0x120000030UL) +#define LCA_MEM_BMR3 (IDENT_ADDR + 0x120000038UL) +#define LCA_MEM_BTR0 (IDENT_ADDR + 0x120000040UL) +#define LCA_MEM_BTR1 (IDENT_ADDR + 0x120000048UL) +#define LCA_MEM_BTR2 (IDENT_ADDR + 0x120000050UL) +#define LCA_MEM_BTR3 (IDENT_ADDR + 0x120000058UL) +#define LCA_MEM_GTR (IDENT_ADDR + 0x120000060UL) +#define LCA_MEM_ESR (IDENT_ADDR + 0x120000068UL) +#define LCA_MEM_EAR (IDENT_ADDR + 0x120000070UL) +#define LCA_MEM_CAR (IDENT_ADDR + 0x120000078UL) +#define LCA_MEM_VGR (IDENT_ADDR + 0x120000080UL) +#define LCA_MEM_PLM (IDENT_ADDR + 0x120000088UL) +#define LCA_MEM_FOR (IDENT_ADDR + 0x120000090UL) + +/* + * I/O Controller registers: + */ +#define LCA_IOC_HAE (IDENT_ADDR + 0x180000000UL) +#define LCA_IOC_CONF (IDENT_ADDR + 0x180000020UL) +#define LCA_IOC_STAT0 (IDENT_ADDR + 0x180000040UL) +#define LCA_IOC_STAT1 (IDENT_ADDR + 0x180000060UL) +#define LCA_IOC_TBIA (IDENT_ADDR + 0x180000080UL) +#define LCA_IOC_TB_ENA (IDENT_ADDR + 0x1800000a0UL) +#define LCA_IOC_SFT_RST (IDENT_ADDR + 0x1800000c0UL) +#define LCA_IOC_PAR_DIS (IDENT_ADDR + 0x1800000e0UL) +#define LCA_IOC_W_BASE0 (IDENT_ADDR + 0x180000100UL) +#define LCA_IOC_W_BASE1 (IDENT_ADDR + 0x180000120UL) +#define LCA_IOC_W_MASK0 (IDENT_ADDR + 0x180000140UL) +#define LCA_IOC_W_MASK1 (IDENT_ADDR + 0x180000160UL) +#define LCA_IOC_T_BASE0 (IDENT_ADDR + 0x180000180UL) +#define LCA_IOC_T_BASE1 (IDENT_ADDR + 0x1800001a0UL) +#define LCA_IOC_TB_TAG0 (IDENT_ADDR + 0x188000000UL) +#define LCA_IOC_TB_TAG1 (IDENT_ADDR + 0x188000020UL) +#define LCA_IOC_TB_TAG2 (IDENT_ADDR + 0x188000040UL) +#define LCA_IOC_TB_TAG3 (IDENT_ADDR + 0x188000060UL) +#define LCA_IOC_TB_TAG4 (IDENT_ADDR + 0x188000070UL) +#define LCA_IOC_TB_TAG5 (IDENT_ADDR + 0x1880000a0UL) +#define LCA_IOC_TB_TAG6 (IDENT_ADDR + 0x1880000c0UL) +#define LCA_IOC_TB_TAG7 (IDENT_ADDR + 0x1880000e0UL) + +/* + * Memory spaces: + */ +#define LCA_IACK_SC (IDENT_ADDR + 0x1a0000000UL) +#define LCA_CONF (IDENT_ADDR + 0x1e0000000UL) +#define LCA_IO (IDENT_ADDR + 0x1c0000000UL) +#define LCA_SPARSE_MEM (IDENT_ADDR + 0x200000000UL) +#define LCA_DENSE_MEM (IDENT_ADDR + 0x300000000UL) + +/* + * Bit definitions for I/O Controller status register 0: + */ +#define LCA_IOC_STAT0_CMD 0xf +#define LCA_IOC_STAT0_ERR (1<<4) +#define LCA_IOC_STAT0_LOST (1<<5) +#define LCA_IOC_STAT0_THIT (1<<6) +#define LCA_IOC_STAT0_TREF (1<<7) +#define LCA_IOC_STAT0_CODE_SHIFT 8 +#define LCA_IOC_STAT0_CODE_MASK 0x7 +#define LCA_IOC_STAT0_P_NBR_SHIFT 13 +#define LCA_IOC_STAT0_P_NBR_MASK 0x7ffff + +#define LCA_HAE_ADDRESS LCA_IOC_HAE + +/* LCA PMR Power Management register defines */ +#define LCA_PMR_ADDR (IDENT_ADDR + 0x120000098UL) +#define LCA_PMR_PDIV 0x7 /* Primary clock divisor */ +#define LCA_PMR_ODIV 0x38 /* Override clock divisor */ +#define LCA_PMR_INTO 0x40 /* Interrupt override */ +#define LCA_PMR_DMAO 0x80 /* DMA override */ +#define LCA_PMR_OCCEB 0xffff0000L /* Override cycle counter - even bits */ +#define LCA_PMR_OCCOB 0xffff000000000000L /* Override cycle counter - even bits */ +#define LCA_PMR_PRIMARY_MASK 0xfffffffffffffff8L + +/* LCA PMR Macros */ + +#define LCA_READ_PMR (*(volatile unsigned long *)LCA_PMR_ADDR) +#define LCA_WRITE_PMR(d) (*((volatile unsigned long *)LCA_PMR_ADDR) = (d)) + +#define LCA_GET_PRIMARY(r) ((r) & LCA_PMR_PDIV) +#define LCA_GET_OVERRIDE(r) (((r) >> 3) & LCA_PMR_PDIV) +#define LCA_SET_PRIMARY_CLOCK(r, c) ((r) = (((r) & LCA_PMR_PRIMARY_MASK)|(c))) + +/* LCA PMR Divisor values */ +#define LCA_PMR_DIV_1 0x0 +#define LCA_PMR_DIV_1_5 0x1 +#define LCA_PMR_DIV_2 0x2 +#define LCA_PMR_DIV_4 0x3 +#define LCA_PMR_DIV_8 0x4 +#define LCA_PMR_DIV_16 0x5 +#define LCA_PMR_DIV_MIN DIV_1 +#define LCA_PMR_DIV_MAX DIV_16 + + +/* + * Data structure for handling LCA machine checks. Correctable errors + * result in a short logout frame, uncorrectable ones in a long one. + */ +struct el_lca_mcheck_short { + struct el_common h; /* common logout header */ + unsigned long esr; /* error-status register */ + unsigned long ear; /* error-address register */ + unsigned long dc_stat; /* dcache status register */ + unsigned long ioc_stat0; /* I/O controller status register 0 */ + unsigned long ioc_stat1; /* I/O controller status register 1 */ +}; + +struct el_lca_mcheck_long { + struct el_common h; /* common logout header */ + unsigned long pt[31]; /* PAL temps */ + unsigned long exc_addr; /* exception address */ + unsigned long pad1[3]; + unsigned long pal_base; /* PALcode base address */ + unsigned long hier; /* hw interrupt enable */ + unsigned long hirr; /* hw interrupt request */ + unsigned long mm_csr; /* MMU control & status */ + unsigned long dc_stat; /* data cache status */ + unsigned long dc_addr; /* data cache addr register */ + unsigned long abox_ctl; /* address box control register */ + unsigned long esr; /* error status register */ + unsigned long ear; /* error address register */ + unsigned long car; /* cache control register */ + unsigned long ioc_stat0; /* I/O controller status register 0 */ + unsigned long ioc_stat1; /* I/O controller status register 1 */ + unsigned long va; /* virtual address register */ +}; + +union el_lca { + struct el_common * c; + struct el_lca_mcheck_long * l; + struct el_lca_mcheck_short * s; +}; + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions: + * + * Unlike Jensen, the Noname machines have no concept of local + * I/O---everything goes over the PCI bus. + * + * There is plenty room for optimization here. In particular, + * the Alpha's insb/insw/extb/extw should be useful in moving + * data to/from the right byte-lanes. + */ + +#define vip volatile int __force * +#define vuip volatile unsigned int __force * +#define vulp volatile unsigned long __force * + +#define LCA_SET_HAE \ + do { \ + if (addr >= (1UL << 24)) { \ + unsigned long msb = addr & 0xf8000000; \ + addr -= msb; \ + set_hae(msb); \ + } \ + } while (0) + + +__EXTERN_INLINE unsigned int lca_ioread8(void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long result, base_and_type; + + if (addr >= LCA_DENSE_MEM) { + addr -= LCA_DENSE_MEM; + LCA_SET_HAE; + base_and_type = LCA_SPARSE_MEM + 0x00; + } else { + addr -= LCA_IO; + base_and_type = LCA_IO + 0x00; + } + + result = *(vip) ((addr << 5) + base_and_type); + return __kernel_extbl(result, addr & 3); +} + +__EXTERN_INLINE void lca_iowrite8(u8 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long w, base_and_type; + + if (addr >= LCA_DENSE_MEM) { + addr -= LCA_DENSE_MEM; + LCA_SET_HAE; + base_and_type = LCA_SPARSE_MEM + 0x00; + } else { + addr -= LCA_IO; + base_and_type = LCA_IO + 0x00; + } + + w = __kernel_insbl(b, addr & 3); + *(vuip) ((addr << 5) + base_and_type) = w; +} + +__EXTERN_INLINE unsigned int lca_ioread16(void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long result, base_and_type; + + if (addr >= LCA_DENSE_MEM) { + addr -= LCA_DENSE_MEM; + LCA_SET_HAE; + base_and_type = LCA_SPARSE_MEM + 0x08; + } else { + addr -= LCA_IO; + base_and_type = LCA_IO + 0x08; + } + + result = *(vip) ((addr << 5) + base_and_type); + return __kernel_extwl(result, addr & 3); +} + +__EXTERN_INLINE void lca_iowrite16(u16 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long w, base_and_type; + + if (addr >= LCA_DENSE_MEM) { + addr -= LCA_DENSE_MEM; + LCA_SET_HAE; + base_and_type = LCA_SPARSE_MEM + 0x08; + } else { + addr -= LCA_IO; + base_and_type = LCA_IO + 0x08; + } + + w = __kernel_inswl(b, addr & 3); + *(vuip) ((addr << 5) + base_and_type) = w; +} + +__EXTERN_INLINE unsigned int lca_ioread32(void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (addr < LCA_DENSE_MEM) + addr = ((addr - LCA_IO) << 5) + LCA_IO + 0x18; + return *(vuip)addr; +} + +__EXTERN_INLINE void lca_iowrite32(u32 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (addr < LCA_DENSE_MEM) + addr = ((addr - LCA_IO) << 5) + LCA_IO + 0x18; + *(vuip)addr = b; +} + +__EXTERN_INLINE void __iomem *lca_ioportmap(unsigned long addr) +{ + return (void __iomem *)(addr + LCA_IO); +} + +__EXTERN_INLINE void __iomem *lca_ioremap(unsigned long addr, + unsigned long size) +{ + return (void __iomem *)(addr + LCA_DENSE_MEM); +} + +__EXTERN_INLINE int lca_is_ioaddr(unsigned long addr) +{ + return addr >= IDENT_ADDR + 0x120000000UL; +} + +__EXTERN_INLINE int lca_is_mmio(const volatile void __iomem *addr) +{ + return (unsigned long)addr >= LCA_DENSE_MEM; +} + +#undef vip +#undef vuip +#undef vulp + +#undef __IO_PREFIX +#define __IO_PREFIX lca +#define lca_trivial_rw_bw 2 +#define lca_trivial_rw_lq 1 +#define lca_trivial_io_bw 0 +#define lca_trivial_io_lq 0 +#define lca_trivial_iounmap 1 +#include <asm/io_trivial.h> + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_LCA__H__ */ diff --git a/arch/alpha/include/asm/core_marvel.h b/arch/alpha/include/asm/core_marvel.h new file mode 100644 index 00000000..dad300fa --- /dev/null +++ b/arch/alpha/include/asm/core_marvel.h @@ -0,0 +1,377 @@ +/* + * Marvel systems use the IO7 I/O chip provides PCI/PCIX/AGP access + * + * This file is based on: + * + * Marvel / EV7 System Programmer's Manual + * Revision 1.00 + * 14 May 2001 + */ + +#ifndef __ALPHA_MARVEL__H__ +#define __ALPHA_MARVEL__H__ + +#include <linux/types.h> +#include <linux/spinlock.h> + +#include <asm/compiler.h> + +#define MARVEL_MAX_PIDS 32 /* as long as we rely on 43-bit superpage */ +#define MARVEL_IRQ_VEC_PE_SHIFT (10) +#define MARVEL_IRQ_VEC_IRQ_MASK ((1 << MARVEL_IRQ_VEC_PE_SHIFT) - 1) +#define MARVEL_NR_IRQS \ + (16 + (MARVEL_MAX_PIDS * (1 << MARVEL_IRQ_VEC_PE_SHIFT))) + +/* + * EV7 RBOX Registers + */ +typedef struct { + volatile unsigned long csr __attribute__((aligned(16))); +} ev7_csr; + +typedef struct { + ev7_csr RBOX_CFG; /* 0x0000 */ + ev7_csr RBOX_NSVC; + ev7_csr RBOX_EWVC; + ev7_csr RBOX_WHAMI; + ev7_csr RBOX_TCTL; /* 0x0040 */ + ev7_csr RBOX_INT; + ev7_csr RBOX_IMASK; + ev7_csr RBOX_IREQ; + ev7_csr RBOX_INTQ; /* 0x0080 */ + ev7_csr RBOX_INTA; + ev7_csr RBOX_IT; + ev7_csr RBOX_SCRATCH1; + ev7_csr RBOX_SCRATCH2; /* 0x00c0 */ + ev7_csr RBOX_L_ERR; +} ev7_csrs; + +/* + * EV7 CSR addressing macros + */ +#define EV7_MASK40(addr) ((addr) & ((1UL << 41) - 1)) +#define EV7_KERN_ADDR(addr) ((void *)(IDENT_ADDR | EV7_MASK40(addr))) + +#define EV7_PE_MASK 0x1ffUL /* 9 bits ( 256 + mem/io ) */ +#define EV7_IPE(pe) ((~((long)(pe)) & EV7_PE_MASK) << 35) + +#define EV7_CSR_PHYS(pe, off) (EV7_IPE(pe) | (0x7FFCUL << 20) | (off)) +#define EV7_CSRS_PHYS(pe) (EV7_CSR_PHYS(pe, 0UL)) + +#define EV7_CSR_KERN(pe, off) (EV7_KERN_ADDR(EV7_CSR_PHYS(pe, off))) +#define EV7_CSRS_KERN(pe) (EV7_KERN_ADDR(EV7_CSRS_PHYS(pe))) + +#define EV7_CSR_OFFSET(name) ((unsigned long)&((ev7_csrs *)NULL)->name.csr) + +/* + * IO7 registers + */ +typedef struct { + volatile unsigned long csr __attribute__((aligned(64))); +} io7_csr; + +typedef struct { + /* I/O Port Control Registers */ + io7_csr POx_CTRL; /* 0x0000 */ + io7_csr POx_CACHE_CTL; + io7_csr POx_TIMER; + io7_csr POx_IO_ADR_EXT; + io7_csr POx_MEM_ADR_EXT; /* 0x0100 */ + io7_csr POx_XCAL_CTRL; + io7_csr rsvd1[2]; /* ?? spec doesn't show 0x180 */ + io7_csr POx_DM_SOURCE; /* 0x0200 */ + io7_csr POx_DM_DEST; + io7_csr POx_DM_SIZE; + io7_csr POx_DM_CTRL; + io7_csr rsvd2[4]; /* 0x0300 */ + + /* AGP Control Registers -- port 3 only */ + io7_csr AGP_CAP_ID; /* 0x0400 */ + io7_csr AGP_STAT; + io7_csr AGP_CMD; + io7_csr rsvd3; + + /* I/O Port Monitor Registers */ + io7_csr POx_MONCTL; /* 0x0500 */ + io7_csr POx_CTRA; + io7_csr POx_CTRB; + io7_csr POx_CTR56; + io7_csr POx_SCRATCH; /* 0x0600 */ + io7_csr POx_XTRA_A; + io7_csr POx_XTRA_TS; + io7_csr POx_XTRA_Z; + io7_csr rsvd4; /* 0x0700 */ + io7_csr POx_THRESHA; + io7_csr POx_THRESHB; + io7_csr rsvd5[33]; + + /* System Address Space Window Control Registers */ + + io7_csr POx_WBASE[4]; /* 0x1000 */ + io7_csr POx_WMASK[4]; + io7_csr POx_TBASE[4]; + io7_csr POx_SG_TBIA; + io7_csr POx_MSI_WBASE; + io7_csr rsvd6[50]; + + /* I/O Port Error Registers */ + io7_csr POx_ERR_SUM; + io7_csr POx_FIRST_ERR; + io7_csr POx_MSK_HEI; + io7_csr POx_TLB_ERR; + io7_csr POx_SPL_COMPLT; + io7_csr POx_TRANS_SUM; + io7_csr POx_FRC_PCI_ERR; + io7_csr POx_MULT_ERR; + io7_csr rsvd7[8]; + + /* I/O Port End of Interrupt Registers */ + io7_csr EOI_DAT; + io7_csr rsvd8[7]; + io7_csr POx_IACK_SPECIAL; + io7_csr rsvd9[103]; +} io7_ioport_csrs; + +typedef struct { + io7_csr IO_ASIC_REV; /* 0x30.0000 */ + io7_csr IO_SYS_REV; + io7_csr SER_CHAIN3; + io7_csr PO7_RST1; + io7_csr PO7_RST2; /* 0x30.0100 */ + io7_csr POx_RST[4]; + io7_csr IO7_DWNH; + io7_csr IO7_MAF; + io7_csr IO7_MAF_TO; + io7_csr IO7_ACC_CLUMP; /* 0x30.0300 */ + io7_csr IO7_PMASK; + io7_csr IO7_IOMASK; + io7_csr IO7_UPH; + io7_csr IO7_UPH_TO; /* 0x30.0400 */ + io7_csr RBX_IREQ_OFF; + io7_csr RBX_INTA_OFF; + io7_csr INT_RTY; + io7_csr PO7_MONCTL; /* 0x30.0500 */ + io7_csr PO7_CTRA; + io7_csr PO7_CTRB; + io7_csr PO7_CTR56; + io7_csr PO7_SCRATCH; /* 0x30.0600 */ + io7_csr PO7_XTRA_A; + io7_csr PO7_XTRA_TS; + io7_csr PO7_XTRA_Z; + io7_csr PO7_PMASK; /* 0x30.0700 */ + io7_csr PO7_THRESHA; + io7_csr PO7_THRESHB; + io7_csr rsvd1[97]; + io7_csr PO7_ERROR_SUM; /* 0x30.2000 */ + io7_csr PO7_BHOLE_MASK; + io7_csr PO7_HEI_MSK; + io7_csr PO7_CRD_MSK; + io7_csr PO7_UNCRR_SYM; /* 0x30.2100 */ + io7_csr PO7_CRRCT_SYM; + io7_csr PO7_ERR_PKT[2]; + io7_csr PO7_UGBGE_SYM; /* 0x30.2200 */ + io7_csr rsbv2[887]; + io7_csr PO7_LSI_CTL[128]; /* 0x31.0000 */ + io7_csr rsvd3[123]; + io7_csr HLT_CTL; /* 0x31.3ec0 */ + io7_csr HPI_CTL; /* 0x31.3f00 */ + io7_csr CRD_CTL; + io7_csr STV_CTL; + io7_csr HEI_CTL; + io7_csr PO7_MSI_CTL[16]; /* 0x31.4000 */ + io7_csr rsvd4[240]; + + /* + * Interrupt Diagnostic / Test + */ + struct { + io7_csr INT_PND; + io7_csr INT_CLR; + io7_csr INT_EOI; + io7_csr rsvd[29]; + } INT_DIAG[4]; + io7_csr rsvd5[125]; /* 0x31.a000 */ + io7_csr MISC_PND; /* 0x31.b800 */ + io7_csr rsvd6[31]; + io7_csr MSI_PND[16]; /* 0x31.c000 */ + io7_csr rsvd7[16]; + io7_csr MSI_CLR[16]; /* 0x31.c800 */ +} io7_port7_csrs; + +/* + * IO7 DMA Window Base register (POx_WBASEx) + */ +#define wbase_m_ena 0x1 +#define wbase_m_sg 0x2 +#define wbase_m_dac 0x4 +#define wbase_m_addr 0xFFF00000 +union IO7_POx_WBASE { + struct { + unsigned ena : 1; /* <0> */ + unsigned sg : 1; /* <1> */ + unsigned dac : 1; /* <2> -- window 3 only */ + unsigned rsvd1 : 17; + unsigned addr : 12; /* <31:20> */ + unsigned rsvd2 : 32; + } bits; + unsigned as_long[2]; + unsigned as_quad; +}; + +/* + * IO7 IID (Interrupt IDentifier) format + * + * For level-sensative interrupts, int_num is encoded as: + * + * bus/port slot/device INTx + * <7:5> <4:2> <1:0> + */ +union IO7_IID { + struct { + unsigned int_num : 9; /* <8:0> */ + unsigned tpu_mask : 4; /* <12:9> rsvd */ + unsigned msi : 1; /* 13 */ + unsigned ipe : 10; /* <23:14> */ + unsigned long rsvd : 40; + } bits; + unsigned int as_long[2]; + unsigned long as_quad; +}; + +/* + * IO7 addressing macros + */ +#define IO7_KERN_ADDR(addr) (EV7_KERN_ADDR(addr)) + +#define IO7_PORT_MASK 0x07UL /* 3 bits of port */ + +#define IO7_IPE(pe) (EV7_IPE(pe)) +#define IO7_IPORT(port) ((~((long)(port)) & IO7_PORT_MASK) << 32) + +#define IO7_HOSE(pe, port) (IO7_IPE(pe) | IO7_IPORT(port)) + +#define IO7_MEM_PHYS(pe, port) (IO7_HOSE(pe, port) | 0x00000000UL) +#define IO7_CONF_PHYS(pe, port) (IO7_HOSE(pe, port) | 0xFE000000UL) +#define IO7_IO_PHYS(pe, port) (IO7_HOSE(pe, port) | 0xFF000000UL) +#define IO7_CSR_PHYS(pe, port, off) \ + (IO7_HOSE(pe, port) | 0xFF800000UL | (off)) +#define IO7_CSRS_PHYS(pe, port) (IO7_CSR_PHYS(pe, port, 0UL)) +#define IO7_PORT7_CSRS_PHYS(pe) (IO7_CSR_PHYS(pe, 7, 0x300000UL)) + +#define IO7_MEM_KERN(pe, port) (IO7_KERN_ADDR(IO7_MEM_PHYS(pe, port))) +#define IO7_CONF_KERN(pe, port) (IO7_KERN_ADDR(IO7_CONF_PHYS(pe, port))) +#define IO7_IO_KERN(pe, port) (IO7_KERN_ADDR(IO7_IO_PHYS(pe, port))) +#define IO7_CSR_KERN(pe, port, off) (IO7_KERN_ADDR(IO7_CSR_PHYS(pe,port,off))) +#define IO7_CSRS_KERN(pe, port) (IO7_KERN_ADDR(IO7_CSRS_PHYS(pe, port))) +#define IO7_PORT7_CSRS_KERN(pe) (IO7_KERN_ADDR(IO7_PORT7_CSRS_PHYS(pe))) + +#define IO7_PLL_RNGA(pll) (((pll) >> 3) & 0x7) +#define IO7_PLL_RNGB(pll) (((pll) >> 6) & 0x7) + +#define IO7_MEM_SPACE (2UL * 1024 * 1024 * 1024) /* 2GB MEM */ +#define IO7_IO_SPACE (8UL * 1024 * 1024) /* 8MB I/O */ + + +/* + * Offset between ram physical addresses and pci64 DAC addresses + */ +#define IO7_DAC_OFFSET (1UL << 49) + +/* + * This is needed to satisify the IO() macro used in initializing the machvec + */ +#define MARVEL_IACK_SC \ + ((unsigned long) \ + (&(((io7_ioport_csrs *)IO7_CSRS_KERN(0, 0))->POx_IACK_SPECIAL))) + +#ifdef __KERNEL__ + +/* + * IO7 structs + */ +#define IO7_NUM_PORTS 4 +#define IO7_AGP_PORT 3 + +struct io7_port { + struct io7 *io7; + struct pci_controller *hose; + + int enabled; + unsigned int port; + io7_ioport_csrs *csrs; + + unsigned long saved_wbase[4]; + unsigned long saved_wmask[4]; + unsigned long saved_tbase[4]; +}; + +struct io7 { + struct io7 *next; + + unsigned int pe; + io7_port7_csrs *csrs; + struct io7_port ports[IO7_NUM_PORTS]; + + spinlock_t irq_lock; +}; + +#ifndef __EXTERN_INLINE +# define __EXTERN_INLINE extern inline +# define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions. All access through linear space. + */ + +/* + * Memory functions. All accesses through linear space. + */ + +#define vucp volatile unsigned char __force * +#define vusp volatile unsigned short __force * + +extern unsigned int marvel_ioread8(void __iomem *); +extern void marvel_iowrite8(u8 b, void __iomem *); + +__EXTERN_INLINE unsigned int marvel_ioread16(void __iomem *addr) +{ + return __kernel_ldwu(*(vusp)addr); +} + +__EXTERN_INLINE void marvel_iowrite16(u16 b, void __iomem *addr) +{ + __kernel_stw(b, *(vusp)addr); +} + +extern void __iomem *marvel_ioremap(unsigned long addr, unsigned long size); +extern void marvel_iounmap(volatile void __iomem *addr); +extern void __iomem *marvel_ioportmap (unsigned long addr); + +__EXTERN_INLINE int marvel_is_ioaddr(unsigned long addr) +{ + return (addr >> 40) & 1; +} + +extern int marvel_is_mmio(const volatile void __iomem *); + +#undef vucp +#undef vusp + +#undef __IO_PREFIX +#define __IO_PREFIX marvel +#define marvel_trivial_rw_bw 1 +#define marvel_trivial_rw_lq 1 +#define marvel_trivial_io_bw 0 +#define marvel_trivial_io_lq 1 +#define marvel_trivial_iounmap 0 +#include <asm/io_trivial.h> + +#ifdef __IO_EXTERN_INLINE +# undef __EXTERN_INLINE +# undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_MARVEL__H__ */ diff --git a/arch/alpha/include/asm/core_mcpcia.h b/arch/alpha/include/asm/core_mcpcia.h new file mode 100644 index 00000000..ad44bef2 --- /dev/null +++ b/arch/alpha/include/asm/core_mcpcia.h @@ -0,0 +1,381 @@ +#ifndef __ALPHA_MCPCIA__H__ +#define __ALPHA_MCPCIA__H__ + +/* Define to experiment with fitting everything into one 128MB HAE window. + One window per bus, that is. */ +#define MCPCIA_ONE_HAE_WINDOW 1 + +#include <linux/types.h> +#include <asm/compiler.h> +#include <asm/mce.h> + +/* + * MCPCIA is the internal name for a core logic chipset which provides + * PCI access for the RAWHIDE family of systems. + * + * This file is based on: + * + * RAWHIDE System Programmer's Manual + * 16-May-96 + * Rev. 1.4 + * + */ + +/*------------------------------------------------------------------------** +** ** +** I/O procedures ** +** ** +** inport[b|w|t|l], outport[b|w|t|l] 8:16:24:32 IO xfers ** +** inportbxt: 8 bits only ** +** inport: alias of inportw ** +** outport: alias of outportw ** +** ** +** inmem[b|w|t|l], outmem[b|w|t|l] 8:16:24:32 ISA memory xfers ** +** inmembxt: 8 bits only ** +** inmem: alias of inmemw ** +** outmem: alias of outmemw ** +** ** +**------------------------------------------------------------------------*/ + + +/* MCPCIA ADDRESS BIT DEFINITIONS + * + * 3333 3333 3322 2222 2222 1111 1111 11 + * 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210 + * ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- + * 1 000 + * ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- + * | |\| + * | Byte Enable --+ | + * | Transfer Length --+ + * +-- IO space, not cached + * + * Byte Transfer + * Enable Length Transfer Byte Address + * adr<6:5> adr<4:3> Length Enable Adder + * --------------------------------------------- + * 00 00 Byte 1110 0x000 + * 01 00 Byte 1101 0x020 + * 10 00 Byte 1011 0x040 + * 11 00 Byte 0111 0x060 + * + * 00 01 Word 1100 0x008 + * 01 01 Word 1001 0x028 <= Not supported in this code. + * 10 01 Word 0011 0x048 + * + * 00 10 Tribyte 1000 0x010 + * 01 10 Tribyte 0001 0x030 + * + * 10 11 Longword 0000 0x058 + * + * Note that byte enables are asserted low. + * + */ + +#define MCPCIA_MAX_HOSES 4 + +#define MCPCIA_MID(m) ((unsigned long)(m) << 33) + +/* Dodge has PCI0 and PCI1 at MID 4 and 5 respectively. + Durango adds PCI2 and PCI3 at MID 6 and 7 respectively. */ +#define MCPCIA_HOSE2MID(h) ((h) + 4) + +#define MCPCIA_MEM_MASK 0x07ffffff /* SPARSE Mem region mask is 27 bits */ + +/* + * Memory spaces: + */ +#define MCPCIA_SPARSE(m) (IDENT_ADDR + 0xf000000000UL + MCPCIA_MID(m)) +#define MCPCIA_DENSE(m) (IDENT_ADDR + 0xf100000000UL + MCPCIA_MID(m)) +#define MCPCIA_IO(m) (IDENT_ADDR + 0xf180000000UL + MCPCIA_MID(m)) +#define MCPCIA_CONF(m) (IDENT_ADDR + 0xf1c0000000UL + MCPCIA_MID(m)) +#define MCPCIA_CSR(m) (IDENT_ADDR + 0xf1e0000000UL + MCPCIA_MID(m)) +#define MCPCIA_IO_IACK(m) (IDENT_ADDR + 0xf1f0000000UL + MCPCIA_MID(m)) +#define MCPCIA_DENSE_IO(m) (IDENT_ADDR + 0xe1fc000000UL + MCPCIA_MID(m)) +#define MCPCIA_DENSE_CONF(m) (IDENT_ADDR + 0xe1fe000000UL + MCPCIA_MID(m)) + +/* + * General Registers + */ +#define MCPCIA_REV(m) (MCPCIA_CSR(m) + 0x000) +#define MCPCIA_WHOAMI(m) (MCPCIA_CSR(m) + 0x040) +#define MCPCIA_PCI_LAT(m) (MCPCIA_CSR(m) + 0x080) +#define MCPCIA_CAP_CTRL(m) (MCPCIA_CSR(m) + 0x100) +#define MCPCIA_HAE_MEM(m) (MCPCIA_CSR(m) + 0x400) +#define MCPCIA_HAE_IO(m) (MCPCIA_CSR(m) + 0x440) +#define _MCPCIA_IACK_SC(m) (MCPCIA_CSR(m) + 0x480) +#define MCPCIA_HAE_DENSE(m) (MCPCIA_CSR(m) + 0x4C0) + +/* + * Interrupt Control registers + */ +#define MCPCIA_INT_CTL(m) (MCPCIA_CSR(m) + 0x500) +#define MCPCIA_INT_REQ(m) (MCPCIA_CSR(m) + 0x540) +#define MCPCIA_INT_TARG(m) (MCPCIA_CSR(m) + 0x580) +#define MCPCIA_INT_ADR(m) (MCPCIA_CSR(m) + 0x5C0) +#define MCPCIA_INT_ADR_EXT(m) (MCPCIA_CSR(m) + 0x600) +#define MCPCIA_INT_MASK0(m) (MCPCIA_CSR(m) + 0x640) +#define MCPCIA_INT_MASK1(m) (MCPCIA_CSR(m) + 0x680) +#define MCPCIA_INT_ACK0(m) (MCPCIA_CSR(m) + 0x10003f00) +#define MCPCIA_INT_ACK1(m) (MCPCIA_CSR(m) + 0x10003f40) + +/* + * Performance Monitor registers + */ +#define MCPCIA_PERF_MON(m) (MCPCIA_CSR(m) + 0x300) +#define MCPCIA_PERF_CONT(m) (MCPCIA_CSR(m) + 0x340) + +/* + * Diagnostic Registers + */ +#define MCPCIA_CAP_DIAG(m) (MCPCIA_CSR(m) + 0x700) +#define MCPCIA_TOP_OF_MEM(m) (MCPCIA_CSR(m) + 0x7C0) + +/* + * Error registers + */ +#define MCPCIA_MC_ERR0(m) (MCPCIA_CSR(m) + 0x800) +#define MCPCIA_MC_ERR1(m) (MCPCIA_CSR(m) + 0x840) +#define MCPCIA_CAP_ERR(m) (MCPCIA_CSR(m) + 0x880) +#define MCPCIA_PCI_ERR1(m) (MCPCIA_CSR(m) + 0x1040) +#define MCPCIA_MDPA_STAT(m) (MCPCIA_CSR(m) + 0x4000) +#define MCPCIA_MDPA_SYN(m) (MCPCIA_CSR(m) + 0x4040) +#define MCPCIA_MDPA_DIAG(m) (MCPCIA_CSR(m) + 0x4080) +#define MCPCIA_MDPB_STAT(m) (MCPCIA_CSR(m) + 0x8000) +#define MCPCIA_MDPB_SYN(m) (MCPCIA_CSR(m) + 0x8040) +#define MCPCIA_MDPB_DIAG(m) (MCPCIA_CSR(m) + 0x8080) + +/* + * PCI Address Translation Registers. + */ +#define MCPCIA_SG_TBIA(m) (MCPCIA_CSR(m) + 0x1300) +#define MCPCIA_HBASE(m) (MCPCIA_CSR(m) + 0x1340) + +#define MCPCIA_W0_BASE(m) (MCPCIA_CSR(m) + 0x1400) +#define MCPCIA_W0_MASK(m) (MCPCIA_CSR(m) + 0x1440) +#define MCPCIA_T0_BASE(m) (MCPCIA_CSR(m) + 0x1480) + +#define MCPCIA_W1_BASE(m) (MCPCIA_CSR(m) + 0x1500) +#define MCPCIA_W1_MASK(m) (MCPCIA_CSR(m) + 0x1540) +#define MCPCIA_T1_BASE(m) (MCPCIA_CSR(m) + 0x1580) + +#define MCPCIA_W2_BASE(m) (MCPCIA_CSR(m) + 0x1600) +#define MCPCIA_W2_MASK(m) (MCPCIA_CSR(m) + 0x1640) +#define MCPCIA_T2_BASE(m) (MCPCIA_CSR(m) + 0x1680) + +#define MCPCIA_W3_BASE(m) (MCPCIA_CSR(m) + 0x1700) +#define MCPCIA_W3_MASK(m) (MCPCIA_CSR(m) + 0x1740) +#define MCPCIA_T3_BASE(m) (MCPCIA_CSR(m) + 0x1780) + +/* Hack! Only words for bus 0. */ + +#ifndef MCPCIA_ONE_HAE_WINDOW +#define MCPCIA_HAE_ADDRESS MCPCIA_HAE_MEM(4) +#endif +#define MCPCIA_IACK_SC _MCPCIA_IACK_SC(4) + +/* + * The canonical non-remaped I/O and MEM addresses have these values + * subtracted out. This is arranged so that folks manipulating ISA + * devices can use their familiar numbers and have them map to bus 0. + */ + +#define MCPCIA_IO_BIAS MCPCIA_IO(4) +#define MCPCIA_MEM_BIAS MCPCIA_DENSE(4) + +/* Offset between ram physical addresses and pci64 DAC bus addresses. */ +#define MCPCIA_DAC_OFFSET (1UL << 40) + +/* + * Data structure for handling MCPCIA machine checks: + */ +struct el_MCPCIA_uncorrected_frame_mcheck { + struct el_common header; + struct el_common_EV5_uncorrectable_mcheck procdata; +}; + + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions: + * + * MCPCIA, the RAWHIDE family PCI/memory support chipset for the EV5 (21164) + * and EV56 (21164a) processors, can use either a sparse address mapping + * scheme, or the so-called byte-word PCI address space, to get at PCI memory + * and I/O. + * + * Unfortunately, we can't use BWIO with EV5, so for now, we always use SPARSE. + */ + +/* + * Memory functions. 64-bit and 32-bit accesses are done through + * dense memory space, everything else through sparse space. + * + * For reading and writing 8 and 16 bit quantities we need to + * go through one of the three sparse address mapping regions + * and use the HAE_MEM CSR to provide some bits of the address. + * The following few routines use only sparse address region 1 + * which gives 1Gbyte of accessible space which relates exactly + * to the amount of PCI memory mapping *into* system address space. + * See p 6-17 of the specification but it looks something like this: + * + * 21164 Address: + * + * 3 2 1 + * 9876543210987654321098765432109876543210 + * 1ZZZZ0.PCI.QW.Address............BBLL + * + * ZZ = SBZ + * BB = Byte offset + * LL = Transfer length + * + * PCI Address: + * + * 3 2 1 + * 10987654321098765432109876543210 + * HHH....PCI.QW.Address........ 00 + * + * HHH = 31:29 HAE_MEM CSR + * + */ + +#define vip volatile int __force * +#define vuip volatile unsigned int __force * + +#ifndef MCPCIA_ONE_HAE_WINDOW +#define MCPCIA_FROB_MMIO \ + if (__mcpcia_is_mmio(hose)) { \ + set_hae(hose & 0xffffffff); \ + hose = hose - MCPCIA_DENSE(4) + MCPCIA_SPARSE(4); \ + } +#else +#define MCPCIA_FROB_MMIO \ + if (__mcpcia_is_mmio(hose)) { \ + hose = hose - MCPCIA_DENSE(4) + MCPCIA_SPARSE(4); \ + } +#endif + +extern inline int __mcpcia_is_mmio(unsigned long addr) +{ + return (addr & 0x80000000UL) == 0; +} + +__EXTERN_INLINE unsigned int mcpcia_ioread8(void __iomem *xaddr) +{ + unsigned long addr = (unsigned long)xaddr & MCPCIA_MEM_MASK; + unsigned long hose = (unsigned long)xaddr & ~MCPCIA_MEM_MASK; + unsigned long result; + + MCPCIA_FROB_MMIO; + + result = *(vip) ((addr << 5) + hose + 0x00); + return __kernel_extbl(result, addr & 3); +} + +__EXTERN_INLINE void mcpcia_iowrite8(u8 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long)xaddr & MCPCIA_MEM_MASK; + unsigned long hose = (unsigned long)xaddr & ~MCPCIA_MEM_MASK; + unsigned long w; + + MCPCIA_FROB_MMIO; + + w = __kernel_insbl(b, addr & 3); + *(vuip) ((addr << 5) + hose + 0x00) = w; +} + +__EXTERN_INLINE unsigned int mcpcia_ioread16(void __iomem *xaddr) +{ + unsigned long addr = (unsigned long)xaddr & MCPCIA_MEM_MASK; + unsigned long hose = (unsigned long)xaddr & ~MCPCIA_MEM_MASK; + unsigned long result; + + MCPCIA_FROB_MMIO; + + result = *(vip) ((addr << 5) + hose + 0x08); + return __kernel_extwl(result, addr & 3); +} + +__EXTERN_INLINE void mcpcia_iowrite16(u16 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long)xaddr & MCPCIA_MEM_MASK; + unsigned long hose = (unsigned long)xaddr & ~MCPCIA_MEM_MASK; + unsigned long w; + + MCPCIA_FROB_MMIO; + + w = __kernel_inswl(b, addr & 3); + *(vuip) ((addr << 5) + hose + 0x08) = w; +} + +__EXTERN_INLINE unsigned int mcpcia_ioread32(void __iomem *xaddr) +{ + unsigned long addr = (unsigned long)xaddr; + + if (!__mcpcia_is_mmio(addr)) + addr = ((addr & 0xffff) << 5) + (addr & ~0xfffful) + 0x18; + + return *(vuip)addr; +} + +__EXTERN_INLINE void mcpcia_iowrite32(u32 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long)xaddr; + + if (!__mcpcia_is_mmio(addr)) + addr = ((addr & 0xffff) << 5) + (addr & ~0xfffful) + 0x18; + + *(vuip)addr = b; +} + + +__EXTERN_INLINE void __iomem *mcpcia_ioportmap(unsigned long addr) +{ + return (void __iomem *)(addr + MCPCIA_IO_BIAS); +} + +__EXTERN_INLINE void __iomem *mcpcia_ioremap(unsigned long addr, + unsigned long size) +{ + return (void __iomem *)(addr + MCPCIA_MEM_BIAS); +} + +__EXTERN_INLINE int mcpcia_is_ioaddr(unsigned long addr) +{ + return addr >= MCPCIA_SPARSE(0); +} + +__EXTERN_INLINE int mcpcia_is_mmio(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + return __mcpcia_is_mmio(addr); +} + +#undef MCPCIA_FROB_MMIO + +#undef vip +#undef vuip + +#undef __IO_PREFIX +#define __IO_PREFIX mcpcia +#define mcpcia_trivial_rw_bw 2 +#define mcpcia_trivial_rw_lq 1 +#define mcpcia_trivial_io_bw 0 +#define mcpcia_trivial_io_lq 0 +#define mcpcia_trivial_iounmap 1 +#include <asm/io_trivial.h> + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_MCPCIA__H__ */ diff --git a/arch/alpha/include/asm/core_polaris.h b/arch/alpha/include/asm/core_polaris.h new file mode 100644 index 00000000..2f966b64 --- /dev/null +++ b/arch/alpha/include/asm/core_polaris.h @@ -0,0 +1,110 @@ +#ifndef __ALPHA_POLARIS__H__ +#define __ALPHA_POLARIS__H__ + +#include <linux/types.h> +#include <asm/compiler.h> + +/* + * POLARIS is the internal name for a core logic chipset which provides + * memory controller and PCI access for the 21164PC chip based systems. + * + * This file is based on: + * + * Polaris System Controller + * Device Functional Specification + * 22-Jan-98 + * Rev. 4.2 + * + */ + +/* Polaris memory regions */ +#define POLARIS_SPARSE_MEM_BASE (IDENT_ADDR + 0xf800000000UL) +#define POLARIS_DENSE_MEM_BASE (IDENT_ADDR + 0xf900000000UL) +#define POLARIS_SPARSE_IO_BASE (IDENT_ADDR + 0xf980000000UL) +#define POLARIS_SPARSE_CONFIG_BASE (IDENT_ADDR + 0xf9c0000000UL) +#define POLARIS_IACK_BASE (IDENT_ADDR + 0xf9f8000000UL) +#define POLARIS_DENSE_IO_BASE (IDENT_ADDR + 0xf9fc000000UL) +#define POLARIS_DENSE_CONFIG_BASE (IDENT_ADDR + 0xf9fe000000UL) + +#define POLARIS_IACK_SC POLARIS_IACK_BASE + +/* The Polaris command/status registers live in PCI Config space for + * bus 0/device 0. As such, they may be bytes, words, or doublewords. + */ +#define POLARIS_W_VENID (POLARIS_DENSE_CONFIG_BASE) +#define POLARIS_W_DEVID (POLARIS_DENSE_CONFIG_BASE+2) +#define POLARIS_W_CMD (POLARIS_DENSE_CONFIG_BASE+4) +#define POLARIS_W_STATUS (POLARIS_DENSE_CONFIG_BASE+6) + +/* + * Data structure for handling POLARIS machine checks: + */ +struct el_POLARIS_sysdata_mcheck { + u_long psc_status; + u_long psc_pcictl0; + u_long psc_pcictl1; + u_long psc_pcictl2; +}; + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions: + * + * POLARIS, the PCI/memory support chipset for the PCA56 (21164PC) + * processors, can use either a sparse address mapping scheme, or the + * so-called byte-word PCI address space, to get at PCI memory and I/O. + * + * However, we will support only the BWX form. + */ + +/* + * Memory functions. Polaris allows all accesses (byte/word + * as well as long/quad) to be done through dense space. + * + * We will only support DENSE access via BWX insns. + */ + +__EXTERN_INLINE void __iomem *polaris_ioportmap(unsigned long addr) +{ + return (void __iomem *)(addr + POLARIS_DENSE_IO_BASE); +} + +__EXTERN_INLINE void __iomem *polaris_ioremap(unsigned long addr, + unsigned long size) +{ + return (void __iomem *)(addr + POLARIS_DENSE_MEM_BASE); +} + +__EXTERN_INLINE int polaris_is_ioaddr(unsigned long addr) +{ + return addr >= POLARIS_SPARSE_MEM_BASE; +} + +__EXTERN_INLINE int polaris_is_mmio(const volatile void __iomem *addr) +{ + return (unsigned long)addr < POLARIS_SPARSE_IO_BASE; +} + +#undef __IO_PREFIX +#define __IO_PREFIX polaris +#define polaris_trivial_rw_bw 1 +#define polaris_trivial_rw_lq 1 +#define polaris_trivial_io_bw 1 +#define polaris_trivial_io_lq 1 +#define polaris_trivial_iounmap 1 +#include <asm/io_trivial.h> + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_POLARIS__H__ */ diff --git a/arch/alpha/include/asm/core_t2.h b/arch/alpha/include/asm/core_t2.h new file mode 100644 index 00000000..ade9d92e --- /dev/null +++ b/arch/alpha/include/asm/core_t2.h @@ -0,0 +1,614 @@ +#ifndef __ALPHA_T2__H__ +#define __ALPHA_T2__H__ + +/* Fit everything into one 128MB HAE window. */ +#define T2_ONE_HAE_WINDOW 1 + +#include <linux/types.h> +#include <linux/spinlock.h> +#include <asm/compiler.h> + +/* + * T2 is the internal name for the core logic chipset which provides + * memory controller and PCI access for the SABLE-based systems. + * + * This file is based on: + * + * SABLE I/O Specification + * Revision/Update Information: 1.3 + * + * jestabro@amt.tay1.dec.com Initial Version. + * + */ + +#define T2_MEM_R1_MASK 0x07ffffff /* Mem sparse region 1 mask is 27 bits */ + +/* GAMMA-SABLE is a SABLE with EV5-based CPUs */ +/* All LYNX machines, EV4 or EV5, use the GAMMA bias also */ +#define _GAMMA_BIAS 0x8000000000UL + +#if defined(CONFIG_ALPHA_GENERIC) +#define GAMMA_BIAS alpha_mv.sys.t2.gamma_bias +#elif defined(CONFIG_ALPHA_GAMMA) +#define GAMMA_BIAS _GAMMA_BIAS +#else +#define GAMMA_BIAS 0 +#endif + +/* + * Memory spaces: + */ +#define T2_CONF (IDENT_ADDR + GAMMA_BIAS + 0x390000000UL) +#define T2_IO (IDENT_ADDR + GAMMA_BIAS + 0x3a0000000UL) +#define T2_SPARSE_MEM (IDENT_ADDR + GAMMA_BIAS + 0x200000000UL) +#define T2_DENSE_MEM (IDENT_ADDR + GAMMA_BIAS + 0x3c0000000UL) + +#define T2_IOCSR (IDENT_ADDR + GAMMA_BIAS + 0x38e000000UL) +#define T2_CERR1 (IDENT_ADDR + GAMMA_BIAS + 0x38e000020UL) +#define T2_CERR2 (IDENT_ADDR + GAMMA_BIAS + 0x38e000040UL) +#define T2_CERR3 (IDENT_ADDR + GAMMA_BIAS + 0x38e000060UL) +#define T2_PERR1 (IDENT_ADDR + GAMMA_BIAS + 0x38e000080UL) +#define T2_PERR2 (IDENT_ADDR + GAMMA_BIAS + 0x38e0000a0UL) +#define T2_PSCR (IDENT_ADDR + GAMMA_BIAS + 0x38e0000c0UL) +#define T2_HAE_1 (IDENT_ADDR + GAMMA_BIAS + 0x38e0000e0UL) +#define T2_HAE_2 (IDENT_ADDR + GAMMA_BIAS + 0x38e000100UL) +#define T2_HBASE (IDENT_ADDR + GAMMA_BIAS + 0x38e000120UL) +#define T2_WBASE1 (IDENT_ADDR + GAMMA_BIAS + 0x38e000140UL) +#define T2_WMASK1 (IDENT_ADDR + GAMMA_BIAS + 0x38e000160UL) +#define T2_TBASE1 (IDENT_ADDR + GAMMA_BIAS + 0x38e000180UL) +#define T2_WBASE2 (IDENT_ADDR + GAMMA_BIAS + 0x38e0001a0UL) +#define T2_WMASK2 (IDENT_ADDR + GAMMA_BIAS + 0x38e0001c0UL) +#define T2_TBASE2 (IDENT_ADDR + GAMMA_BIAS + 0x38e0001e0UL) +#define T2_TLBBR (IDENT_ADDR + GAMMA_BIAS + 0x38e000200UL) +#define T2_IVR (IDENT_ADDR + GAMMA_BIAS + 0x38e000220UL) +#define T2_HAE_3 (IDENT_ADDR + GAMMA_BIAS + 0x38e000240UL) +#define T2_HAE_4 (IDENT_ADDR + GAMMA_BIAS + 0x38e000260UL) + +/* The CSRs below are T3/T4 only */ +#define T2_WBASE3 (IDENT_ADDR + GAMMA_BIAS + 0x38e000280UL) +#define T2_WMASK3 (IDENT_ADDR + GAMMA_BIAS + 0x38e0002a0UL) +#define T2_TBASE3 (IDENT_ADDR + GAMMA_BIAS + 0x38e0002c0UL) + +#define T2_TDR0 (IDENT_ADDR + GAMMA_BIAS + 0x38e000300UL) +#define T2_TDR1 (IDENT_ADDR + GAMMA_BIAS + 0x38e000320UL) +#define T2_TDR2 (IDENT_ADDR + GAMMA_BIAS + 0x38e000340UL) +#define T2_TDR3 (IDENT_ADDR + GAMMA_BIAS + 0x38e000360UL) +#define T2_TDR4 (IDENT_ADDR + GAMMA_BIAS + 0x38e000380UL) +#define T2_TDR5 (IDENT_ADDR + GAMMA_BIAS + 0x38e0003a0UL) +#define T2_TDR6 (IDENT_ADDR + GAMMA_BIAS + 0x38e0003c0UL) +#define T2_TDR7 (IDENT_ADDR + GAMMA_BIAS + 0x38e0003e0UL) + +#define T2_WBASE4 (IDENT_ADDR + GAMMA_BIAS + 0x38e000400UL) +#define T2_WMASK4 (IDENT_ADDR + GAMMA_BIAS + 0x38e000420UL) +#define T2_TBASE4 (IDENT_ADDR + GAMMA_BIAS + 0x38e000440UL) + +#define T2_AIR (IDENT_ADDR + GAMMA_BIAS + 0x38e000460UL) +#define T2_VAR (IDENT_ADDR + GAMMA_BIAS + 0x38e000480UL) +#define T2_DIR (IDENT_ADDR + GAMMA_BIAS + 0x38e0004a0UL) +#define T2_ICE (IDENT_ADDR + GAMMA_BIAS + 0x38e0004c0UL) + +#ifndef T2_ONE_HAE_WINDOW +#define T2_HAE_ADDRESS T2_HAE_1 +#endif + +/* T2 CSRs are in the non-cachable primary IO space from 3.8000.0000 to + 3.8fff.ffff + * + * +--------------+ 3 8000 0000 + * | CPU 0 CSRs | + * +--------------+ 3 8100 0000 + * | CPU 1 CSRs | + * +--------------+ 3 8200 0000 + * | CPU 2 CSRs | + * +--------------+ 3 8300 0000 + * | CPU 3 CSRs | + * +--------------+ 3 8400 0000 + * | CPU Reserved | + * +--------------+ 3 8700 0000 + * | Mem Reserved | + * +--------------+ 3 8800 0000 + * | Mem 0 CSRs | + * +--------------+ 3 8900 0000 + * | Mem 1 CSRs | + * +--------------+ 3 8a00 0000 + * | Mem 2 CSRs | + * +--------------+ 3 8b00 0000 + * | Mem 3 CSRs | + * +--------------+ 3 8c00 0000 + * | Mem Reserved | + * +--------------+ 3 8e00 0000 + * | PCI Bridge | + * +--------------+ 3 8f00 0000 + * | Expansion IO | + * +--------------+ 3 9000 0000 + * + * + */ +#define T2_CPU0_BASE (IDENT_ADDR + GAMMA_BIAS + 0x380000000L) +#define T2_CPU1_BASE (IDENT_ADDR + GAMMA_BIAS + 0x381000000L) +#define T2_CPU2_BASE (IDENT_ADDR + GAMMA_BIAS + 0x382000000L) +#define T2_CPU3_BASE (IDENT_ADDR + GAMMA_BIAS + 0x383000000L) + +#define T2_CPUn_BASE(n) (T2_CPU0_BASE + (((n)&3) * 0x001000000L)) + +#define T2_MEM0_BASE (IDENT_ADDR + GAMMA_BIAS + 0x388000000L) +#define T2_MEM1_BASE (IDENT_ADDR + GAMMA_BIAS + 0x389000000L) +#define T2_MEM2_BASE (IDENT_ADDR + GAMMA_BIAS + 0x38a000000L) +#define T2_MEM3_BASE (IDENT_ADDR + GAMMA_BIAS + 0x38b000000L) + + +/* + * Sable CPU Module CSRS + * + * These are CSRs for hardware other than the CPU chip on the CPU module. + * The CPU module has Backup Cache control logic, Cbus control logic, and + * interrupt control logic on it. There is a duplicate tag store to speed + * up maintaining cache coherency. + */ + +struct sable_cpu_csr { + unsigned long bcc; long fill_00[3]; /* Backup Cache Control */ + unsigned long bcce; long fill_01[3]; /* Backup Cache Correctable Error */ + unsigned long bccea; long fill_02[3]; /* B-Cache Corr Err Address Latch */ + unsigned long bcue; long fill_03[3]; /* B-Cache Uncorrectable Error */ + unsigned long bcuea; long fill_04[3]; /* B-Cache Uncorr Err Addr Latch */ + unsigned long dter; long fill_05[3]; /* Duplicate Tag Error */ + unsigned long cbctl; long fill_06[3]; /* CBus Control */ + unsigned long cbe; long fill_07[3]; /* CBus Error */ + unsigned long cbeal; long fill_08[3]; /* CBus Error Addr Latch low */ + unsigned long cbeah; long fill_09[3]; /* CBus Error Addr Latch high */ + unsigned long pmbx; long fill_10[3]; /* Processor Mailbox */ + unsigned long ipir; long fill_11[3]; /* Inter-Processor Int Request */ + unsigned long sic; long fill_12[3]; /* System Interrupt Clear */ + unsigned long adlk; long fill_13[3]; /* Address Lock (LDxL/STxC) */ + unsigned long madrl; long fill_14[3]; /* CBus Miss Address */ + unsigned long rev; long fill_15[3]; /* CMIC Revision */ +}; + +/* + * Data structure for handling T2 machine checks: + */ +struct el_t2_frame_header { + unsigned int elcf_fid; /* Frame ID (from above) */ + unsigned int elcf_size; /* Size of frame in bytes */ +}; + +struct el_t2_procdata_mcheck { + unsigned long elfmc_paltemp[32]; /* PAL TEMP REGS. */ + /* EV4-specific fields */ + unsigned long elfmc_exc_addr; /* Addr of excepting insn. */ + unsigned long elfmc_exc_sum; /* Summary of arith traps. */ + unsigned long elfmc_exc_mask; /* Exception mask (from exc_sum). */ + unsigned long elfmc_iccsr; /* IBox hardware enables. */ + unsigned long elfmc_pal_base; /* Base address for PALcode. */ + unsigned long elfmc_hier; /* Hardware Interrupt Enable. */ + unsigned long elfmc_hirr; /* Hardware Interrupt Request. */ + unsigned long elfmc_mm_csr; /* D-stream fault info. */ + unsigned long elfmc_dc_stat; /* D-cache status (ECC/Parity Err). */ + unsigned long elfmc_dc_addr; /* EV3 Phys Addr for ECC/DPERR. */ + unsigned long elfmc_abox_ctl; /* ABox Control Register. */ + unsigned long elfmc_biu_stat; /* BIU Status. */ + unsigned long elfmc_biu_addr; /* BUI Address. */ + unsigned long elfmc_biu_ctl; /* BIU Control. */ + unsigned long elfmc_fill_syndrome; /* For correcting ECC errors. */ + unsigned long elfmc_fill_addr;/* Cache block which was being read. */ + unsigned long elfmc_va; /* Effective VA of fault or miss. */ + unsigned long elfmc_bc_tag; /* Backup Cache Tag Probe Results. */ +}; + +/* + * Sable processor specific Machine Check Data segment. + */ + +struct el_t2_logout_header { + unsigned int elfl_size; /* size in bytes of logout area. */ + unsigned int elfl_sbz1:31; /* Should be zero. */ + unsigned int elfl_retry:1; /* Retry flag. */ + unsigned int elfl_procoffset; /* Processor-specific offset. */ + unsigned int elfl_sysoffset; /* Offset of system-specific. */ + unsigned int elfl_error_type; /* PAL error type code. */ + unsigned int elfl_frame_rev; /* PAL Frame revision. */ +}; +struct el_t2_sysdata_mcheck { + unsigned long elcmc_bcc; /* CSR 0 */ + unsigned long elcmc_bcce; /* CSR 1 */ + unsigned long elcmc_bccea; /* CSR 2 */ + unsigned long elcmc_bcue; /* CSR 3 */ + unsigned long elcmc_bcuea; /* CSR 4 */ + unsigned long elcmc_dter; /* CSR 5 */ + unsigned long elcmc_cbctl; /* CSR 6 */ + unsigned long elcmc_cbe; /* CSR 7 */ + unsigned long elcmc_cbeal; /* CSR 8 */ + unsigned long elcmc_cbeah; /* CSR 9 */ + unsigned long elcmc_pmbx; /* CSR 10 */ + unsigned long elcmc_ipir; /* CSR 11 */ + unsigned long elcmc_sic; /* CSR 12 */ + unsigned long elcmc_adlk; /* CSR 13 */ + unsigned long elcmc_madrl; /* CSR 14 */ + unsigned long elcmc_crrev4; /* CSR 15 */ +}; + +/* + * Sable memory error frame - sable pfms section 3.42 + */ +struct el_t2_data_memory { + struct el_t2_frame_header elcm_hdr; /* ID$MEM-FERR = 0x08 */ + unsigned int elcm_module; /* Module id. */ + unsigned int elcm_res04; /* Reserved. */ + unsigned long elcm_merr; /* CSR0: Error Reg 1. */ + unsigned long elcm_mcmd1; /* CSR1: Command Trap 1. */ + unsigned long elcm_mcmd2; /* CSR2: Command Trap 2. */ + unsigned long elcm_mconf; /* CSR3: Configuration. */ + unsigned long elcm_medc1; /* CSR4: EDC Status 1. */ + unsigned long elcm_medc2; /* CSR5: EDC Status 2. */ + unsigned long elcm_medcc; /* CSR6: EDC Control. */ + unsigned long elcm_msctl; /* CSR7: Stream Buffer Control. */ + unsigned long elcm_mref; /* CSR8: Refresh Control. */ + unsigned long elcm_filter; /* CSR9: CRD Filter Control. */ +}; + + +/* + * Sable other CPU error frame - sable pfms section 3.43 + */ +struct el_t2_data_other_cpu { + short elco_cpuid; /* CPU ID */ + short elco_res02[3]; + unsigned long elco_bcc; /* CSR 0 */ + unsigned long elco_bcce; /* CSR 1 */ + unsigned long elco_bccea; /* CSR 2 */ + unsigned long elco_bcue; /* CSR 3 */ + unsigned long elco_bcuea; /* CSR 4 */ + unsigned long elco_dter; /* CSR 5 */ + unsigned long elco_cbctl; /* CSR 6 */ + unsigned long elco_cbe; /* CSR 7 */ + unsigned long elco_cbeal; /* CSR 8 */ + unsigned long elco_cbeah; /* CSR 9 */ + unsigned long elco_pmbx; /* CSR 10 */ + unsigned long elco_ipir; /* CSR 11 */ + unsigned long elco_sic; /* CSR 12 */ + unsigned long elco_adlk; /* CSR 13 */ + unsigned long elco_madrl; /* CSR 14 */ + unsigned long elco_crrev4; /* CSR 15 */ +}; + +/* + * Sable other CPU error frame - sable pfms section 3.44 + */ +struct el_t2_data_t2{ + struct el_t2_frame_header elct_hdr; /* ID$T2-FRAME */ + unsigned long elct_iocsr; /* IO Control and Status Register */ + unsigned long elct_cerr1; /* Cbus Error Register 1 */ + unsigned long elct_cerr2; /* Cbus Error Register 2 */ + unsigned long elct_cerr3; /* Cbus Error Register 3 */ + unsigned long elct_perr1; /* PCI Error Register 1 */ + unsigned long elct_perr2; /* PCI Error Register 2 */ + unsigned long elct_hae0_1; /* High Address Extension Register 1 */ + unsigned long elct_hae0_2; /* High Address Extension Register 2 */ + unsigned long elct_hbase; /* High Base Register */ + unsigned long elct_wbase1; /* Window Base Register 1 */ + unsigned long elct_wmask1; /* Window Mask Register 1 */ + unsigned long elct_tbase1; /* Translated Base Register 1 */ + unsigned long elct_wbase2; /* Window Base Register 2 */ + unsigned long elct_wmask2; /* Window Mask Register 2 */ + unsigned long elct_tbase2; /* Translated Base Register 2 */ + unsigned long elct_tdr0; /* TLB Data Register 0 */ + unsigned long elct_tdr1; /* TLB Data Register 1 */ + unsigned long elct_tdr2; /* TLB Data Register 2 */ + unsigned long elct_tdr3; /* TLB Data Register 3 */ + unsigned long elct_tdr4; /* TLB Data Register 4 */ + unsigned long elct_tdr5; /* TLB Data Register 5 */ + unsigned long elct_tdr6; /* TLB Data Register 6 */ + unsigned long elct_tdr7; /* TLB Data Register 7 */ +}; + +/* + * Sable error log data structure - sable pfms section 3.40 + */ +struct el_t2_data_corrected { + unsigned long elcpb_biu_stat; + unsigned long elcpb_biu_addr; + unsigned long elcpb_biu_ctl; + unsigned long elcpb_fill_syndrome; + unsigned long elcpb_fill_addr; + unsigned long elcpb_bc_tag; +}; + +/* + * Sable error log data structure + * Note there are 4 memory slots on sable (see t2.h) + */ +struct el_t2_frame_mcheck { + struct el_t2_frame_header elfmc_header; /* ID$P-FRAME_MCHECK */ + struct el_t2_logout_header elfmc_hdr; + struct el_t2_procdata_mcheck elfmc_procdata; + struct el_t2_sysdata_mcheck elfmc_sysdata; + struct el_t2_data_t2 elfmc_t2data; + struct el_t2_data_memory elfmc_memdata[4]; + struct el_t2_frame_header elfmc_footer; /* empty */ +}; + + +/* + * Sable error log data structures on memory errors + */ +struct el_t2_frame_corrected { + struct el_t2_frame_header elfcc_header; /* ID$P-BC-COR */ + struct el_t2_logout_header elfcc_hdr; + struct el_t2_data_corrected elfcc_procdata; +/* struct el_t2_data_t2 elfcc_t2data; */ +/* struct el_t2_data_memory elfcc_memdata[4]; */ + struct el_t2_frame_header elfcc_footer; /* empty */ +}; + + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions: + * + * T2 (the core logic PCI/memory support chipset for the SABLE + * series of processors uses a sparse address mapping scheme to + * get at PCI memory and I/O. + */ + +#define vip volatile int * +#define vuip volatile unsigned int * + +extern inline u8 t2_inb(unsigned long addr) +{ + long result = *(vip) ((addr << 5) + T2_IO + 0x00); + return __kernel_extbl(result, addr & 3); +} + +extern inline void t2_outb(u8 b, unsigned long addr) +{ + unsigned long w; + + w = __kernel_insbl(b, addr & 3); + *(vuip) ((addr << 5) + T2_IO + 0x00) = w; + mb(); +} + +extern inline u16 t2_inw(unsigned long addr) +{ + long result = *(vip) ((addr << 5) + T2_IO + 0x08); + return __kernel_extwl(result, addr & 3); +} + +extern inline void t2_outw(u16 b, unsigned long addr) +{ + unsigned long w; + + w = __kernel_inswl(b, addr & 3); + *(vuip) ((addr << 5) + T2_IO + 0x08) = w; + mb(); +} + +extern inline u32 t2_inl(unsigned long addr) +{ + return *(vuip) ((addr << 5) + T2_IO + 0x18); +} + +extern inline void t2_outl(u32 b, unsigned long addr) +{ + *(vuip) ((addr << 5) + T2_IO + 0x18) = b; + mb(); +} + + +/* + * Memory functions. + * + * For reading and writing 8 and 16 bit quantities we need to + * go through one of the three sparse address mapping regions + * and use the HAE_MEM CSR to provide some bits of the address. + * The following few routines use only sparse address region 1 + * which gives 1Gbyte of accessible space which relates exactly + * to the amount of PCI memory mapping *into* system address space. + * See p 6-17 of the specification but it looks something like this: + * + * 21164 Address: + * + * 3 2 1 + * 9876543210987654321098765432109876543210 + * 1ZZZZ0.PCI.QW.Address............BBLL + * + * ZZ = SBZ + * BB = Byte offset + * LL = Transfer length + * + * PCI Address: + * + * 3 2 1 + * 10987654321098765432109876543210 + * HHH....PCI.QW.Address........ 00 + * + * HHH = 31:29 HAE_MEM CSR + * + */ + +#ifdef T2_ONE_HAE_WINDOW +#define t2_set_hae +#else +#define t2_set_hae { \ + unsigned long msb = addr >> 27; \ + addr &= T2_MEM_R1_MASK; \ + set_hae(msb); \ +} +#endif + +/* + * NOTE: take T2_DENSE_MEM off in each readX/writeX routine, since + * they may be called directly, rather than through the + * ioreadNN/iowriteNN routines. + */ + +__EXTERN_INLINE u8 t2_readb(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr - T2_DENSE_MEM; + unsigned long result; + + t2_set_hae; + + result = *(vip) ((addr << 5) + T2_SPARSE_MEM + 0x00); + return __kernel_extbl(result, addr & 3); +} + +__EXTERN_INLINE u16 t2_readw(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr - T2_DENSE_MEM; + unsigned long result; + + t2_set_hae; + + result = *(vuip) ((addr << 5) + T2_SPARSE_MEM + 0x08); + return __kernel_extwl(result, addr & 3); +} + +/* + * On SABLE with T2, we must use SPARSE memory even for 32-bit access, + * because we cannot access all of DENSE without changing its HAE. + */ +__EXTERN_INLINE u32 t2_readl(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr - T2_DENSE_MEM; + unsigned long result; + + t2_set_hae; + + result = *(vuip) ((addr << 5) + T2_SPARSE_MEM + 0x18); + return result & 0xffffffffUL; +} + +__EXTERN_INLINE u64 t2_readq(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr - T2_DENSE_MEM; + unsigned long r0, r1, work; + + t2_set_hae; + + work = (addr << 5) + T2_SPARSE_MEM + 0x18; + r0 = *(vuip)(work); + r1 = *(vuip)(work + (4 << 5)); + return r1 << 32 | r0; +} + +__EXTERN_INLINE void t2_writeb(u8 b, volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr - T2_DENSE_MEM; + unsigned long w; + + t2_set_hae; + + w = __kernel_insbl(b, addr & 3); + *(vuip) ((addr << 5) + T2_SPARSE_MEM + 0x00) = w; +} + +__EXTERN_INLINE void t2_writew(u16 b, volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr - T2_DENSE_MEM; + unsigned long w; + + t2_set_hae; + + w = __kernel_inswl(b, addr & 3); + *(vuip) ((addr << 5) + T2_SPARSE_MEM + 0x08) = w; +} + +/* + * On SABLE with T2, we must use SPARSE memory even for 32-bit access, + * because we cannot access all of DENSE without changing its HAE. + */ +__EXTERN_INLINE void t2_writel(u32 b, volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr - T2_DENSE_MEM; + + t2_set_hae; + + *(vuip) ((addr << 5) + T2_SPARSE_MEM + 0x18) = b; +} + +__EXTERN_INLINE void t2_writeq(u64 b, volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr - T2_DENSE_MEM; + unsigned long work; + + t2_set_hae; + + work = (addr << 5) + T2_SPARSE_MEM + 0x18; + *(vuip)work = b; + *(vuip)(work + (4 << 5)) = b >> 32; +} + +__EXTERN_INLINE void __iomem *t2_ioportmap(unsigned long addr) +{ + return (void __iomem *)(addr + T2_IO); +} + +__EXTERN_INLINE void __iomem *t2_ioremap(unsigned long addr, + unsigned long size) +{ + return (void __iomem *)(addr + T2_DENSE_MEM); +} + +__EXTERN_INLINE int t2_is_ioaddr(unsigned long addr) +{ + return (long)addr >= 0; +} + +__EXTERN_INLINE int t2_is_mmio(const volatile void __iomem *addr) +{ + return (unsigned long)addr >= T2_DENSE_MEM; +} + +/* New-style ioread interface. The mmio routines are so ugly for T2 that + it doesn't make sense to merge the pio and mmio routines. */ + +#define IOPORT(OS, NS) \ +__EXTERN_INLINE unsigned int t2_ioread##NS(void __iomem *xaddr) \ +{ \ + if (t2_is_mmio(xaddr)) \ + return t2_read##OS(xaddr); \ + else \ + return t2_in##OS((unsigned long)xaddr - T2_IO); \ +} \ +__EXTERN_INLINE void t2_iowrite##NS(u##NS b, void __iomem *xaddr) \ +{ \ + if (t2_is_mmio(xaddr)) \ + t2_write##OS(b, xaddr); \ + else \ + t2_out##OS(b, (unsigned long)xaddr - T2_IO); \ +} + +IOPORT(b, 8) +IOPORT(w, 16) +IOPORT(l, 32) + +#undef IOPORT + +#undef vip +#undef vuip + +#undef __IO_PREFIX +#define __IO_PREFIX t2 +#define t2_trivial_rw_bw 0 +#define t2_trivial_rw_lq 0 +#define t2_trivial_io_bw 0 +#define t2_trivial_io_lq 0 +#define t2_trivial_iounmap 1 +#include <asm/io_trivial.h> + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_T2__H__ */ diff --git a/arch/alpha/include/asm/core_titan.h b/arch/alpha/include/asm/core_titan.h new file mode 100644 index 00000000..8cf79d12 --- /dev/null +++ b/arch/alpha/include/asm/core_titan.h @@ -0,0 +1,409 @@ +#ifndef __ALPHA_TITAN__H__ +#define __ALPHA_TITAN__H__ + +#include <linux/types.h> +#include <asm/compiler.h> + +/* + * TITAN is the internal names for a core logic chipset which provides + * memory controller and PCI/AGP access for 21264 based systems. + * + * This file is based on: + * + * Titan Chipset Engineering Specification + * Revision 0.12 + * 13 July 1999 + * + */ + +/* XXX: Do we need to conditionalize on this? */ +#ifdef USE_48_BIT_KSEG +#define TI_BIAS 0x80000000000UL +#else +#define TI_BIAS 0x10000000000UL +#endif + +/* + * CChip, DChip, and PChip registers + */ + +typedef struct { + volatile unsigned long csr __attribute__((aligned(64))); +} titan_64; + +typedef struct { + titan_64 csc; + titan_64 mtr; + titan_64 misc; + titan_64 mpd; + titan_64 aar0; + titan_64 aar1; + titan_64 aar2; + titan_64 aar3; + titan_64 dim0; + titan_64 dim1; + titan_64 dir0; + titan_64 dir1; + titan_64 drir; + titan_64 prben; + titan_64 iic0; + titan_64 iic1; + titan_64 mpr0; + titan_64 mpr1; + titan_64 mpr2; + titan_64 mpr3; + titan_64 rsvd[2]; + titan_64 ttr; + titan_64 tdr; + titan_64 dim2; + titan_64 dim3; + titan_64 dir2; + titan_64 dir3; + titan_64 iic2; + titan_64 iic3; + titan_64 pwr; + titan_64 reserved[17]; + titan_64 cmonctla; + titan_64 cmonctlb; + titan_64 cmoncnt01; + titan_64 cmoncnt23; + titan_64 cpen; +} titan_cchip; + +typedef struct { + titan_64 dsc; + titan_64 str; + titan_64 drev; + titan_64 dsc2; +} titan_dchip; + +typedef struct { + titan_64 wsba[4]; + titan_64 wsm[4]; + titan_64 tba[4]; + titan_64 pctl; + titan_64 plat; + titan_64 reserved0[2]; + union { + struct { + titan_64 serror; + titan_64 serren; + titan_64 serrset; + titan_64 reserved0; + titan_64 gperror; + titan_64 gperren; + titan_64 gperrset; + titan_64 reserved1; + titan_64 gtlbiv; + titan_64 gtlbia; + titan_64 reserved2[2]; + titan_64 sctl; + titan_64 reserved3[3]; + } g; + struct { + titan_64 agperror; + titan_64 agperren; + titan_64 agperrset; + titan_64 agplastwr; + titan_64 aperror; + titan_64 aperren; + titan_64 aperrset; + titan_64 reserved0; + titan_64 atlbiv; + titan_64 atlbia; + titan_64 reserved1[6]; + } a; + } port_specific; + titan_64 sprst; + titan_64 reserved1[31]; +} titan_pachip_port; + +typedef struct { + titan_pachip_port g_port; + titan_pachip_port a_port; +} titan_pachip; + +#define TITAN_cchip ((titan_cchip *)(IDENT_ADDR+TI_BIAS+0x1A0000000UL)) +#define TITAN_dchip ((titan_dchip *)(IDENT_ADDR+TI_BIAS+0x1B0000800UL)) +#define TITAN_pachip0 ((titan_pachip *)(IDENT_ADDR+TI_BIAS+0x180000000UL)) +#define TITAN_pachip1 ((titan_pachip *)(IDENT_ADDR+TI_BIAS+0x380000000UL)) +extern unsigned TITAN_agp; +extern int TITAN_bootcpu; + +/* + * TITAN PA-chip Window Space Base Address register. + * (WSBA[0-2]) + */ +#define wsba_m_ena 0x1 +#define wsba_m_sg 0x2 +#define wsba_m_addr 0xFFF00000 +#define wmask_k_sz1gb 0x3FF00000 +union TPAchipWSBA { + struct { + unsigned wsba_v_ena : 1; + unsigned wsba_v_sg : 1; + unsigned wsba_v_rsvd1 : 18; + unsigned wsba_v_addr : 12; + unsigned wsba_v_rsvd2 : 32; + } wsba_r_bits; + int wsba_q_whole [2]; +}; + +/* + * TITAN PA-chip Control Register + * This definition covers both the G-Port GPCTL and the A-PORT APCTL. + * Bits <51:0> are the same in both cases. APCTL<63:52> are only + * applicable to AGP. + */ +#define pctl_m_fbtb 0x00000001 +#define pctl_m_thdis 0x00000002 +#define pctl_m_chaindis 0x00000004 +#define pctl_m_tgtlat 0x00000018 +#define pctl_m_hole 0x00000020 +#define pctl_m_mwin 0x00000040 +#define pctl_m_arbena 0x00000080 +#define pctl_m_prigrp 0x0000FF00 +#define pctl_m_ppri 0x00010000 +#define pctl_m_pcispd66 0x00020000 +#define pctl_m_cngstlt 0x003C0000 +#define pctl_m_ptpdesten 0x3FC00000 +#define pctl_m_dpcen 0x40000000 +#define pctl_m_apcen 0x0000000080000000UL +#define pctl_m_dcrtv 0x0000000300000000UL +#define pctl_m_en_stepping 0x0000000400000000UL +#define apctl_m_rsvd1 0x000FFFF800000000UL +#define apctl_m_agp_rate 0x0030000000000000UL +#define apctl_m_agp_sba_en 0x0040000000000000UL +#define apctl_m_agp_en 0x0080000000000000UL +#define apctl_m_rsvd2 0x0100000000000000UL +#define apctl_m_agp_present 0x0200000000000000UL +#define apctl_agp_hp_rd 0x1C00000000000000UL +#define apctl_agp_lp_rd 0xE000000000000000UL +#define gpctl_m_rsvd 0xFFFFFFF800000000UL +union TPAchipPCTL { + struct { + unsigned pctl_v_fbtb : 1; /* A/G [0] */ + unsigned pctl_v_thdis : 1; /* A/G [1] */ + unsigned pctl_v_chaindis : 1; /* A/G [2] */ + unsigned pctl_v_tgtlat : 2; /* A/G [4:3] */ + unsigned pctl_v_hole : 1; /* A/G [5] */ + unsigned pctl_v_mwin : 1; /* A/G [6] */ + unsigned pctl_v_arbena : 1; /* A/G [7] */ + unsigned pctl_v_prigrp : 8; /* A/G [15:8] */ + unsigned pctl_v_ppri : 1; /* A/G [16] */ + unsigned pctl_v_pcispd66 : 1; /* A/G [17] */ + unsigned pctl_v_cngstlt : 4; /* A/G [21:18] */ + unsigned pctl_v_ptpdesten : 8; /* A/G [29:22] */ + unsigned pctl_v_dpcen : 1; /* A/G [30] */ + unsigned pctl_v_apcen : 1; /* A/G [31] */ + unsigned pctl_v_dcrtv : 2; /* A/G [33:32] */ + unsigned pctl_v_en_stepping :1; /* A/G [34] */ + unsigned apctl_v_rsvd1 : 17; /* A [51:35] */ + unsigned apctl_v_agp_rate : 2; /* A [53:52] */ + unsigned apctl_v_agp_sba_en : 1; /* A [54] */ + unsigned apctl_v_agp_en : 1; /* A [55] */ + unsigned apctl_v_rsvd2 : 1; /* A [56] */ + unsigned apctl_v_agp_present : 1; /* A [57] */ + unsigned apctl_v_agp_hp_rd : 3; /* A [60:58] */ + unsigned apctl_v_agp_lp_rd : 3; /* A [63:61] */ + } pctl_r_bits; + unsigned int pctl_l_whole [2]; + unsigned long pctl_q_whole; +}; + +/* + * SERROR / SERREN / SERRSET + */ +union TPAchipSERR { + struct { + unsigned serr_v_lost_uecc : 1; /* [0] */ + unsigned serr_v_uecc : 1; /* [1] */ + unsigned serr_v_cre : 1; /* [2] */ + unsigned serr_v_nxio : 1; /* [3] */ + unsigned serr_v_lost_cre : 1; /* [4] */ + unsigned serr_v_rsvd0 : 10; /* [14:5] */ + unsigned serr_v_addr : 32; /* [46:15] */ + unsigned serr_v_rsvd1 : 5; /* [51:47] */ + unsigned serr_v_source : 2; /* [53:52] */ + unsigned serr_v_cmd : 2; /* [55:54] */ + unsigned serr_v_syn : 8; /* [63:56] */ + } serr_r_bits; + unsigned int serr_l_whole[2]; + unsigned long serr_q_whole; +}; + +/* + * GPERROR / APERROR / GPERREN / APERREN / GPERRSET / APERRSET + */ +union TPAchipPERR { + struct { + unsigned long perr_v_lost : 1; /* [0] */ + unsigned long perr_v_serr : 1; /* [1] */ + unsigned long perr_v_perr : 1; /* [2] */ + unsigned long perr_v_dcrto : 1; /* [3] */ + unsigned long perr_v_sge : 1; /* [4] */ + unsigned long perr_v_ape : 1; /* [5] */ + unsigned long perr_v_ta : 1; /* [6] */ + unsigned long perr_v_dpe : 1; /* [7] */ + unsigned long perr_v_nds : 1; /* [8] */ + unsigned long perr_v_iptpr : 1; /* [9] */ + unsigned long perr_v_iptpw : 1; /* [10] */ + unsigned long perr_v_rsvd0 : 3; /* [13:11] */ + unsigned long perr_v_addr : 33; /* [46:14] */ + unsigned long perr_v_dac : 1; /* [47] */ + unsigned long perr_v_mwin : 1; /* [48] */ + unsigned long perr_v_rsvd1 : 3; /* [51:49] */ + unsigned long perr_v_cmd : 4; /* [55:52] */ + unsigned long perr_v_rsvd2 : 8; /* [63:56] */ + } perr_r_bits; + unsigned int perr_l_whole[2]; + unsigned long perr_q_whole; +}; + +/* + * AGPERROR / AGPERREN / AGPERRSET + */ +union TPAchipAGPERR { + struct { + unsigned agperr_v_lost : 1; /* [0] */ + unsigned agperr_v_lpqfull : 1; /* [1] */ + unsigned apgerr_v_hpqfull : 1; /* [2] */ + unsigned agperr_v_rescmd : 1; /* [3] */ + unsigned agperr_v_ipte : 1; /* [4] */ + unsigned agperr_v_ptp : 1; /* [5] */ + unsigned agperr_v_nowindow : 1; /* [6] */ + unsigned agperr_v_rsvd0 : 8; /* [14:7] */ + unsigned agperr_v_addr : 32; /* [46:15] */ + unsigned agperr_v_rsvd1 : 1; /* [47] */ + unsigned agperr_v_dac : 1; /* [48] */ + unsigned agperr_v_mwin : 1; /* [49] */ + unsigned agperr_v_cmd : 3; /* [52:50] */ + unsigned agperr_v_length : 6; /* [58:53] */ + unsigned agperr_v_fence : 1; /* [59] */ + unsigned agperr_v_rsvd2 : 4; /* [63:60] */ + } agperr_r_bits; + unsigned int agperr_l_whole[2]; + unsigned long agperr_q_whole; +}; +/* + * Memory spaces: + * Hose numbers are assigned as follows: + * 0 - pachip 0 / G Port + * 1 - pachip 1 / G Port + * 2 - pachip 0 / A Port + * 3 - pachip 1 / A Port + */ +#define TITAN_HOSE_SHIFT (33) +#define TITAN_HOSE(h) (((unsigned long)(h)) << TITAN_HOSE_SHIFT) +#define TITAN_BASE (IDENT_ADDR + TI_BIAS) +#define TITAN_MEM(h) (TITAN_BASE+TITAN_HOSE(h)+0x000000000UL) +#define _TITAN_IACK_SC(h) (TITAN_BASE+TITAN_HOSE(h)+0x1F8000000UL) +#define TITAN_IO(h) (TITAN_BASE+TITAN_HOSE(h)+0x1FC000000UL) +#define TITAN_CONF(h) (TITAN_BASE+TITAN_HOSE(h)+0x1FE000000UL) + +#define TITAN_HOSE_MASK TITAN_HOSE(3) +#define TITAN_IACK_SC _TITAN_IACK_SC(0) /* hack! */ + +/* + * The canonical non-remaped I/O and MEM addresses have these values + * subtracted out. This is arranged so that folks manipulating ISA + * devices can use their familiar numbers and have them map to bus 0. + */ + +#define TITAN_IO_BIAS TITAN_IO(0) +#define TITAN_MEM_BIAS TITAN_MEM(0) + +/* The IO address space is larger than 0xffff */ +#define TITAN_IO_SPACE (TITAN_CONF(0) - TITAN_IO(0)) + +/* TIG Space */ +#define TITAN_TIG_SPACE (TITAN_BASE + 0x100000000UL) + +/* Offset between ram physical addresses and pci64 DAC bus addresses. */ +/* ??? Just a guess. Ought to confirm it hasn't been moved. */ +#define TITAN_DAC_OFFSET (1UL << 40) + +/* + * Data structure for handling TITAN machine checks: + */ +#define SCB_Q_SYSERR 0x620 +#define SCB_Q_PROCERR 0x630 +#define SCB_Q_SYSMCHK 0x660 +#define SCB_Q_PROCMCHK 0x670 +#define SCB_Q_SYSEVENT 0x680 /* environmental / system management */ +struct el_TITAN_sysdata_mcheck { + u64 summary; /* 0x00 */ + u64 c_dirx; /* 0x08 */ + u64 c_misc; /* 0x10 */ + u64 p0_serror; /* 0x18 */ + u64 p0_gperror; /* 0x20 */ + u64 p0_aperror; /* 0x28 */ + u64 p0_agperror;/* 0x30 */ + u64 p1_serror; /* 0x38 */ + u64 p1_gperror; /* 0x40 */ + u64 p1_aperror; /* 0x48 */ + u64 p1_agperror;/* 0x50 */ +}; + +/* + * System area for a privateer 680 environmental/system management mcheck + */ +struct el_PRIVATEER_envdata_mcheck { + u64 summary; /* 0x00 */ + u64 c_dirx; /* 0x08 */ + u64 smir; /* 0x10 */ + u64 cpuir; /* 0x18 */ + u64 psir; /* 0x20 */ + u64 fault; /* 0x28 */ + u64 sys_doors; /* 0x30 */ + u64 temp_warn; /* 0x38 */ + u64 fan_ctrl; /* 0x40 */ + u64 code; /* 0x48 */ + u64 reserved; /* 0x50 */ +}; + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions: + * + * TITAN, a 21??? PCI/memory support chipset for the EV6 (21264) + * can only use linear accesses to get at PCI/AGP memory and I/O spaces. + */ + +/* + * Memory functions. all accesses are done through linear space. + */ +extern void __iomem *titan_ioportmap(unsigned long addr); +extern void __iomem *titan_ioremap(unsigned long addr, unsigned long size); +extern void titan_iounmap(volatile void __iomem *addr); + +__EXTERN_INLINE int titan_is_ioaddr(unsigned long addr) +{ + return addr >= TITAN_BASE; +} + +extern int titan_is_mmio(const volatile void __iomem *addr); + +#undef __IO_PREFIX +#define __IO_PREFIX titan +#define titan_trivial_rw_bw 1 +#define titan_trivial_rw_lq 1 +#define titan_trivial_io_bw 1 +#define titan_trivial_io_lq 1 +#define titan_trivial_iounmap 0 +#include <asm/io_trivial.h> + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_TITAN__H__ */ diff --git a/arch/alpha/include/asm/core_tsunami.h b/arch/alpha/include/asm/core_tsunami.h new file mode 100644 index 00000000..8e39ecf0 --- /dev/null +++ b/arch/alpha/include/asm/core_tsunami.h @@ -0,0 +1,334 @@ +#ifndef __ALPHA_TSUNAMI__H__ +#define __ALPHA_TSUNAMI__H__ + +#include <linux/types.h> +#include <asm/compiler.h> + +/* + * TSUNAMI/TYPHOON are the internal names for the core logic chipset which + * provides memory controller and PCI access for the 21264 based systems. + * + * This file is based on: + * + * Tsunami System Programmers Manual + * Preliminary, Chapters 2-5 + * + */ + +/* XXX: Do we need to conditionalize on this? */ +#ifdef USE_48_BIT_KSEG +#define TS_BIAS 0x80000000000UL +#else +#define TS_BIAS 0x10000000000UL +#endif + +/* + * CChip, DChip, and PChip registers + */ + +typedef struct { + volatile unsigned long csr __attribute__((aligned(64))); +} tsunami_64; + +typedef struct { + tsunami_64 csc; + tsunami_64 mtr; + tsunami_64 misc; + tsunami_64 mpd; + tsunami_64 aar0; + tsunami_64 aar1; + tsunami_64 aar2; + tsunami_64 aar3; + tsunami_64 dim0; + tsunami_64 dim1; + tsunami_64 dir0; + tsunami_64 dir1; + tsunami_64 drir; + tsunami_64 prben; + tsunami_64 iic; /* a.k.a. iic0 */ + tsunami_64 wdr; /* a.k.a. iic1 */ + tsunami_64 mpr0; + tsunami_64 mpr1; + tsunami_64 mpr2; + tsunami_64 mpr3; + tsunami_64 mctl; + tsunami_64 __pad1; + tsunami_64 ttr; + tsunami_64 tdr; + tsunami_64 dim2; + tsunami_64 dim3; + tsunami_64 dir2; + tsunami_64 dir3; + tsunami_64 iic2; + tsunami_64 iic3; +} tsunami_cchip; + +typedef struct { + tsunami_64 dsc; + tsunami_64 str; + tsunami_64 drev; +} tsunami_dchip; + +typedef struct { + tsunami_64 wsba[4]; + tsunami_64 wsm[4]; + tsunami_64 tba[4]; + tsunami_64 pctl; + tsunami_64 plat; + tsunami_64 reserved; + tsunami_64 perror; + tsunami_64 perrmask; + tsunami_64 perrset; + tsunami_64 tlbiv; + tsunami_64 tlbia; + tsunami_64 pmonctl; + tsunami_64 pmoncnt; +} tsunami_pchip; + +#define TSUNAMI_cchip ((tsunami_cchip *)(IDENT_ADDR+TS_BIAS+0x1A0000000UL)) +#define TSUNAMI_dchip ((tsunami_dchip *)(IDENT_ADDR+TS_BIAS+0x1B0000800UL)) +#define TSUNAMI_pchip0 ((tsunami_pchip *)(IDENT_ADDR+TS_BIAS+0x180000000UL)) +#define TSUNAMI_pchip1 ((tsunami_pchip *)(IDENT_ADDR+TS_BIAS+0x380000000UL)) +extern int TSUNAMI_bootcpu; + +/* + * TSUNAMI Pchip Error register. + */ + +#define perror_m_lost 0x1 +#define perror_m_serr 0x2 +#define perror_m_perr 0x4 +#define perror_m_dcrto 0x8 +#define perror_m_sge 0x10 +#define perror_m_ape 0x20 +#define perror_m_ta 0x40 +#define perror_m_rdpe 0x80 +#define perror_m_nds 0x100 +#define perror_m_rto 0x200 +#define perror_m_uecc 0x400 +#define perror_m_cre 0x800 +#define perror_m_addrl 0xFFFFFFFF0000UL +#define perror_m_addrh 0x7000000000000UL +#define perror_m_cmd 0xF0000000000000UL +#define perror_m_syn 0xFF00000000000000UL +union TPchipPERROR { + struct { + unsigned int perror_v_lost : 1; + unsigned perror_v_serr : 1; + unsigned perror_v_perr : 1; + unsigned perror_v_dcrto : 1; + unsigned perror_v_sge : 1; + unsigned perror_v_ape : 1; + unsigned perror_v_ta : 1; + unsigned perror_v_rdpe : 1; + unsigned perror_v_nds : 1; + unsigned perror_v_rto : 1; + unsigned perror_v_uecc : 1; + unsigned perror_v_cre : 1; + unsigned perror_v_rsvd1 : 4; + unsigned perror_v_addrl : 32; + unsigned perror_v_addrh : 3; + unsigned perror_v_rsvd2 : 1; + unsigned perror_v_cmd : 4; + unsigned perror_v_syn : 8; + } perror_r_bits; + int perror_q_whole [2]; +}; + +/* + * TSUNAMI Pchip Window Space Base Address register. + */ +#define wsba_m_ena 0x1 +#define wsba_m_sg 0x2 +#define wsba_m_ptp 0x4 +#define wsba_m_addr 0xFFF00000 +#define wmask_k_sz1gb 0x3FF00000 +union TPchipWSBA { + struct { + unsigned wsba_v_ena : 1; + unsigned wsba_v_sg : 1; + unsigned wsba_v_ptp : 1; + unsigned wsba_v_rsvd1 : 17; + unsigned wsba_v_addr : 12; + unsigned wsba_v_rsvd2 : 32; + } wsba_r_bits; + int wsba_q_whole [2]; +}; + +/* + * TSUNAMI Pchip Control Register + */ +#define pctl_m_fdsc 0x1 +#define pctl_m_fbtb 0x2 +#define pctl_m_thdis 0x4 +#define pctl_m_chaindis 0x8 +#define pctl_m_tgtlat 0x10 +#define pctl_m_hole 0x20 +#define pctl_m_mwin 0x40 +#define pctl_m_arbena 0x80 +#define pctl_m_prigrp 0x7F00 +#define pctl_m_ppri 0x8000 +#define pctl_m_rsvd1 0x30000 +#define pctl_m_eccen 0x40000 +#define pctl_m_padm 0x80000 +#define pctl_m_cdqmax 0xF00000 +#define pctl_m_rev 0xFF000000 +#define pctl_m_crqmax 0xF00000000UL +#define pctl_m_ptpmax 0xF000000000UL +#define pctl_m_pclkx 0x30000000000UL +#define pctl_m_fdsdis 0x40000000000UL +#define pctl_m_fdwdis 0x80000000000UL +#define pctl_m_ptevrfy 0x100000000000UL +#define pctl_m_rpp 0x200000000000UL +#define pctl_m_pid 0xC00000000000UL +#define pctl_m_rsvd2 0xFFFF000000000000UL + +union TPchipPCTL { + struct { + unsigned pctl_v_fdsc : 1; + unsigned pctl_v_fbtb : 1; + unsigned pctl_v_thdis : 1; + unsigned pctl_v_chaindis : 1; + unsigned pctl_v_tgtlat : 1; + unsigned pctl_v_hole : 1; + unsigned pctl_v_mwin : 1; + unsigned pctl_v_arbena : 1; + unsigned pctl_v_prigrp : 7; + unsigned pctl_v_ppri : 1; + unsigned pctl_v_rsvd1 : 2; + unsigned pctl_v_eccen : 1; + unsigned pctl_v_padm : 1; + unsigned pctl_v_cdqmax : 4; + unsigned pctl_v_rev : 8; + unsigned pctl_v_crqmax : 4; + unsigned pctl_v_ptpmax : 4; + unsigned pctl_v_pclkx : 2; + unsigned pctl_v_fdsdis : 1; + unsigned pctl_v_fdwdis : 1; + unsigned pctl_v_ptevrfy : 1; + unsigned pctl_v_rpp : 1; + unsigned pctl_v_pid : 2; + unsigned pctl_v_rsvd2 : 16; + } pctl_r_bits; + int pctl_q_whole [2]; +}; + +/* + * TSUNAMI Pchip Error Mask Register. + */ +#define perrmask_m_lost 0x1 +#define perrmask_m_serr 0x2 +#define perrmask_m_perr 0x4 +#define perrmask_m_dcrto 0x8 +#define perrmask_m_sge 0x10 +#define perrmask_m_ape 0x20 +#define perrmask_m_ta 0x40 +#define perrmask_m_rdpe 0x80 +#define perrmask_m_nds 0x100 +#define perrmask_m_rto 0x200 +#define perrmask_m_uecc 0x400 +#define perrmask_m_cre 0x800 +#define perrmask_m_rsvd 0xFFFFFFFFFFFFF000UL +union TPchipPERRMASK { + struct { + unsigned int perrmask_v_lost : 1; + unsigned perrmask_v_serr : 1; + unsigned perrmask_v_perr : 1; + unsigned perrmask_v_dcrto : 1; + unsigned perrmask_v_sge : 1; + unsigned perrmask_v_ape : 1; + unsigned perrmask_v_ta : 1; + unsigned perrmask_v_rdpe : 1; + unsigned perrmask_v_nds : 1; + unsigned perrmask_v_rto : 1; + unsigned perrmask_v_uecc : 1; + unsigned perrmask_v_cre : 1; + unsigned perrmask_v_rsvd1 : 20; + unsigned perrmask_v_rsvd2 : 32; + } perrmask_r_bits; + int perrmask_q_whole [2]; +}; + +/* + * Memory spaces: + */ +#define TSUNAMI_HOSE(h) (((unsigned long)(h)) << 33) +#define TSUNAMI_BASE (IDENT_ADDR + TS_BIAS) + +#define TSUNAMI_MEM(h) (TSUNAMI_BASE+TSUNAMI_HOSE(h) + 0x000000000UL) +#define _TSUNAMI_IACK_SC(h) (TSUNAMI_BASE+TSUNAMI_HOSE(h) + 0x1F8000000UL) +#define TSUNAMI_IO(h) (TSUNAMI_BASE+TSUNAMI_HOSE(h) + 0x1FC000000UL) +#define TSUNAMI_CONF(h) (TSUNAMI_BASE+TSUNAMI_HOSE(h) + 0x1FE000000UL) + +#define TSUNAMI_IACK_SC _TSUNAMI_IACK_SC(0) /* hack! */ + + +/* + * The canonical non-remaped I/O and MEM addresses have these values + * subtracted out. This is arranged so that folks manipulating ISA + * devices can use their familiar numbers and have them map to bus 0. + */ + +#define TSUNAMI_IO_BIAS TSUNAMI_IO(0) +#define TSUNAMI_MEM_BIAS TSUNAMI_MEM(0) + +/* The IO address space is larger than 0xffff */ +#define TSUNAMI_IO_SPACE (TSUNAMI_CONF(0) - TSUNAMI_IO(0)) + +/* Offset between ram physical addresses and pci64 DAC bus addresses. */ +#define TSUNAMI_DAC_OFFSET (1UL << 40) + +/* + * Data structure for handling TSUNAMI machine checks: + */ +struct el_TSUNAMI_sysdata_mcheck { +}; + + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions: + * + * TSUNAMI, the 21??? PCI/memory support chipset for the EV6 (21264) + * can only use linear accesses to get at PCI memory and I/O spaces. + */ + +/* + * Memory functions. all accesses are done through linear space. + */ +extern void __iomem *tsunami_ioportmap(unsigned long addr); +extern void __iomem *tsunami_ioremap(unsigned long addr, unsigned long size); +__EXTERN_INLINE int tsunami_is_ioaddr(unsigned long addr) +{ + return addr >= TSUNAMI_BASE; +} + +__EXTERN_INLINE int tsunami_is_mmio(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + return (addr & 0x100000000UL) == 0; +} + +#undef __IO_PREFIX +#define __IO_PREFIX tsunami +#define tsunami_trivial_rw_bw 1 +#define tsunami_trivial_rw_lq 1 +#define tsunami_trivial_io_bw 1 +#define tsunami_trivial_io_lq 1 +#define tsunami_trivial_iounmap 1 +#include <asm/io_trivial.h> + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_TSUNAMI__H__ */ diff --git a/arch/alpha/include/asm/core_wildfire.h b/arch/alpha/include/asm/core_wildfire.h new file mode 100644 index 00000000..cd562f54 --- /dev/null +++ b/arch/alpha/include/asm/core_wildfire.h @@ -0,0 +1,318 @@ +#ifndef __ALPHA_WILDFIRE__H__ +#define __ALPHA_WILDFIRE__H__ + +#include <linux/types.h> +#include <asm/compiler.h> + +#define WILDFIRE_MAX_QBB 8 /* more than 8 requires other mods */ +#define WILDFIRE_PCA_PER_QBB 4 +#define WILDFIRE_IRQ_PER_PCA 64 + +#define WILDFIRE_NR_IRQS \ + (WILDFIRE_MAX_QBB * WILDFIRE_PCA_PER_QBB * WILDFIRE_IRQ_PER_PCA) + +extern unsigned char wildfire_hard_qbb_map[WILDFIRE_MAX_QBB]; +extern unsigned char wildfire_soft_qbb_map[WILDFIRE_MAX_QBB]; +#define QBB_MAP_EMPTY 0xff + +extern unsigned long wildfire_hard_qbb_mask; +extern unsigned long wildfire_soft_qbb_mask; +extern unsigned long wildfire_gp_mask; +extern unsigned long wildfire_hs_mask; +extern unsigned long wildfire_iop_mask; +extern unsigned long wildfire_ior_mask; +extern unsigned long wildfire_pca_mask; +extern unsigned long wildfire_cpu_mask; +extern unsigned long wildfire_mem_mask; + +#define WILDFIRE_QBB_EXISTS(qbbno) (wildfire_soft_qbb_mask & (1 << (qbbno))) + +#define WILDFIRE_MEM_EXISTS(qbbno) (wildfire_mem_mask & (0xf << ((qbbno) << 2))) + +#define WILDFIRE_PCA_EXISTS(qbbno, pcano) \ + (wildfire_pca_mask & (1 << (((qbbno) << 2) + (pcano)))) + +typedef struct { + volatile unsigned long csr __attribute__((aligned(64))); +} wildfire_64; + +typedef struct { + volatile unsigned long csr __attribute__((aligned(256))); +} wildfire_256; + +typedef struct { + volatile unsigned long csr __attribute__((aligned(2048))); +} wildfire_2k; + +typedef struct { + wildfire_64 qsd_whami; + wildfire_64 qsd_rev; + wildfire_64 qsd_port_present; + wildfire_64 qsd_port_active; + wildfire_64 qsd_fault_ena; + wildfire_64 qsd_cpu_int_ena; + wildfire_64 qsd_mem_config; + wildfire_64 qsd_err_sum; + wildfire_64 ce_sum[4]; + wildfire_64 dev_init[4]; + wildfire_64 it_int[4]; + wildfire_64 ip_int[4]; + wildfire_64 uce_sum[4]; + wildfire_64 se_sum__non_dev_int[4]; + wildfire_64 scratch[4]; + wildfire_64 qsd_timer; + wildfire_64 qsd_diag; +} wildfire_qsd; + +typedef struct { + wildfire_256 qsd_whami; + wildfire_256 __pad1; + wildfire_256 ce_sum; + wildfire_256 dev_init; + wildfire_256 it_int; + wildfire_256 ip_int; + wildfire_256 uce_sum; + wildfire_256 se_sum; +} wildfire_fast_qsd; + +typedef struct { + wildfire_2k qsa_qbb_id; + wildfire_2k __pad1; + wildfire_2k qsa_port_ena; + wildfire_2k qsa_scratch; + wildfire_2k qsa_config[5]; + wildfire_2k qsa_ref_int; + wildfire_2k qsa_qbb_pop[2]; + wildfire_2k qsa_dtag_fc; + wildfire_2k __pad2[3]; + wildfire_2k qsa_diag; + wildfire_2k qsa_diag_lock[4]; + wildfire_2k __pad3[11]; + wildfire_2k qsa_cpu_err_sum; + wildfire_2k qsa_misc_err_sum; + wildfire_2k qsa_tmo_err_sum; + wildfire_2k qsa_err_ena; + wildfire_2k qsa_tmo_config; + wildfire_2k qsa_ill_cmd_err_sum; + wildfire_2k __pad4[26]; + wildfire_2k qsa_busy_mask; + wildfire_2k qsa_arr_valid; + wildfire_2k __pad5[2]; + wildfire_2k qsa_port_map[4]; + wildfire_2k qsa_arr_addr[8]; + wildfire_2k qsa_arr_mask[8]; +} wildfire_qsa; + +typedef struct { + wildfire_64 ioa_config; + wildfire_64 iod_config; + wildfire_64 iop_switch_credits; + wildfire_64 __pad1; + wildfire_64 iop_hose_credits; + wildfire_64 __pad2[11]; + struct { + wildfire_64 __pad3; + wildfire_64 init; + } iop_hose[4]; + wildfire_64 ioa_hose_0_ctrl; + wildfire_64 iod_hose_0_ctrl; + wildfire_64 ioa_hose_1_ctrl; + wildfire_64 iod_hose_1_ctrl; + wildfire_64 ioa_hose_2_ctrl; + wildfire_64 iod_hose_2_ctrl; + wildfire_64 ioa_hose_3_ctrl; + wildfire_64 iod_hose_3_ctrl; + struct { + wildfire_64 target; + wildfire_64 __pad4; + } iop_dev_int[4]; + + wildfire_64 iop_err_int_target; + wildfire_64 __pad5[7]; + wildfire_64 iop_qbb_err_sum; + wildfire_64 __pad6; + wildfire_64 iop_qbb_se_sum; + wildfire_64 __pad7; + wildfire_64 ioa_err_sum; + wildfire_64 iod_err_sum; + wildfire_64 __pad8[4]; + wildfire_64 ioa_diag_force_err; + wildfire_64 iod_diag_force_err; + wildfire_64 __pad9[4]; + wildfire_64 iop_diag_send_err_int; + wildfire_64 __pad10[15]; + wildfire_64 ioa_scratch; + wildfire_64 iod_scratch; +} wildfire_iop; + +typedef struct { + wildfire_2k gpa_qbb_map[4]; + wildfire_2k gpa_mem_pop_map; + wildfire_2k gpa_scratch; + wildfire_2k gpa_diag; + wildfire_2k gpa_config_0; + wildfire_2k __pad1; + wildfire_2k gpa_init_id; + wildfire_2k gpa_config_2; + /* not complete */ +} wildfire_gp; + +typedef struct { + wildfire_64 pca_what_am_i; + wildfire_64 pca_err_sum; + wildfire_64 pca_diag_force_err; + wildfire_64 pca_diag_send_err_int; + wildfire_64 pca_hose_credits; + wildfire_64 pca_scratch; + wildfire_64 pca_micro_addr; + wildfire_64 pca_micro_data; + wildfire_64 pca_pend_int; + wildfire_64 pca_sent_int; + wildfire_64 __pad1; + wildfire_64 pca_stdio_edge_level; + wildfire_64 __pad2[52]; + struct { + wildfire_64 target; + wildfire_64 enable; + } pca_int[4]; + wildfire_64 __pad3[56]; + wildfire_64 pca_alt_sent_int[32]; +} wildfire_pca; + +typedef struct { + wildfire_64 ne_what_am_i; + /* not complete */ +} wildfire_ne; + +typedef struct { + wildfire_64 fe_what_am_i; + /* not complete */ +} wildfire_fe; + +typedef struct { + wildfire_64 pci_io_addr_ext; + wildfire_64 pci_ctrl; + wildfire_64 pci_err_sum; + wildfire_64 pci_err_addr; + wildfire_64 pci_stall_cnt; + wildfire_64 pci_iack_special; + wildfire_64 __pad1[2]; + wildfire_64 pci_pend_int; + wildfire_64 pci_sent_int; + wildfire_64 __pad2[54]; + struct { + wildfire_64 wbase; + wildfire_64 wmask; + wildfire_64 tbase; + } pci_window[4]; + wildfire_64 pci_flush_tlb; + wildfire_64 pci_perf_mon; +} wildfire_pci; + +#define WILDFIRE_ENTITY_SHIFT 18 + +#define WILDFIRE_GP_ENTITY (0x10UL << WILDFIRE_ENTITY_SHIFT) +#define WILDFIRE_IOP_ENTITY (0x08UL << WILDFIRE_ENTITY_SHIFT) +#define WILDFIRE_QSA_ENTITY (0x04UL << WILDFIRE_ENTITY_SHIFT) +#define WILDFIRE_QSD_ENTITY_SLOW (0x05UL << WILDFIRE_ENTITY_SHIFT) +#define WILDFIRE_QSD_ENTITY_FAST (0x01UL << WILDFIRE_ENTITY_SHIFT) + +#define WILDFIRE_PCA_ENTITY(pca) ((0xc|(pca))<<WILDFIRE_ENTITY_SHIFT) + +#define WILDFIRE_BASE (IDENT_ADDR | (1UL << 40)) + +#define WILDFIRE_QBB_MASK 0x0fUL /* for now, only 4 bits/16 QBBs */ + +#define WILDFIRE_QBB(q) ((~((long)(q)) & WILDFIRE_QBB_MASK) << 36) +#define WILDFIRE_HOSE(h) ((long)(h) << 33) + +#define WILDFIRE_QBB_IO(q) (WILDFIRE_BASE | WILDFIRE_QBB(q)) +#define WILDFIRE_QBB_HOSE(q,h) (WILDFIRE_QBB_IO(q) | WILDFIRE_HOSE(h)) + +#define WILDFIRE_MEM(q,h) (WILDFIRE_QBB_HOSE(q,h) | 0x000000000UL) +#define WILDFIRE_CONF(q,h) (WILDFIRE_QBB_HOSE(q,h) | 0x1FE000000UL) +#define WILDFIRE_IO(q,h) (WILDFIRE_QBB_HOSE(q,h) | 0x1FF000000UL) + +#define WILDFIRE_qsd(q) \ + ((wildfire_qsd *)(WILDFIRE_QBB_IO(q)|WILDFIRE_QSD_ENTITY_SLOW|(((1UL<<13)-1)<<23))) + +#define WILDFIRE_fast_qsd() \ + ((wildfire_fast_qsd *)(WILDFIRE_QBB_IO(0)|WILDFIRE_QSD_ENTITY_FAST|(((1UL<<13)-1)<<23))) + +#define WILDFIRE_qsa(q) \ + ((wildfire_qsa *)(WILDFIRE_QBB_IO(q)|WILDFIRE_QSA_ENTITY|(((1UL<<13)-1)<<23))) + +#define WILDFIRE_iop(q) \ + ((wildfire_iop *)(WILDFIRE_QBB_IO(q)|WILDFIRE_IOP_ENTITY|(((1UL<<13)-1)<<23))) + +#define WILDFIRE_gp(q) \ + ((wildfire_gp *)(WILDFIRE_QBB_IO(q)|WILDFIRE_GP_ENTITY|(((1UL<<13)-1)<<23))) + +#define WILDFIRE_pca(q,pca) \ + ((wildfire_pca *)(WILDFIRE_QBB_IO(q)|WILDFIRE_PCA_ENTITY(pca)|(((1UL<<13)-1)<<23))) + +#define WILDFIRE_ne(q,pca) \ + ((wildfire_ne *)(WILDFIRE_QBB_IO(q)|WILDFIRE_PCA_ENTITY(pca)|(((1UL<<13)-1)<<23)|(1UL<<16))) + +#define WILDFIRE_fe(q,pca) \ + ((wildfire_fe *)(WILDFIRE_QBB_IO(q)|WILDFIRE_PCA_ENTITY(pca)|(((1UL<<13)-1)<<23)|(3UL<<15))) + +#define WILDFIRE_pci(q,h) \ + ((wildfire_pci *)(WILDFIRE_QBB_IO(q)|WILDFIRE_PCA_ENTITY(((h)&6)>>1)|((((h)&1)|2)<<16)|(((1UL<<13)-1)<<23))) + +#define WILDFIRE_IO_BIAS WILDFIRE_IO(0,0) +#define WILDFIRE_MEM_BIAS WILDFIRE_MEM(0,0) /* ??? */ + +/* The IO address space is larger than 0xffff */ +#define WILDFIRE_IO_SPACE (8UL*1024*1024) + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * Memory functions. all accesses are done through linear space. + */ + +__EXTERN_INLINE void __iomem *wildfire_ioportmap(unsigned long addr) +{ + return (void __iomem *)(addr + WILDFIRE_IO_BIAS); +} + +__EXTERN_INLINE void __iomem *wildfire_ioremap(unsigned long addr, + unsigned long size) +{ + return (void __iomem *)(addr + WILDFIRE_MEM_BIAS); +} + +__EXTERN_INLINE int wildfire_is_ioaddr(unsigned long addr) +{ + return addr >= WILDFIRE_BASE; +} + +__EXTERN_INLINE int wildfire_is_mmio(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long)xaddr; + return (addr & 0x100000000UL) == 0; +} + +#undef __IO_PREFIX +#define __IO_PREFIX wildfire +#define wildfire_trivial_rw_bw 1 +#define wildfire_trivial_rw_lq 1 +#define wildfire_trivial_io_bw 1 +#define wildfire_trivial_io_lq 1 +#define wildfire_trivial_iounmap 1 +#include <asm/io_trivial.h> + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_WILDFIRE__H__ */ diff --git a/arch/alpha/include/asm/cputime.h b/arch/alpha/include/asm/cputime.h new file mode 100644 index 00000000..19577fd9 --- /dev/null +++ b/arch/alpha/include/asm/cputime.h @@ -0,0 +1,6 @@ +#ifndef __ALPHA_CPUTIME_H +#define __ALPHA_CPUTIME_H + +#include <asm-generic/cputime.h> + +#endif /* __ALPHA_CPUTIME_H */ diff --git a/arch/alpha/include/asm/current.h b/arch/alpha/include/asm/current.h new file mode 100644 index 00000000..094d285a --- /dev/null +++ b/arch/alpha/include/asm/current.h @@ -0,0 +1,9 @@ +#ifndef _ALPHA_CURRENT_H +#define _ALPHA_CURRENT_H + +#include <linux/thread_info.h> + +#define get_current() (current_thread_info()->task) +#define current get_current() + +#endif /* _ALPHA_CURRENT_H */ diff --git a/arch/alpha/include/asm/delay.h b/arch/alpha/include/asm/delay.h new file mode 100644 index 00000000..2aa3f410 --- /dev/null +++ b/arch/alpha/include/asm/delay.h @@ -0,0 +1,10 @@ +#ifndef __ALPHA_DELAY_H +#define __ALPHA_DELAY_H + +extern void __delay(int loops); +extern void udelay(unsigned long usecs); + +extern void ndelay(unsigned long nsecs); +#define ndelay ndelay + +#endif /* defined(__ALPHA_DELAY_H) */ diff --git a/arch/alpha/include/asm/device.h b/arch/alpha/include/asm/device.h new file mode 100644 index 00000000..d8f9872b --- /dev/null +++ b/arch/alpha/include/asm/device.h @@ -0,0 +1,7 @@ +/* + * Arch specific extensions to struct device + * + * This file is released under the GPLv2 + */ +#include <asm-generic/device.h> + diff --git a/arch/alpha/include/asm/div64.h b/arch/alpha/include/asm/div64.h new file mode 100644 index 00000000..6cd978ce --- /dev/null +++ b/arch/alpha/include/asm/div64.h @@ -0,0 +1 @@ +#include <asm-generic/div64.h> diff --git a/arch/alpha/include/asm/dma-mapping.h b/arch/alpha/include/asm/dma-mapping.h new file mode 100644 index 00000000..dfa32f06 --- /dev/null +++ b/arch/alpha/include/asm/dma-mapping.h @@ -0,0 +1,53 @@ +#ifndef _ALPHA_DMA_MAPPING_H +#define _ALPHA_DMA_MAPPING_H + +#include <linux/dma-attrs.h> + +extern struct dma_map_ops *dma_ops; + +static inline struct dma_map_ops *get_dma_ops(struct device *dev) +{ + return dma_ops; +} + +#include <asm-generic/dma-mapping-common.h> + +#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) + +static inline void *dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + struct dma_attrs *attrs) +{ + return get_dma_ops(dev)->alloc(dev, size, dma_handle, gfp, attrs); +} + +#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL) + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + get_dma_ops(dev)->free(dev, size, vaddr, dma_handle, attrs); +} + +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return get_dma_ops(dev)->mapping_error(dev, dma_addr); +} + +static inline int dma_supported(struct device *dev, u64 mask) +{ + return get_dma_ops(dev)->dma_supported(dev, mask); +} + +static inline int dma_set_mask(struct device *dev, u64 mask) +{ + return get_dma_ops(dev)->set_dma_mask(dev, mask); +} + +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) + +#define dma_cache_sync(dev, va, size, dir) ((void)0) + +#endif /* _ALPHA_DMA_MAPPING_H */ diff --git a/arch/alpha/include/asm/dma.h b/arch/alpha/include/asm/dma.h new file mode 100644 index 00000000..87cfdbdf --- /dev/null +++ b/arch/alpha/include/asm/dma.h @@ -0,0 +1,376 @@ +/* + * include/asm-alpha/dma.h + * + * This is essentially the same as the i386 DMA stuff, as the AlphaPCs + * use ISA-compatible dma. The only extension is support for high-page + * registers that allow to set the top 8 bits of a 32-bit DMA address. + * This register should be written last when setting up a DMA address + * as this will also enable DMA across 64 KB boundaries. + */ + +/* $Id: dma.h,v 1.7 1992/12/14 00:29:34 root Exp root $ + * linux/include/asm/dma.h: Defines for using and allocating dma channels. + * Written by Hennus Bergman, 1992. + * High DMA channel support & info by Hannu Savolainen + * and John Boyd, Nov. 1992. + */ + +#ifndef _ASM_DMA_H +#define _ASM_DMA_H + +#include <linux/spinlock.h> +#include <asm/io.h> + +#define dma_outb outb +#define dma_inb inb + +/* + * NOTES about DMA transfers: + * + * controller 1: channels 0-3, byte operations, ports 00-1F + * controller 2: channels 4-7, word operations, ports C0-DF + * + * - ALL registers are 8 bits only, regardless of transfer size + * - channel 4 is not used - cascades 1 into 2. + * - channels 0-3 are byte - addresses/counts are for physical bytes + * - channels 5-7 are word - addresses/counts are for physical words + * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries + * - transfer count loaded to registers is 1 less than actual count + * - controller 2 offsets are all even (2x offsets for controller 1) + * - page registers for 5-7 don't use data bit 0, represent 128K pages + * - page registers for 0-3 use bit 0, represent 64K pages + * + * DMA transfers are limited to the lower 16MB of _physical_ memory. + * Note that addresses loaded into registers must be _physical_ addresses, + * not logical addresses (which may differ if paging is active). + * + * Address mapping for channels 0-3: + * + * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses) + * | ... | | ... | | ... | + * | ... | | ... | | ... | + * | ... | | ... | | ... | + * P7 ... P0 A7 ... A0 A7 ... A0 + * | Page | Addr MSB | Addr LSB | (DMA registers) + * + * Address mapping for channels 5-7: + * + * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses) + * | ... | \ \ ... \ \ \ ... \ \ + * | ... | \ \ ... \ \ \ ... \ (not used) + * | ... | \ \ ... \ \ \ ... \ + * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0 + * | Page | Addr MSB | Addr LSB | (DMA registers) + * + * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses + * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at + * the hardware level, so odd-byte transfers aren't possible). + * + * Transfer count (_not # bytes_) is limited to 64K, represented as actual + * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more, + * and up to 128K bytes may be transferred on channels 5-7 in one operation. + * + */ + +#define MAX_DMA_CHANNELS 8 + +/* + ISA DMA limitations on Alpha platforms, + + These may be due to SIO (PCI<->ISA bridge) chipset limitation, or + just a wiring limit. +*/ + +/* The maximum address for ISA DMA transfer on Alpha XL, due to an + hardware SIO limitation, is 64MB. +*/ +#define ALPHA_XL_MAX_ISA_DMA_ADDRESS 0x04000000UL + +/* The maximum address for ISA DMA transfer on RUFFIAN, + due to an hardware SIO limitation, is 16MB. +*/ +#define ALPHA_RUFFIAN_MAX_ISA_DMA_ADDRESS 0x01000000UL + +/* The maximum address for ISA DMA transfer on SABLE, and some ALCORs, + due to an hardware SIO chip limitation, is 2GB. +*/ +#define ALPHA_SABLE_MAX_ISA_DMA_ADDRESS 0x80000000UL +#define ALPHA_ALCOR_MAX_ISA_DMA_ADDRESS 0x80000000UL + +/* + Maximum address for all the others is the complete 32-bit bus + address space. +*/ +#define ALPHA_MAX_ISA_DMA_ADDRESS 0x100000000UL + +#ifdef CONFIG_ALPHA_GENERIC +# define MAX_ISA_DMA_ADDRESS (alpha_mv.max_isa_dma_address) +#else +# if defined(CONFIG_ALPHA_XL) +# define MAX_ISA_DMA_ADDRESS ALPHA_XL_MAX_ISA_DMA_ADDRESS +# elif defined(CONFIG_ALPHA_RUFFIAN) +# define MAX_ISA_DMA_ADDRESS ALPHA_RUFFIAN_MAX_ISA_DMA_ADDRESS +# elif defined(CONFIG_ALPHA_SABLE) +# define MAX_ISA_DMA_ADDRESS ALPHA_SABLE_MAX_ISA_DMA_ADDRESS +# elif defined(CONFIG_ALPHA_ALCOR) +# define MAX_ISA_DMA_ADDRESS ALPHA_ALCOR_MAX_ISA_DMA_ADDRESS +# else +# define MAX_ISA_DMA_ADDRESS ALPHA_MAX_ISA_DMA_ADDRESS +# endif +#endif + +/* If we have the iommu, we don't have any address limitations on DMA. + Otherwise (Nautilus, RX164), we have to have 0-16 Mb DMA zone + like i386. */ +#define MAX_DMA_ADDRESS (alpha_mv.mv_pci_tbi ? \ + ~0UL : IDENT_ADDR + 0x01000000) + +/* 8237 DMA controllers */ +#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */ +#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */ + +/* DMA controller registers */ +#define DMA1_CMD_REG 0x08 /* command register (w) */ +#define DMA1_STAT_REG 0x08 /* status register (r) */ +#define DMA1_REQ_REG 0x09 /* request register (w) */ +#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */ +#define DMA1_MODE_REG 0x0B /* mode register (w) */ +#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */ +#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */ +#define DMA1_RESET_REG 0x0D /* Master Clear (w) */ +#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */ +#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */ +#define DMA1_EXT_MODE_REG (0x400 | DMA1_MODE_REG) + +#define DMA2_CMD_REG 0xD0 /* command register (w) */ +#define DMA2_STAT_REG 0xD0 /* status register (r) */ +#define DMA2_REQ_REG 0xD2 /* request register (w) */ +#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */ +#define DMA2_MODE_REG 0xD6 /* mode register (w) */ +#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */ +#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */ +#define DMA2_RESET_REG 0xDA /* Master Clear (w) */ +#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */ +#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */ +#define DMA2_EXT_MODE_REG (0x400 | DMA2_MODE_REG) + +#define DMA_ADDR_0 0x00 /* DMA address registers */ +#define DMA_ADDR_1 0x02 +#define DMA_ADDR_2 0x04 +#define DMA_ADDR_3 0x06 +#define DMA_ADDR_4 0xC0 +#define DMA_ADDR_5 0xC4 +#define DMA_ADDR_6 0xC8 +#define DMA_ADDR_7 0xCC + +#define DMA_CNT_0 0x01 /* DMA count registers */ +#define DMA_CNT_1 0x03 +#define DMA_CNT_2 0x05 +#define DMA_CNT_3 0x07 +#define DMA_CNT_4 0xC2 +#define DMA_CNT_5 0xC6 +#define DMA_CNT_6 0xCA +#define DMA_CNT_7 0xCE + +#define DMA_PAGE_0 0x87 /* DMA page registers */ +#define DMA_PAGE_1 0x83 +#define DMA_PAGE_2 0x81 +#define DMA_PAGE_3 0x82 +#define DMA_PAGE_5 0x8B +#define DMA_PAGE_6 0x89 +#define DMA_PAGE_7 0x8A + +#define DMA_HIPAGE_0 (0x400 | DMA_PAGE_0) +#define DMA_HIPAGE_1 (0x400 | DMA_PAGE_1) +#define DMA_HIPAGE_2 (0x400 | DMA_PAGE_2) +#define DMA_HIPAGE_3 (0x400 | DMA_PAGE_3) +#define DMA_HIPAGE_4 (0x400 | DMA_PAGE_4) +#define DMA_HIPAGE_5 (0x400 | DMA_PAGE_5) +#define DMA_HIPAGE_6 (0x400 | DMA_PAGE_6) +#define DMA_HIPAGE_7 (0x400 | DMA_PAGE_7) + +#define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */ +#define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */ +#define DMA_MODE_CASCADE 0xC0 /* pass thru DREQ->HRQ, DACK<-HLDA only */ + +#define DMA_AUTOINIT 0x10 + +extern spinlock_t dma_spin_lock; + +static __inline__ unsigned long claim_dma_lock(void) +{ + unsigned long flags; + spin_lock_irqsave(&dma_spin_lock, flags); + return flags; +} + +static __inline__ void release_dma_lock(unsigned long flags) +{ + spin_unlock_irqrestore(&dma_spin_lock, flags); +} + +/* enable/disable a specific DMA channel */ +static __inline__ void enable_dma(unsigned int dmanr) +{ + if (dmanr<=3) + dma_outb(dmanr, DMA1_MASK_REG); + else + dma_outb(dmanr & 3, DMA2_MASK_REG); +} + +static __inline__ void disable_dma(unsigned int dmanr) +{ + if (dmanr<=3) + dma_outb(dmanr | 4, DMA1_MASK_REG); + else + dma_outb((dmanr & 3) | 4, DMA2_MASK_REG); +} + +/* Clear the 'DMA Pointer Flip Flop'. + * Write 0 for LSB/MSB, 1 for MSB/LSB access. + * Use this once to initialize the FF to a known state. + * After that, keep track of it. :-) + * --- In order to do that, the DMA routines below should --- + * --- only be used while interrupts are disabled! --- + */ +static __inline__ void clear_dma_ff(unsigned int dmanr) +{ + if (dmanr<=3) + dma_outb(0, DMA1_CLEAR_FF_REG); + else + dma_outb(0, DMA2_CLEAR_FF_REG); +} + +/* set mode (above) for a specific DMA channel */ +static __inline__ void set_dma_mode(unsigned int dmanr, char mode) +{ + if (dmanr<=3) + dma_outb(mode | dmanr, DMA1_MODE_REG); + else + dma_outb(mode | (dmanr&3), DMA2_MODE_REG); +} + +/* set extended mode for a specific DMA channel */ +static __inline__ void set_dma_ext_mode(unsigned int dmanr, char ext_mode) +{ + if (dmanr<=3) + dma_outb(ext_mode | dmanr, DMA1_EXT_MODE_REG); + else + dma_outb(ext_mode | (dmanr&3), DMA2_EXT_MODE_REG); +} + +/* Set only the page register bits of the transfer address. + * This is used for successive transfers when we know the contents of + * the lower 16 bits of the DMA current address register. + */ +static __inline__ void set_dma_page(unsigned int dmanr, unsigned int pagenr) +{ + switch(dmanr) { + case 0: + dma_outb(pagenr, DMA_PAGE_0); + dma_outb((pagenr >> 8), DMA_HIPAGE_0); + break; + case 1: + dma_outb(pagenr, DMA_PAGE_1); + dma_outb((pagenr >> 8), DMA_HIPAGE_1); + break; + case 2: + dma_outb(pagenr, DMA_PAGE_2); + dma_outb((pagenr >> 8), DMA_HIPAGE_2); + break; + case 3: + dma_outb(pagenr, DMA_PAGE_3); + dma_outb((pagenr >> 8), DMA_HIPAGE_3); + break; + case 5: + dma_outb(pagenr & 0xfe, DMA_PAGE_5); + dma_outb((pagenr >> 8), DMA_HIPAGE_5); + break; + case 6: + dma_outb(pagenr & 0xfe, DMA_PAGE_6); + dma_outb((pagenr >> 8), DMA_HIPAGE_6); + break; + case 7: + dma_outb(pagenr & 0xfe, DMA_PAGE_7); + dma_outb((pagenr >> 8), DMA_HIPAGE_7); + break; + } +} + + +/* Set transfer address & page bits for specific DMA channel. + * Assumes dma flipflop is clear. + */ +static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a) +{ + if (dmanr <= 3) { + dma_outb( a & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE ); + dma_outb( (a>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE ); + } else { + dma_outb( (a>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE ); + dma_outb( (a>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE ); + } + set_dma_page(dmanr, a>>16); /* set hipage last to enable 32-bit mode */ +} + + +/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for + * a specific DMA channel. + * You must ensure the parameters are valid. + * NOTE: from a manual: "the number of transfers is one more + * than the initial word count"! This is taken into account. + * Assumes dma flip-flop is clear. + * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7. + */ +static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count) +{ + count--; + if (dmanr <= 3) { + dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE ); + dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE ); + } else { + dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE ); + dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE ); + } +} + + +/* Get DMA residue count. After a DMA transfer, this + * should return zero. Reading this while a DMA transfer is + * still in progress will return unpredictable results. + * If called before the channel has been used, it may return 1. + * Otherwise, it returns the number of _bytes_ left to transfer. + * + * Assumes DMA flip-flop is clear. + */ +static __inline__ int get_dma_residue(unsigned int dmanr) +{ + unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE + : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE; + + /* using short to get 16-bit wrap around */ + unsigned short count; + + count = 1 + dma_inb(io_port); + count += dma_inb(io_port) << 8; + + return (dmanr<=3)? count : (count<<1); +} + + +/* These are in kernel/dma.c: */ +extern int request_dma(unsigned int dmanr, const char * device_id); /* reserve a DMA channel */ +extern void free_dma(unsigned int dmanr); /* release it again */ +#define KERNEL_HAVE_CHECK_DMA +extern int check_dma(unsigned int dmanr); + +/* From PCI */ + +#ifdef CONFIG_PCI +extern int isa_dma_bridge_buggy; +#else +#define isa_dma_bridge_buggy (0) +#endif + + +#endif /* _ASM_DMA_H */ diff --git a/arch/alpha/include/asm/elf.h b/arch/alpha/include/asm/elf.h new file mode 100644 index 00000000..968d9991 --- /dev/null +++ b/arch/alpha/include/asm/elf.h @@ -0,0 +1,165 @@ +#ifndef __ASM_ALPHA_ELF_H +#define __ASM_ALPHA_ELF_H + +#include <asm/auxvec.h> +#include <asm/special_insns.h> + +/* Special values for the st_other field in the symbol table. */ + +#define STO_ALPHA_NOPV 0x80 +#define STO_ALPHA_STD_GPLOAD 0x88 + +/* + * Alpha ELF relocation types + */ +#define R_ALPHA_NONE 0 /* No reloc */ +#define R_ALPHA_REFLONG 1 /* Direct 32 bit */ +#define R_ALPHA_REFQUAD 2 /* Direct 64 bit */ +#define R_ALPHA_GPREL32 3 /* GP relative 32 bit */ +#define R_ALPHA_LITERAL 4 /* GP relative 16 bit w/optimization */ +#define R_ALPHA_LITUSE 5 /* Optimization hint for LITERAL */ +#define R_ALPHA_GPDISP 6 /* Add displacement to GP */ +#define R_ALPHA_BRADDR 7 /* PC+4 relative 23 bit shifted */ +#define R_ALPHA_HINT 8 /* PC+4 relative 16 bit shifted */ +#define R_ALPHA_SREL16 9 /* PC relative 16 bit */ +#define R_ALPHA_SREL32 10 /* PC relative 32 bit */ +#define R_ALPHA_SREL64 11 /* PC relative 64 bit */ +#define R_ALPHA_GPRELHIGH 17 /* GP relative 32 bit, high 16 bits */ +#define R_ALPHA_GPRELLOW 18 /* GP relative 32 bit, low 16 bits */ +#define R_ALPHA_GPREL16 19 /* GP relative 16 bit */ +#define R_ALPHA_COPY 24 /* Copy symbol at runtime */ +#define R_ALPHA_GLOB_DAT 25 /* Create GOT entry */ +#define R_ALPHA_JMP_SLOT 26 /* Create PLT entry */ +#define R_ALPHA_RELATIVE 27 /* Adjust by program base */ +#define R_ALPHA_BRSGP 28 +#define R_ALPHA_TLSGD 29 +#define R_ALPHA_TLS_LDM 30 +#define R_ALPHA_DTPMOD64 31 +#define R_ALPHA_GOTDTPREL 32 +#define R_ALPHA_DTPREL64 33 +#define R_ALPHA_DTPRELHI 34 +#define R_ALPHA_DTPRELLO 35 +#define R_ALPHA_DTPREL16 36 +#define R_ALPHA_GOTTPREL 37 +#define R_ALPHA_TPREL64 38 +#define R_ALPHA_TPRELHI 39 +#define R_ALPHA_TPRELLO 40 +#define R_ALPHA_TPREL16 41 + +#define SHF_ALPHA_GPREL 0x10000000 + +/* Legal values for e_flags field of Elf64_Ehdr. */ + +#define EF_ALPHA_32BIT 1 /* All addresses are below 2GB */ + +/* + * ELF register definitions.. + */ + +/* + * The OSF/1 version of <sys/procfs.h> makes gregset_t 46 entries long. + * I have no idea why that is so. For now, we just leave it at 33 + * (32 general regs + processor status word). + */ +#define ELF_NGREG 33 +#define ELF_NFPREG 32 + +typedef unsigned long elf_greg_t; +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef double elf_fpreg_t; +typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) ((x)->e_machine == EM_ALPHA) + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS64 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_ALPHA + +#define ELF_EXEC_PAGESIZE 8192 + +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + +#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) + +/* $0 is set by ld.so to a pointer to a function which might be + registered using atexit. This provides a mean for the dynamic + linker to call DT_FINI functions for shared libraries that have + been loaded before the code runs. + + So that we can use the same startup file with static executables, + we start programs with a value of 0 to indicate that there is no + such function. */ + +#define ELF_PLAT_INIT(_r, load_addr) _r->r0 = 0 + +/* The registers are laid out in pt_regs for PAL and syscall + convenience. Re-order them for the linear elf_gregset_t. */ + +struct pt_regs; +struct thread_info; +struct task_struct; +extern void dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt, + struct thread_info *ti); +#define ELF_CORE_COPY_REGS(DEST, REGS) \ + dump_elf_thread(DEST, REGS, current_thread_info()); + +/* Similar, but for a thread other than current. */ + +extern int dump_elf_task(elf_greg_t *dest, struct task_struct *task); +#define ELF_CORE_COPY_TASK_REGS(TASK, DEST) \ + dump_elf_task(*(DEST), TASK) + +/* Similar, but for the FP registers. */ + +extern int dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task); +#define ELF_CORE_COPY_FPREGS(TASK, DEST) \ + dump_elf_task_fp(*(DEST), TASK) + +/* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This is trivial on Alpha, + but not so on other machines. */ + +#define ELF_HWCAP (~amask(-1)) + +/* This yields a string that ld.so will use to load implementation + specific libraries for optimization. This is more specific in + intent than poking at uname or /proc/cpuinfo. */ + +#define ELF_PLATFORM \ +({ \ + enum implver_enum i_ = implver(); \ + ( i_ == IMPLVER_EV4 ? "ev4" \ + : i_ == IMPLVER_EV5 \ + ? (amask(AMASK_BWX) ? "ev5" : "ev56") \ + : amask (AMASK_CIX) ? "ev6" : "ev67"); \ +}) + +#define SET_PERSONALITY(EX) \ + set_personality(((EX).e_flags & EF_ALPHA_32BIT) \ + ? PER_LINUX_32BIT : PER_LINUX) + +extern int alpha_l1i_cacheshape; +extern int alpha_l1d_cacheshape; +extern int alpha_l2_cacheshape; +extern int alpha_l3_cacheshape; + +/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ +#define ARCH_DLINFO \ + do { \ + NEW_AUX_ENT(AT_L1I_CACHESHAPE, alpha_l1i_cacheshape); \ + NEW_AUX_ENT(AT_L1D_CACHESHAPE, alpha_l1d_cacheshape); \ + NEW_AUX_ENT(AT_L2_CACHESHAPE, alpha_l2_cacheshape); \ + NEW_AUX_ENT(AT_L3_CACHESHAPE, alpha_l3_cacheshape); \ + } while (0) + +#endif /* __ASM_ALPHA_ELF_H */ diff --git a/arch/alpha/include/asm/emergency-restart.h b/arch/alpha/include/asm/emergency-restart.h new file mode 100644 index 00000000..108d8c48 --- /dev/null +++ b/arch/alpha/include/asm/emergency-restart.h @@ -0,0 +1,6 @@ +#ifndef _ASM_EMERGENCY_RESTART_H +#define _ASM_EMERGENCY_RESTART_H + +#include <asm-generic/emergency-restart.h> + +#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/arch/alpha/include/asm/err_common.h b/arch/alpha/include/asm/err_common.h new file mode 100644 index 00000000..c2509594 --- /dev/null +++ b/arch/alpha/include/asm/err_common.h @@ -0,0 +1,118 @@ +/* + * linux/include/asm-alpha/err_common.h + * + * Copyright (C) 2000 Jeff Wiedemeier (Compaq Computer Corporation) + * + * Contains declarations and macros to support Alpha error handling + * implementations. + */ + +#ifndef __ALPHA_ERR_COMMON_H +#define __ALPHA_ERR_COMMON_H 1 + +/* + * SCB Vector definitions + */ +#define SCB_Q_SYSERR 0x620 +#define SCB_Q_PROCERR 0x630 +#define SCB_Q_SYSMCHK 0x660 +#define SCB_Q_PROCMCHK 0x670 +#define SCB_Q_SYSEVENT 0x680 + +/* + * Disposition definitions for logout frame parser + */ +#define MCHK_DISPOSITION_UNKNOWN_ERROR 0x00 +#define MCHK_DISPOSITION_REPORT 0x01 +#define MCHK_DISPOSITION_DISMISS 0x02 + +/* + * Error Log definitions + */ +/* + * Types + */ + +#define EL_CLASS__TERMINATION (0) +# define EL_TYPE__TERMINATION__TERMINATION (0) +#define EL_CLASS__HEADER (5) +# define EL_TYPE__HEADER__SYSTEM_ERROR_FRAME (1) +# define EL_TYPE__HEADER__SYSTEM_EVENT_FRAME (2) +# define EL_TYPE__HEADER__HALT_FRAME (3) +# define EL_TYPE__HEADER__LOGOUT_FRAME (19) +#define EL_CLASS__GENERAL_NOTIFICATION (9) +#define EL_CLASS__PCI_ERROR_FRAME (11) +#define EL_CLASS__REGATTA_FAMILY (12) +# define EL_TYPE__REGATTA__PROCESSOR_ERROR_FRAME (1) +# define EL_TYPE__REGATTA__SYSTEM_ERROR_FRAME (2) +# define EL_TYPE__REGATTA__ENVIRONMENTAL_FRAME (3) +# define EL_TYPE__REGATTA__TITAN_PCHIP0_EXTENDED (8) +# define EL_TYPE__REGATTA__TITAN_PCHIP1_EXTENDED (9) +# define EL_TYPE__REGATTA__TITAN_MEMORY_EXTENDED (10) +# define EL_TYPE__REGATTA__PROCESSOR_DBL_ERROR_HALT (11) +# define EL_TYPE__REGATTA__SYSTEM_DBL_ERROR_HALT (12) +#define EL_CLASS__PAL (14) +# define EL_TYPE__PAL__LOGOUT_FRAME (1) +# define EL_TYPE__PAL__EV7_PROCESSOR (4) +# define EL_TYPE__PAL__EV7_ZBOX (5) +# define EL_TYPE__PAL__EV7_RBOX (6) +# define EL_TYPE__PAL__EV7_IO (7) +# define EL_TYPE__PAL__ENV__AMBIENT_TEMPERATURE (10) +# define EL_TYPE__PAL__ENV__AIRMOVER_FAN (11) +# define EL_TYPE__PAL__ENV__VOLTAGE (12) +# define EL_TYPE__PAL__ENV__INTRUSION (13) +# define EL_TYPE__PAL__ENV__POWER_SUPPLY (14) +# define EL_TYPE__PAL__ENV__LAN (15) +# define EL_TYPE__PAL__ENV__HOT_PLUG (16) + +union el_timestamp { + struct { + u8 second; + u8 minute; + u8 hour; + u8 day; + u8 month; + u8 year; + } b; + u64 as_int; +}; + +struct el_subpacket { + u16 length; /* length of header (in bytes) */ + u16 class; /* header class and type... */ + u16 type; /* ...determine content */ + u16 revision; /* header revision */ + union { + struct { /* Class 5, Type 1 - System Error */ + u32 frame_length; + u32 frame_packet_count; + } sys_err; + struct { /* Class 5, Type 2 - System Event */ + union el_timestamp timestamp; + u32 frame_length; + u32 frame_packet_count; + } sys_event; + struct { /* Class 5, Type 3 - Double Error Halt */ + u16 halt_code; + u16 reserved; + union el_timestamp timestamp; + u32 frame_length; + u32 frame_packet_count; + } err_halt; + struct { /* Clasee 5, Type 19 - Logout Frame Header */ + u32 frame_length; + u32 frame_flags; + u32 cpu_offset; + u32 system_offset; + } logout_header; + struct { /* Class 12 - Regatta */ + u64 cpuid; + u64 data_start[1]; + } regatta_frame; + struct { /* Raw */ + u64 data_start[1]; + } raw; + } by_type; +}; + +#endif /* __ALPHA_ERR_COMMON_H */ diff --git a/arch/alpha/include/asm/err_ev6.h b/arch/alpha/include/asm/err_ev6.h new file mode 100644 index 00000000..ea637791 --- /dev/null +++ b/arch/alpha/include/asm/err_ev6.h @@ -0,0 +1,6 @@ +#ifndef __ALPHA_ERR_EV6_H +#define __ALPHA_ERR_EV6_H 1 + +/* Dummy include for now. */ + +#endif /* __ALPHA_ERR_EV6_H */ diff --git a/arch/alpha/include/asm/err_ev7.h b/arch/alpha/include/asm/err_ev7.h new file mode 100644 index 00000000..87f99777 --- /dev/null +++ b/arch/alpha/include/asm/err_ev7.h @@ -0,0 +1,202 @@ +#ifndef __ALPHA_ERR_EV7_H +#define __ALPHA_ERR_EV7_H 1 + +/* + * Data for el packet class PAL (14), type LOGOUT_FRAME (1) + */ +struct ev7_pal_logout_subpacket { + u32 mchk_code; + u32 subpacket_count; + u64 whami; + u64 rbox_whami; + u64 rbox_int; + u64 exc_addr; + union el_timestamp timestamp; + u64 halt_code; + u64 reserved; +}; + +/* + * Data for el packet class PAL (14), type EV7_PROCESSOR (4) + */ +struct ev7_pal_processor_subpacket { + u64 i_stat; + u64 dc_stat; + u64 c_addr; + u64 c_syndrome_1; + u64 c_syndrome_0; + u64 c_stat; + u64 c_sts; + u64 mm_stat; + u64 exc_addr; + u64 ier_cm; + u64 isum; + u64 pal_base; + u64 i_ctl; + u64 process_context; + u64 cbox_ctl; + u64 cbox_stp_ctl; + u64 cbox_acc_ctl; + u64 cbox_lcl_set; + u64 cbox_gbl_set; + u64 bbox_ctl; + u64 bbox_err_sts; + u64 bbox_err_idx; + u64 cbox_ddp_err_sts; + u64 bbox_dat_rmp; + u64 reserved[2]; +}; + +/* + * Data for el packet class PAL (14), type EV7_ZBOX (5) + */ +struct ev7_pal_zbox_subpacket { + u32 zbox0_dram_err_status_1; + u32 zbox0_dram_err_status_2; + u32 zbox0_dram_err_status_3; + u32 zbox0_dram_err_ctl; + u32 zbox0_dram_err_adr; + u32 zbox0_dift_timeout; + u32 zbox0_dram_mapper_ctl; + u32 zbox0_frc_err_adr; + u32 zbox0_dift_err_status; + u32 reserved1; + u32 zbox1_dram_err_status_1; + u32 zbox1_dram_err_status_2; + u32 zbox1_dram_err_status_3; + u32 zbox1_dram_err_ctl; + u32 zbox1_dram_err_adr; + u32 zbox1_dift_timeout; + u32 zbox1_dram_mapper_ctl; + u32 zbox1_frc_err_adr; + u32 zbox1_dift_err_status; + u32 reserved2; + u64 cbox_ctl; + u64 cbox_stp_ctl; + u64 zbox0_error_pa; + u64 zbox1_error_pa; + u64 zbox0_ored_syndrome; + u64 zbox1_ored_syndrome; + u64 reserved3[2]; +}; + +/* + * Data for el packet class PAL (14), type EV7_RBOX (6) + */ +struct ev7_pal_rbox_subpacket { + u64 rbox_cfg; + u64 rbox_n_cfg; + u64 rbox_s_cfg; + u64 rbox_e_cfg; + u64 rbox_w_cfg; + u64 rbox_n_err; + u64 rbox_s_err; + u64 rbox_e_err; + u64 rbox_w_err; + u64 rbox_io_cfg; + u64 rbox_io_err; + u64 rbox_l_err; + u64 rbox_whoami; + u64 rbox_imask; + u64 rbox_intq; + u64 rbox_int; + u64 reserved[2]; +}; + +/* + * Data for el packet class PAL (14), type EV7_IO (7) + */ +struct ev7_pal_io_one_port { + u64 pox_err_sum; + u64 pox_tlb_err; + u64 pox_spl_cmplt; + u64 pox_trans_sum; + u64 pox_first_err; + u64 pox_mult_err; + u64 pox_dm_source; + u64 pox_dm_dest; + u64 pox_dm_size; + u64 pox_dm_ctrl; + u64 reserved; +}; + +struct ev7_pal_io_subpacket { + u64 io_asic_rev; + u64 io_sys_rev; + u64 io7_uph; + u64 hpi_ctl; + u64 crd_ctl; + u64 hei_ctl; + u64 po7_error_sum; + u64 po7_uncrr_sym; + u64 po7_crrct_sym; + u64 po7_ugbge_sym; + u64 po7_err_pkt0; + u64 po7_err_pkt1; + u64 reserved[2]; + struct ev7_pal_io_one_port ports[4]; +}; + +/* + * Environmental subpacket. Data used for el packets: + * class PAL (14), type AMBIENT_TEMPERATURE (10) + * class PAL (14), type AIRMOVER_FAN (11) + * class PAL (14), type VOLTAGE (12) + * class PAL (14), type INTRUSION (13) + * class PAL (14), type POWER_SUPPLY (14) + * class PAL (14), type LAN (15) + * class PAL (14), type HOT_PLUG (16) + */ +struct ev7_pal_environmental_subpacket { + u16 cabinet; + u16 drawer; + u16 reserved1[2]; + u8 module_type; + u8 unit_id; /* unit reporting condition */ + u8 reserved2; + u8 condition; /* condition reported */ +}; + +/* + * Convert environmental type to index + */ +static inline int ev7_lf_env_index(int type) +{ + BUG_ON((type < EL_TYPE__PAL__ENV__AMBIENT_TEMPERATURE) + || (type > EL_TYPE__PAL__ENV__HOT_PLUG)); + + return type - EL_TYPE__PAL__ENV__AMBIENT_TEMPERATURE; +} + +/* + * Data for generic el packet class PAL. + */ +struct ev7_pal_subpacket { + union { + struct ev7_pal_logout_subpacket logout; /* Type 1 */ + struct ev7_pal_processor_subpacket ev7; /* Type 4 */ + struct ev7_pal_zbox_subpacket zbox; /* Type 5 */ + struct ev7_pal_rbox_subpacket rbox; /* Type 6 */ + struct ev7_pal_io_subpacket io; /* Type 7 */ + struct ev7_pal_environmental_subpacket env; /* Type 10-16 */ + u64 as_quad[1]; /* Raw u64 */ + } by_type; +}; + +/* + * Struct to contain collected logout from subpackets. + */ +struct ev7_lf_subpackets { + struct ev7_pal_logout_subpacket *logout; /* Type 1 */ + struct ev7_pal_processor_subpacket *ev7; /* Type 4 */ + struct ev7_pal_zbox_subpacket *zbox; /* Type 5 */ + struct ev7_pal_rbox_subpacket *rbox; /* Type 6 */ + struct ev7_pal_io_subpacket *io; /* Type 7 */ + struct ev7_pal_environmental_subpacket *env[7]; /* Type 10-16 */ + + unsigned int io_pid; +}; + +#endif /* __ALPHA_ERR_EV7_H */ + + diff --git a/arch/alpha/include/asm/errno.h b/arch/alpha/include/asm/errno.h new file mode 100644 index 00000000..e5f29ca2 --- /dev/null +++ b/arch/alpha/include/asm/errno.h @@ -0,0 +1,127 @@ +#ifndef _ALPHA_ERRNO_H +#define _ALPHA_ERRNO_H + +#include <asm-generic/errno-base.h> + +#undef EAGAIN /* 11 in errno-base.h */ + +#define EDEADLK 11 /* Resource deadlock would occur */ + +#define EAGAIN 35 /* Try again */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define EINPROGRESS 36 /* Operation now in progress */ +#define EALREADY 37 /* Operation already in progress */ +#define ENOTSOCK 38 /* Socket operation on non-socket */ +#define EDESTADDRREQ 39 /* Destination address required */ +#define EMSGSIZE 40 /* Message too long */ +#define EPROTOTYPE 41 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 42 /* Protocol not available */ +#define EPROTONOSUPPORT 43 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 44 /* Socket type not supported */ +#define EOPNOTSUPP 45 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 46 /* Protocol family not supported */ +#define EAFNOSUPPORT 47 /* Address family not supported by protocol */ +#define EADDRINUSE 48 /* Address already in use */ +#define EADDRNOTAVAIL 49 /* Cannot assign requested address */ +#define ENETDOWN 50 /* Network is down */ +#define ENETUNREACH 51 /* Network is unreachable */ +#define ENETRESET 52 /* Network dropped connection because of reset */ +#define ECONNABORTED 53 /* Software caused connection abort */ +#define ECONNRESET 54 /* Connection reset by peer */ +#define ENOBUFS 55 /* No buffer space available */ +#define EISCONN 56 /* Transport endpoint is already connected */ +#define ENOTCONN 57 /* Transport endpoint is not connected */ +#define ESHUTDOWN 58 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 59 /* Too many references: cannot splice */ +#define ETIMEDOUT 60 /* Connection timed out */ +#define ECONNREFUSED 61 /* Connection refused */ +#define ELOOP 62 /* Too many symbolic links encountered */ +#define ENAMETOOLONG 63 /* File name too long */ +#define EHOSTDOWN 64 /* Host is down */ +#define EHOSTUNREACH 65 /* No route to host */ +#define ENOTEMPTY 66 /* Directory not empty */ + +#define EUSERS 68 /* Too many users */ +#define EDQUOT 69 /* Quota exceeded */ +#define ESTALE 70 /* Stale NFS file handle */ +#define EREMOTE 71 /* Object is remote */ + +#define ENOLCK 77 /* No record locks available */ +#define ENOSYS 78 /* Function not implemented */ + +#define ENOMSG 80 /* No message of desired type */ +#define EIDRM 81 /* Identifier removed */ +#define ENOSR 82 /* Out of streams resources */ +#define ETIME 83 /* Timer expired */ +#define EBADMSG 84 /* Not a data message */ +#define EPROTO 85 /* Protocol error */ +#define ENODATA 86 /* No data available */ +#define ENOSTR 87 /* Device not a stream */ + +#define ENOPKG 92 /* Package not installed */ + +#define EILSEQ 116 /* Illegal byte sequence */ + +/* The following are just random noise.. */ +#define ECHRNG 88 /* Channel number out of range */ +#define EL2NSYNC 89 /* Level 2 not synchronized */ +#define EL3HLT 90 /* Level 3 halted */ +#define EL3RST 91 /* Level 3 reset */ + +#define ELNRNG 93 /* Link number out of range */ +#define EUNATCH 94 /* Protocol driver not attached */ +#define ENOCSI 95 /* No CSI structure available */ +#define EL2HLT 96 /* Level 2 halted */ +#define EBADE 97 /* Invalid exchange */ +#define EBADR 98 /* Invalid request descriptor */ +#define EXFULL 99 /* Exchange full */ +#define ENOANO 100 /* No anode */ +#define EBADRQC 101 /* Invalid request code */ +#define EBADSLT 102 /* Invalid slot */ + +#define EDEADLOCK EDEADLK + +#define EBFONT 104 /* Bad font file format */ +#define ENONET 105 /* Machine is not on the network */ +#define ENOLINK 106 /* Link has been severed */ +#define EADV 107 /* Advertise error */ +#define ESRMNT 108 /* Srmount error */ +#define ECOMM 109 /* Communication error on send */ +#define EMULTIHOP 110 /* Multihop attempted */ +#define EDOTDOT 111 /* RFS specific error */ +#define EOVERFLOW 112 /* Value too large for defined data type */ +#define ENOTUNIQ 113 /* Name not unique on network */ +#define EBADFD 114 /* File descriptor in bad state */ +#define EREMCHG 115 /* Remote address changed */ + +#define EUCLEAN 117 /* Structure needs cleaning */ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ + +#define ELIBACC 122 /* Can not access a needed shared library */ +#define ELIBBAD 123 /* Accessing a corrupted shared library */ +#define ELIBSCN 124 /* .lib section in a.out corrupted */ +#define ELIBMAX 125 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 126 /* Cannot exec a shared library directly */ +#define ERESTART 127 /* Interrupted system call should be restarted */ +#define ESTRPIPE 128 /* Streams pipe error */ + +#define ENOMEDIUM 129 /* No medium found */ +#define EMEDIUMTYPE 130 /* Wrong medium type */ +#define ECANCELED 131 /* Operation Cancelled */ +#define ENOKEY 132 /* Required key not available */ +#define EKEYEXPIRED 133 /* Key has expired */ +#define EKEYREVOKED 134 /* Key has been revoked */ +#define EKEYREJECTED 135 /* Key was rejected by service */ + +/* for robust mutexes */ +#define EOWNERDEAD 136 /* Owner died */ +#define ENOTRECOVERABLE 137 /* State not recoverable */ + +#define ERFKILL 138 /* Operation not possible due to RF-kill */ + +#define EHWPOISON 139 /* Memory page has hardware error */ + +#endif diff --git a/arch/alpha/include/asm/exec.h b/arch/alpha/include/asm/exec.h new file mode 100644 index 00000000..4a5a41f3 --- /dev/null +++ b/arch/alpha/include/asm/exec.h @@ -0,0 +1,6 @@ +#ifndef __ALPHA_EXEC_H +#define __ALPHA_EXEC_H + +#define arch_align_stack(x) (x) + +#endif /* __ALPHA_EXEC_H */ diff --git a/arch/alpha/include/asm/fb.h b/arch/alpha/include/asm/fb.h new file mode 100644 index 00000000..fa9bbb96 --- /dev/null +++ b/arch/alpha/include/asm/fb.h @@ -0,0 +1,13 @@ +#ifndef _ASM_FB_H_ +#define _ASM_FB_H_ +#include <linux/device.h> + +/* Caching is off in the I/O space quadrant by design. */ +#define fb_pgprotect(...) do {} while (0) + +static inline int fb_is_primary_device(struct fb_info *info) +{ + return 0; +} + +#endif /* _ASM_FB_H_ */ diff --git a/arch/alpha/include/asm/fcntl.h b/arch/alpha/include/asm/fcntl.h new file mode 100644 index 00000000..6d9e805f --- /dev/null +++ b/arch/alpha/include/asm/fcntl.h @@ -0,0 +1,56 @@ +#ifndef _ALPHA_FCNTL_H +#define _ALPHA_FCNTL_H + +#define O_CREAT 01000 /* not fcntl */ +#define O_TRUNC 02000 /* not fcntl */ +#define O_EXCL 04000 /* not fcntl */ +#define O_NOCTTY 010000 /* not fcntl */ + +#define O_NONBLOCK 00004 +#define O_APPEND 00010 +#define O_DSYNC 040000 /* used to be O_SYNC, see below */ +#define O_DIRECTORY 0100000 /* must be a directory */ +#define O_NOFOLLOW 0200000 /* don't follow links */ +#define O_LARGEFILE 0400000 /* will be set by the kernel on every open */ +#define O_DIRECT 02000000 /* direct disk access - should check with OSF/1 */ +#define O_NOATIME 04000000 +#define O_CLOEXEC 010000000 /* set close_on_exec */ +/* + * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using + * the O_SYNC flag. We continue to use the existing numerical value + * for O_DSYNC semantics now, but using the correct symbolic name for it. + * This new value is used to request true Posix O_SYNC semantics. It is + * defined in this strange way to make sure applications compiled against + * new headers get at least O_DSYNC semantics on older kernels. + * + * This has the nice side-effect that we can simply test for O_DSYNC + * wherever we do not care if O_DSYNC or O_SYNC is used. + * + * Note: __O_SYNC must never be used directly. + */ +#define __O_SYNC 020000000 +#define O_SYNC (__O_SYNC|O_DSYNC) + +#define O_PATH 040000000 + +#define F_GETLK 7 +#define F_SETLK 8 +#define F_SETLKW 9 + +#define F_SETOWN 5 /* for sockets. */ +#define F_GETOWN 6 /* for sockets. */ +#define F_SETSIG 10 /* for sockets. */ +#define F_GETSIG 11 /* for sockets. */ + +/* for posix fcntl() and lockf() */ +#define F_RDLCK 1 +#define F_WRLCK 2 +#define F_UNLCK 8 + +/* for old implementation of bsd flock () */ +#define F_EXLCK 16 /* or 3 */ +#define F_SHLCK 32 /* or 4 */ + +#include <asm-generic/fcntl.h> + +#endif diff --git a/arch/alpha/include/asm/floppy.h b/arch/alpha/include/asm/floppy.h new file mode 100644 index 00000000..46cefbd5 --- /dev/null +++ b/arch/alpha/include/asm/floppy.h @@ -0,0 +1,115 @@ +/* + * Architecture specific parts of the Floppy driver + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1995 + */ +#ifndef __ASM_ALPHA_FLOPPY_H +#define __ASM_ALPHA_FLOPPY_H + + +#define fd_inb(port) inb_p(port) +#define fd_outb(value,port) outb_p(value,port) + +#define fd_enable_dma() enable_dma(FLOPPY_DMA) +#define fd_disable_dma() disable_dma(FLOPPY_DMA) +#define fd_request_dma() request_dma(FLOPPY_DMA,"floppy") +#define fd_free_dma() free_dma(FLOPPY_DMA) +#define fd_clear_dma_ff() clear_dma_ff(FLOPPY_DMA) +#define fd_set_dma_mode(mode) set_dma_mode(FLOPPY_DMA,mode) +#define fd_set_dma_addr(addr) set_dma_addr(FLOPPY_DMA,virt_to_bus(addr)) +#define fd_set_dma_count(count) set_dma_count(FLOPPY_DMA,count) +#define fd_enable_irq() enable_irq(FLOPPY_IRQ) +#define fd_disable_irq() disable_irq(FLOPPY_IRQ) +#define fd_cacheflush(addr,size) /* nothing */ +#define fd_request_irq() request_irq(FLOPPY_IRQ, floppy_interrupt,\ + IRQF_DISABLED, "floppy", NULL) +#define fd_free_irq() free_irq(FLOPPY_IRQ, NULL) + +#ifdef CONFIG_PCI + +#include <linux/pci.h> + +#define fd_dma_setup(addr,size,mode,io) alpha_fd_dma_setup(addr,size,mode,io) + +static __inline__ int +alpha_fd_dma_setup(char *addr, unsigned long size, int mode, int io) +{ + static unsigned long prev_size; + static dma_addr_t bus_addr = 0; + static char *prev_addr; + static int prev_dir; + int dir; + + dir = (mode != DMA_MODE_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE; + + if (bus_addr + && (addr != prev_addr || size != prev_size || dir != prev_dir)) { + /* different from last time -- unmap prev */ + pci_unmap_single(isa_bridge, bus_addr, prev_size, prev_dir); + bus_addr = 0; + } + + if (!bus_addr) /* need to map it */ + bus_addr = pci_map_single(isa_bridge, addr, size, dir); + + /* remember this one as prev */ + prev_addr = addr; + prev_size = size; + prev_dir = dir; + + fd_clear_dma_ff(); + fd_cacheflush(addr, size); + fd_set_dma_mode(mode); + set_dma_addr(FLOPPY_DMA, bus_addr); + fd_set_dma_count(size); + virtual_dma_port = io; + fd_enable_dma(); + + return 0; +} + +#endif /* CONFIG_PCI */ + +__inline__ void virtual_dma_init(void) +{ + /* Nothing to do on an Alpha */ +} + +static int FDC1 = 0x3f0; +static int FDC2 = -1; + +/* + * Again, the CMOS information doesn't work on the alpha.. + */ +#define FLOPPY0_TYPE 6 +#define FLOPPY1_TYPE 0 + +#define N_FDC 2 +#define N_DRIVE 8 + +/* + * Most Alphas have no problems with floppy DMA crossing 64k borders, + * except for certain ones, like XL and RUFFIAN. + * + * However, the test is simple and fast, and this *is* floppy, after all, + * so we do it for all platforms, just to make sure. + * + * This is advantageous in other circumstances as well, as in moving + * about the PCI DMA windows and forcing the floppy to start doing + * scatter-gather when it never had before, and there *is* a problem + * on that platform... ;-} + */ + +static inline unsigned long CROSS_64KB(void *a, unsigned long s) +{ + unsigned long p = (unsigned long)a; + return ((p + s - 1) ^ p) & ~0xffffUL; +} + +#define EXTRA_FLOPPY_PARAMS + +#endif /* __ASM_ALPHA_FLOPPY_H */ diff --git a/arch/alpha/include/asm/fpu.h b/arch/alpha/include/asm/fpu.h new file mode 100644 index 00000000..db00f788 --- /dev/null +++ b/arch/alpha/include/asm/fpu.h @@ -0,0 +1,195 @@ +#ifndef __ASM_ALPHA_FPU_H +#define __ASM_ALPHA_FPU_H + +#include <asm/special_insns.h> + +/* + * Alpha floating-point control register defines: + */ +#define FPCR_DNOD (1UL<<47) /* denorm INV trap disable */ +#define FPCR_DNZ (1UL<<48) /* denorms to zero */ +#define FPCR_INVD (1UL<<49) /* invalid op disable (opt.) */ +#define FPCR_DZED (1UL<<50) /* division by zero disable (opt.) */ +#define FPCR_OVFD (1UL<<51) /* overflow disable (optional) */ +#define FPCR_INV (1UL<<52) /* invalid operation */ +#define FPCR_DZE (1UL<<53) /* division by zero */ +#define FPCR_OVF (1UL<<54) /* overflow */ +#define FPCR_UNF (1UL<<55) /* underflow */ +#define FPCR_INE (1UL<<56) /* inexact */ +#define FPCR_IOV (1UL<<57) /* integer overflow */ +#define FPCR_UNDZ (1UL<<60) /* underflow to zero (opt.) */ +#define FPCR_UNFD (1UL<<61) /* underflow disable (opt.) */ +#define FPCR_INED (1UL<<62) /* inexact disable (opt.) */ +#define FPCR_SUM (1UL<<63) /* summary bit */ + +#define FPCR_DYN_SHIFT 58 /* first dynamic rounding mode bit */ +#define FPCR_DYN_CHOPPED (0x0UL << FPCR_DYN_SHIFT) /* towards 0 */ +#define FPCR_DYN_MINUS (0x1UL << FPCR_DYN_SHIFT) /* towards -INF */ +#define FPCR_DYN_NORMAL (0x2UL << FPCR_DYN_SHIFT) /* towards nearest */ +#define FPCR_DYN_PLUS (0x3UL << FPCR_DYN_SHIFT) /* towards +INF */ +#define FPCR_DYN_MASK (0x3UL << FPCR_DYN_SHIFT) + +#define FPCR_MASK 0xffff800000000000L + +/* + * IEEE trap enables are implemented in software. These per-thread + * bits are stored in the "ieee_state" field of "struct thread_info". + * Thus, the bits are defined so as not to conflict with the + * floating-point enable bit (which is architected). On top of that, + * we want to make these bits compatible with OSF/1 so + * ieee_set_fp_control() etc. can be implemented easily and + * compatibly. The corresponding definitions are in + * /usr/include/machine/fpu.h under OSF/1. + */ +#define IEEE_TRAP_ENABLE_INV (1UL<<1) /* invalid op */ +#define IEEE_TRAP_ENABLE_DZE (1UL<<2) /* division by zero */ +#define IEEE_TRAP_ENABLE_OVF (1UL<<3) /* overflow */ +#define IEEE_TRAP_ENABLE_UNF (1UL<<4) /* underflow */ +#define IEEE_TRAP_ENABLE_INE (1UL<<5) /* inexact */ +#define IEEE_TRAP_ENABLE_DNO (1UL<<6) /* denorm */ +#define IEEE_TRAP_ENABLE_MASK (IEEE_TRAP_ENABLE_INV | IEEE_TRAP_ENABLE_DZE |\ + IEEE_TRAP_ENABLE_OVF | IEEE_TRAP_ENABLE_UNF |\ + IEEE_TRAP_ENABLE_INE | IEEE_TRAP_ENABLE_DNO) + +/* Denorm and Underflow flushing */ +#define IEEE_MAP_DMZ (1UL<<12) /* Map denorm inputs to zero */ +#define IEEE_MAP_UMZ (1UL<<13) /* Map underflowed outputs to zero */ + +#define IEEE_MAP_MASK (IEEE_MAP_DMZ | IEEE_MAP_UMZ) + +/* status bits coming from fpcr: */ +#define IEEE_STATUS_INV (1UL<<17) +#define IEEE_STATUS_DZE (1UL<<18) +#define IEEE_STATUS_OVF (1UL<<19) +#define IEEE_STATUS_UNF (1UL<<20) +#define IEEE_STATUS_INE (1UL<<21) +#define IEEE_STATUS_DNO (1UL<<22) + +#define IEEE_STATUS_MASK (IEEE_STATUS_INV | IEEE_STATUS_DZE | \ + IEEE_STATUS_OVF | IEEE_STATUS_UNF | \ + IEEE_STATUS_INE | IEEE_STATUS_DNO) + +#define IEEE_SW_MASK (IEEE_TRAP_ENABLE_MASK | \ + IEEE_STATUS_MASK | IEEE_MAP_MASK) + +#define IEEE_CURRENT_RM_SHIFT 32 +#define IEEE_CURRENT_RM_MASK (3UL<<IEEE_CURRENT_RM_SHIFT) + +#define IEEE_STATUS_TO_EXCSUM_SHIFT 16 + +#define IEEE_INHERIT (1UL<<63) /* inherit on thread create? */ + +/* + * Convert the software IEEE trap enable and status bits into the + * hardware fpcr format. + * + * Digital Unix engineers receive my thanks for not defining the + * software bits identical to the hardware bits. The chip designers + * receive my thanks for making all the not-implemented fpcr bits + * RAZ forcing us to use system calls to read/write this value. + */ + +static inline unsigned long +ieee_swcr_to_fpcr(unsigned long sw) +{ + unsigned long fp; + fp = (sw & IEEE_STATUS_MASK) << 35; + fp |= (sw & IEEE_MAP_DMZ) << 36; + fp |= (sw & IEEE_STATUS_MASK ? FPCR_SUM : 0); + fp |= (~sw & (IEEE_TRAP_ENABLE_INV + | IEEE_TRAP_ENABLE_DZE + | IEEE_TRAP_ENABLE_OVF)) << 48; + fp |= (~sw & (IEEE_TRAP_ENABLE_UNF | IEEE_TRAP_ENABLE_INE)) << 57; + fp |= (sw & IEEE_MAP_UMZ ? FPCR_UNDZ | FPCR_UNFD : 0); + fp |= (~sw & IEEE_TRAP_ENABLE_DNO) << 41; + return fp; +} + +static inline unsigned long +ieee_fpcr_to_swcr(unsigned long fp) +{ + unsigned long sw; + sw = (fp >> 35) & IEEE_STATUS_MASK; + sw |= (fp >> 36) & IEEE_MAP_DMZ; + sw |= (~fp >> 48) & (IEEE_TRAP_ENABLE_INV + | IEEE_TRAP_ENABLE_DZE + | IEEE_TRAP_ENABLE_OVF); + sw |= (~fp >> 57) & (IEEE_TRAP_ENABLE_UNF | IEEE_TRAP_ENABLE_INE); + sw |= (fp >> 47) & IEEE_MAP_UMZ; + sw |= (~fp >> 41) & IEEE_TRAP_ENABLE_DNO; + return sw; +} + +#ifdef __KERNEL__ + +/* The following two functions don't need trapb/excb instructions + around the mf_fpcr/mt_fpcr instructions because (a) the kernel + never generates arithmetic faults and (b) call_pal instructions + are implied trap barriers. */ + +static inline unsigned long +rdfpcr(void) +{ + unsigned long tmp, ret; + +#if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67) + __asm__ __volatile__ ( + "ftoit $f0,%0\n\t" + "mf_fpcr $f0\n\t" + "ftoit $f0,%1\n\t" + "itoft %0,$f0" + : "=r"(tmp), "=r"(ret)); +#else + __asm__ __volatile__ ( + "stt $f0,%0\n\t" + "mf_fpcr $f0\n\t" + "stt $f0,%1\n\t" + "ldt $f0,%0" + : "=m"(tmp), "=m"(ret)); +#endif + + return ret; +} + +static inline void +wrfpcr(unsigned long val) +{ + unsigned long tmp; + +#if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67) + __asm__ __volatile__ ( + "ftoit $f0,%0\n\t" + "itoft %1,$f0\n\t" + "mt_fpcr $f0\n\t" + "itoft %0,$f0" + : "=&r"(tmp) : "r"(val)); +#else + __asm__ __volatile__ ( + "stt $f0,%0\n\t" + "ldt $f0,%1\n\t" + "mt_fpcr $f0\n\t" + "ldt $f0,%0" + : "=m"(tmp) : "m"(val)); +#endif +} + +static inline unsigned long +swcr_update_status(unsigned long swcr, unsigned long fpcr) +{ + /* EV6 implements most of the bits in hardware. Collect + the acrued exception bits from the real fpcr. */ + if (implver() == IMPLVER_EV6) { + swcr &= ~IEEE_STATUS_MASK; + swcr |= (fpcr >> 35) & IEEE_STATUS_MASK; + } + return swcr; +} + +extern unsigned long alpha_read_fp_reg (unsigned long reg); +extern void alpha_write_fp_reg (unsigned long reg, unsigned long val); +extern unsigned long alpha_read_fp_reg_s (unsigned long reg); +extern void alpha_write_fp_reg_s (unsigned long reg, unsigned long val); + +#endif /* __KERNEL__ */ + +#endif /* __ASM_ALPHA_FPU_H */ diff --git a/arch/alpha/include/asm/ftrace.h b/arch/alpha/include/asm/ftrace.h new file mode 100644 index 00000000..40a8c178 --- /dev/null +++ b/arch/alpha/include/asm/ftrace.h @@ -0,0 +1 @@ +/* empty */ diff --git a/arch/alpha/include/asm/futex.h b/arch/alpha/include/asm/futex.h new file mode 100644 index 00000000..f9397943 --- /dev/null +++ b/arch/alpha/include/asm/futex.h @@ -0,0 +1,119 @@ +#ifndef _ASM_ALPHA_FUTEX_H +#define _ASM_ALPHA_FUTEX_H + +#ifdef __KERNEL__ + +#include <linux/futex.h> +#include <linux/uaccess.h> +#include <asm/errno.h> +#include <asm/barrier.h> + +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ + __asm__ __volatile__( \ + __ASM_SMP_MB \ + "1: ldl_l %0,0(%2)\n" \ + insn \ + "2: stl_c %1,0(%2)\n" \ + " beq %1,4f\n" \ + " mov $31,%1\n" \ + "3: .subsection 2\n" \ + "4: br 1b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .long 1b-.\n" \ + " lda $31,3b-1b(%1)\n" \ + " .long 2b-.\n" \ + " lda $31,3b-2b(%1)\n" \ + " .previous\n" \ + : "=&r" (oldval), "=&r"(ret) \ + : "r" (uaddr), "r"(oparg) \ + : "memory") + +static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + pagefault_disable(); + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("mov %3,%1\n", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("addl %0,%3,%1\n", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("or %0,%3,%1\n", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("andnot %0,%3,%1\n", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("xor %0,%3,%1\n", ret, oldval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + + pagefault_enable(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +static inline int +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int ret = 0, cmp; + u32 prev; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + __asm__ __volatile__ ( + __ASM_SMP_MB + "1: ldl_l %1,0(%3)\n" + " cmpeq %1,%4,%2\n" + " beq %2,3f\n" + " mov %5,%2\n" + "2: stl_c %2,0(%3)\n" + " beq %2,4f\n" + "3: .subsection 2\n" + "4: br 1b\n" + " .previous\n" + " .section __ex_table,\"a\"\n" + " .long 1b-.\n" + " lda $31,3b-1b(%0)\n" + " .long 2b-.\n" + " lda $31,3b-2b(%0)\n" + " .previous\n" + : "+r"(ret), "=&r"(prev), "=&r"(cmp) + : "r"(uaddr), "r"((long)(int)oldval), "r"(newval) + : "memory"); + + *uval = prev; + return ret; +} + +#endif /* __KERNEL__ */ +#endif /* _ASM_ALPHA_FUTEX_H */ diff --git a/arch/alpha/include/asm/gct.h b/arch/alpha/include/asm/gct.h new file mode 100644 index 00000000..3504c704 --- /dev/null +++ b/arch/alpha/include/asm/gct.h @@ -0,0 +1,58 @@ +#ifndef __ALPHA_GCT_H +#define __ALPHA_GCT_H + +typedef u64 gct_id; +typedef u64 gct6_handle; + +typedef struct __gct6_node { + u8 type; + u8 subtype; + u16 size; + u32 hd_extension; + gct6_handle owner; + gct6_handle active_user; + gct_id id; + u64 flags; + u16 rev; + u16 change_counter; + u16 max_child; + u16 reserved1; + gct6_handle saved_owner; + gct6_handle affinity; + gct6_handle parent; + gct6_handle next; + gct6_handle prev; + gct6_handle child; + u64 fw_flags; + u64 os_usage; + u64 fru_id; + u32 checksum; + u32 magic; /* 'GLXY' */ +} gct6_node; + +typedef struct { + u8 type; + u8 subtype; + void (*callout)(gct6_node *); +} gct6_search_struct; + +#define GCT_NODE_MAGIC 0x59584c47 /* 'GLXY' */ + +/* + * node types + */ +#define GCT_TYPE_HOSE 0x0E + +/* + * node subtypes + */ +#define GCT_SUBTYPE_IO_PORT_MODULE 0x2C + +#define GCT_NODE_PTR(off) ((gct6_node *)((char *)hwrpb + \ + hwrpb->frut_offset + \ + (gct6_handle)(off))) \ + +int gct6_find_nodes(gct6_node *, gct6_search_struct *); + +#endif /* __ALPHA_GCT_H */ + diff --git a/arch/alpha/include/asm/gentrap.h b/arch/alpha/include/asm/gentrap.h new file mode 100644 index 00000000..ae50cc31 --- /dev/null +++ b/arch/alpha/include/asm/gentrap.h @@ -0,0 +1,37 @@ +#ifndef _ASMAXP_GENTRAP_H +#define _ASMAXP_GENTRAP_H + +/* + * Definitions for gentrap causes. They are generated by user-level + * programs and therefore should be compatible with the corresponding + * OSF/1 definitions. + */ +#define GEN_INTOVF -1 /* integer overflow */ +#define GEN_INTDIV -2 /* integer division by zero */ +#define GEN_FLTOVF -3 /* fp overflow */ +#define GEN_FLTDIV -4 /* fp division by zero */ +#define GEN_FLTUND -5 /* fp underflow */ +#define GEN_FLTINV -6 /* invalid fp operand */ +#define GEN_FLTINE -7 /* inexact fp operand */ +#define GEN_DECOVF -8 /* decimal overflow (for COBOL??) */ +#define GEN_DECDIV -9 /* decimal division by zero */ +#define GEN_DECINV -10 /* invalid decimal operand */ +#define GEN_ROPRAND -11 /* reserved operand */ +#define GEN_ASSERTERR -12 /* assertion error */ +#define GEN_NULPTRERR -13 /* null pointer error */ +#define GEN_STKOVF -14 /* stack overflow */ +#define GEN_STRLENERR -15 /* string length error */ +#define GEN_SUBSTRERR -16 /* substring error */ +#define GEN_RANGERR -17 /* range error */ +#define GEN_SUBRNG -18 +#define GEN_SUBRNG1 -19 +#define GEN_SUBRNG2 -20 +#define GEN_SUBRNG3 -21 /* these report range errors for */ +#define GEN_SUBRNG4 -22 /* subscripting (indexing) at levels 0..7 */ +#define GEN_SUBRNG5 -23 +#define GEN_SUBRNG6 -24 +#define GEN_SUBRNG7 -25 + +/* the remaining codes (-26..-1023) are reserved. */ + +#endif /* _ASMAXP_GENTRAP_H */ diff --git a/arch/alpha/include/asm/gpio.h b/arch/alpha/include/asm/gpio.h new file mode 100644 index 00000000..7dc6a634 --- /dev/null +++ b/arch/alpha/include/asm/gpio.h @@ -0,0 +1,55 @@ +/* + * Generic GPIO API implementation for Alpha. + * + * A stright copy of that for PowerPC which was: + * + * Copyright (c) 2007-2008 MontaVista Software, Inc. + * + * Author: Anton Vorontsov <avorontsov@ru.mvista.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _ASM_ALPHA_GPIO_H +#define _ASM_ALPHA_GPIO_H + +#include <linux/errno.h> +#include <asm-generic/gpio.h> + +#ifdef CONFIG_GPIOLIB + +/* + * We don't (yet) implement inlined/rapid versions for on-chip gpios. + * Just call gpiolib. + */ +static inline int gpio_get_value(unsigned int gpio) +{ + return __gpio_get_value(gpio); +} + +static inline void gpio_set_value(unsigned int gpio, int value) +{ + __gpio_set_value(gpio, value); +} + +static inline int gpio_cansleep(unsigned int gpio) +{ + return __gpio_cansleep(gpio); +} + +static inline int gpio_to_irq(unsigned int gpio) +{ + return __gpio_to_irq(gpio); +} + +static inline int irq_to_gpio(unsigned int irq) +{ + return -EINVAL; +} + +#endif /* CONFIG_GPIOLIB */ + +#endif /* _ASM_ALPHA_GPIO_H */ diff --git a/arch/alpha/include/asm/hardirq.h b/arch/alpha/include/asm/hardirq.h new file mode 100644 index 00000000..242c09ba --- /dev/null +++ b/arch/alpha/include/asm/hardirq.h @@ -0,0 +1,9 @@ +#ifndef _ALPHA_HARDIRQ_H +#define _ALPHA_HARDIRQ_H + +void ack_bad_irq(unsigned int irq); +#define ack_bad_irq ack_bad_irq + +#include <asm-generic/hardirq.h> + +#endif /* _ALPHA_HARDIRQ_H */ diff --git a/arch/alpha/include/asm/hw_irq.h b/arch/alpha/include/asm/hw_irq.h new file mode 100644 index 00000000..5050ac81 --- /dev/null +++ b/arch/alpha/include/asm/hw_irq.h @@ -0,0 +1,14 @@ +#ifndef _ALPHA_HW_IRQ_H +#define _ALPHA_HW_IRQ_H + + +extern volatile unsigned long irq_err_count; +DECLARE_PER_CPU(unsigned long, irq_pmi_count); + +#ifdef CONFIG_ALPHA_GENERIC +#define ACTUAL_NR_IRQS alpha_mv.nr_irqs +#else +#define ACTUAL_NR_IRQS NR_IRQS +#endif + +#endif diff --git a/arch/alpha/include/asm/hwrpb.h b/arch/alpha/include/asm/hwrpb.h new file mode 100644 index 00000000..8e8f871a --- /dev/null +++ b/arch/alpha/include/asm/hwrpb.h @@ -0,0 +1,220 @@ +#ifndef __ALPHA_HWRPB_H +#define __ALPHA_HWRPB_H + +#define INIT_HWRPB ((struct hwrpb_struct *) 0x10000000) + +/* + * DEC processor types for Alpha systems. Found in HWRPB. + * These values are architected. + */ + +#define EV3_CPU 1 /* EV3 */ +#define EV4_CPU 2 /* EV4 (21064) */ +#define LCA4_CPU 4 /* LCA4 (21066/21068) */ +#define EV5_CPU 5 /* EV5 (21164) */ +#define EV45_CPU 6 /* EV4.5 (21064/xxx) */ +#define EV56_CPU 7 /* EV5.6 (21164) */ +#define EV6_CPU 8 /* EV6 (21264) */ +#define PCA56_CPU 9 /* PCA56 (21164PC) */ +#define PCA57_CPU 10 /* PCA57 (notyet) */ +#define EV67_CPU 11 /* EV67 (21264A) */ +#define EV68CB_CPU 12 /* EV68CB (21264C) */ +#define EV68AL_CPU 13 /* EV68AL (21264B) */ +#define EV68CX_CPU 14 /* EV68CX (21264D) */ +#define EV7_CPU 15 /* EV7 (21364) */ +#define EV79_CPU 16 /* EV79 (21364??) */ +#define EV69_CPU 17 /* EV69 (21264/EV69A) */ + +/* + * DEC system types for Alpha systems. Found in HWRPB. + * These values are architected. + */ + +#define ST_ADU 1 /* Alpha ADU systype */ +#define ST_DEC_4000 2 /* Cobra systype */ +#define ST_DEC_7000 3 /* Ruby systype */ +#define ST_DEC_3000_500 4 /* Flamingo systype */ +#define ST_DEC_2000_300 6 /* Jensen systype */ +#define ST_DEC_3000_300 7 /* Pelican systype */ +#define ST_DEC_2100_A500 9 /* Sable systype */ +#define ST_DEC_AXPVME_64 10 /* AXPvme system type */ +#define ST_DEC_AXPPCI_33 11 /* NoName system type */ +#define ST_DEC_TLASER 12 /* Turbolaser systype */ +#define ST_DEC_2100_A50 13 /* Avanti systype */ +#define ST_DEC_MUSTANG 14 /* Mustang systype */ +#define ST_DEC_ALCOR 15 /* Alcor (EV5) systype */ +#define ST_DEC_1000 17 /* Mikasa systype */ +#define ST_DEC_EB64 18 /* EB64 systype */ +#define ST_DEC_EB66 19 /* EB66 systype */ +#define ST_DEC_EB64P 20 /* EB64+ systype */ +#define ST_DEC_BURNS 21 /* laptop systype */ +#define ST_DEC_RAWHIDE 22 /* Rawhide systype */ +#define ST_DEC_K2 23 /* K2 systype */ +#define ST_DEC_LYNX 24 /* Lynx systype */ +#define ST_DEC_XL 25 /* Alpha XL systype */ +#define ST_DEC_EB164 26 /* EB164 systype */ +#define ST_DEC_NORITAKE 27 /* Noritake systype */ +#define ST_DEC_CORTEX 28 /* Cortex systype */ +#define ST_DEC_MIATA 30 /* Miata systype */ +#define ST_DEC_XXM 31 /* XXM systype */ +#define ST_DEC_TAKARA 32 /* Takara systype */ +#define ST_DEC_YUKON 33 /* Yukon systype */ +#define ST_DEC_TSUNAMI 34 /* Tsunami systype */ +#define ST_DEC_WILDFIRE 35 /* Wildfire systype */ +#define ST_DEC_CUSCO 36 /* CUSCO systype */ +#define ST_DEC_EIGER 37 /* Eiger systype */ +#define ST_DEC_TITAN 38 /* Titan systype */ +#define ST_DEC_MARVEL 39 /* Marvel systype */ + +/* UNOFFICIAL!!! */ +#define ST_UNOFFICIAL_BIAS 100 +#define ST_DTI_RUFFIAN 101 /* RUFFIAN systype */ + +/* Alpha Processor, Inc. systems */ +#define ST_API_BIAS 200 +#define ST_API_NAUTILUS 201 /* UP1000 systype */ + +struct pcb_struct { + unsigned long ksp; + unsigned long usp; + unsigned long ptbr; + unsigned int pcc; + unsigned int asn; + unsigned long unique; + unsigned long flags; + unsigned long res1, res2; +}; + +struct percpu_struct { + unsigned long hwpcb[16]; + unsigned long flags; + unsigned long pal_mem_size; + unsigned long pal_scratch_size; + unsigned long pal_mem_pa; + unsigned long pal_scratch_pa; + unsigned long pal_revision; + unsigned long type; + unsigned long variation; + unsigned long revision; + unsigned long serial_no[2]; + unsigned long logout_area_pa; + unsigned long logout_area_len; + unsigned long halt_PCBB; + unsigned long halt_PC; + unsigned long halt_PS; + unsigned long halt_arg; + unsigned long halt_ra; + unsigned long halt_pv; + unsigned long halt_reason; + unsigned long res; + unsigned long ipc_buffer[21]; + unsigned long palcode_avail[16]; + unsigned long compatibility; + unsigned long console_data_log_pa; + unsigned long console_data_log_length; + unsigned long bcache_info; +}; + +struct procdesc_struct { + unsigned long weird_vms_stuff; + unsigned long address; +}; + +struct vf_map_struct { + unsigned long va; + unsigned long pa; + unsigned long count; +}; + +struct crb_struct { + struct procdesc_struct * dispatch_va; + struct procdesc_struct * dispatch_pa; + struct procdesc_struct * fixup_va; + struct procdesc_struct * fixup_pa; + /* virtual->physical map */ + unsigned long map_entries; + unsigned long map_pages; + struct vf_map_struct map[1]; +}; + +struct memclust_struct { + unsigned long start_pfn; + unsigned long numpages; + unsigned long numtested; + unsigned long bitmap_va; + unsigned long bitmap_pa; + unsigned long bitmap_chksum; + unsigned long usage; +}; + +struct memdesc_struct { + unsigned long chksum; + unsigned long optional_pa; + unsigned long numclusters; + struct memclust_struct cluster[0]; +}; + +struct dsr_struct { + long smm; /* SMM nubber used by LMF */ + unsigned long lurt_off; /* offset to LURT table */ + unsigned long sysname_off; /* offset to sysname char count */ +}; + +struct hwrpb_struct { + unsigned long phys_addr; /* check: physical address of the hwrpb */ + unsigned long id; /* check: "HWRPB\0\0\0" */ + unsigned long revision; + unsigned long size; /* size of hwrpb */ + unsigned long cpuid; + unsigned long pagesize; /* 8192, I hope */ + unsigned long pa_bits; /* number of physical address bits */ + unsigned long max_asn; + unsigned char ssn[16]; /* system serial number: big bother is watching */ + unsigned long sys_type; + unsigned long sys_variation; + unsigned long sys_revision; + unsigned long intr_freq; /* interval clock frequency * 4096 */ + unsigned long cycle_freq; /* cycle counter frequency */ + unsigned long vptb; /* Virtual Page Table Base address */ + unsigned long res1; + unsigned long tbhb_offset; /* Translation Buffer Hint Block */ + unsigned long nr_processors; + unsigned long processor_size; + unsigned long processor_offset; + unsigned long ctb_nr; + unsigned long ctb_size; /* console terminal block size */ + unsigned long ctbt_offset; /* console terminal block table offset */ + unsigned long crb_offset; /* console callback routine block */ + unsigned long mddt_offset; /* memory data descriptor table */ + unsigned long cdb_offset; /* configuration data block (or NULL) */ + unsigned long frut_offset; /* FRU table (or NULL) */ + void (*save_terminal)(unsigned long); + unsigned long save_terminal_data; + void (*restore_terminal)(unsigned long); + unsigned long restore_terminal_data; + void (*CPU_restart)(unsigned long); + unsigned long CPU_restart_data; + unsigned long res2; + unsigned long res3; + unsigned long chksum; + unsigned long rxrdy; + unsigned long txrdy; + unsigned long dsr_offset; /* "Dynamic System Recognition Data Block Table" */ +}; + +#ifdef __KERNEL__ + +extern struct hwrpb_struct *hwrpb; + +static inline void +hwrpb_update_checksum(struct hwrpb_struct *h) +{ + unsigned long sum = 0, *l; + for (l = (unsigned long *) h; l < (unsigned long *) &h->chksum; ++l) + sum += *l; + h->chksum = sum; +} + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_HWRPB_H */ diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h new file mode 100644 index 00000000..7a3d38d5 --- /dev/null +++ b/arch/alpha/include/asm/io.h @@ -0,0 +1,575 @@ +#ifndef __ALPHA_IO_H +#define __ALPHA_IO_H + +#ifdef __KERNEL__ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <asm/compiler.h> +#include <asm/pgtable.h> +#include <asm/machvec.h> +#include <asm/hwrpb.h> + +/* The generic header contains only prototypes. Including it ensures that + the implementation we have here matches that interface. */ +#include <asm-generic/iomap.h> + +/* We don't use IO slowdowns on the Alpha, but.. */ +#define __SLOW_DOWN_IO do { } while (0) +#define SLOW_DOWN_IO do { } while (0) + +/* + * Virtual -> physical identity mapping starts at this offset + */ +#ifdef USE_48_BIT_KSEG +#define IDENT_ADDR 0xffff800000000000UL +#else +#define IDENT_ADDR 0xfffffc0000000000UL +#endif + +/* + * We try to avoid hae updates (thus the cache), but when we + * do need to update the hae, we need to do it atomically, so + * that any interrupts wouldn't get confused with the hae + * register not being up-to-date with respect to the hardware + * value. + */ +extern inline void __set_hae(unsigned long new_hae) +{ + unsigned long flags = swpipl(IPL_MAX); + + barrier(); + + alpha_mv.hae_cache = new_hae; + *alpha_mv.hae_register = new_hae; + mb(); + /* Re-read to make sure it was written. */ + new_hae = *alpha_mv.hae_register; + + setipl(flags); + barrier(); +} + +extern inline void set_hae(unsigned long new_hae) +{ + if (new_hae != alpha_mv.hae_cache) + __set_hae(new_hae); +} + +/* + * Change virtual addresses to physical addresses and vv. + */ +#ifdef USE_48_BIT_KSEG +static inline unsigned long virt_to_phys(void *address) +{ + return (unsigned long)address - IDENT_ADDR; +} + +static inline void * phys_to_virt(unsigned long address) +{ + return (void *) (address + IDENT_ADDR); +} +#else +static inline unsigned long virt_to_phys(void *address) +{ + unsigned long phys = (unsigned long)address; + + /* Sign-extend from bit 41. */ + phys <<= (64 - 41); + phys = (long)phys >> (64 - 41); + + /* Crop to the physical address width of the processor. */ + phys &= (1ul << hwrpb->pa_bits) - 1; + + return phys; +} + +static inline void * phys_to_virt(unsigned long address) +{ + return (void *)(IDENT_ADDR + (address & ((1ul << 41) - 1))); +} +#endif + +#define page_to_phys(page) page_to_pa(page) + +static inline dma_addr_t __deprecated isa_page_to_bus(struct page *page) +{ + return page_to_phys(page); +} + +/* Maximum PIO space address supported? */ +#define IO_SPACE_LIMIT 0xffff + +/* + * Change addresses as seen by the kernel (virtual) to addresses as + * seen by a device (bus), and vice versa. + * + * Note that this only works for a limited range of kernel addresses, + * and very well may not span all memory. Consider this interface + * deprecated in favour of the DMA-mapping API. + */ +extern unsigned long __direct_map_base; +extern unsigned long __direct_map_size; + +static inline unsigned long __deprecated virt_to_bus(void *address) +{ + unsigned long phys = virt_to_phys(address); + unsigned long bus = phys + __direct_map_base; + return phys <= __direct_map_size ? bus : 0; +} +#define isa_virt_to_bus virt_to_bus + +static inline void * __deprecated bus_to_virt(unsigned long address) +{ + void *virt; + + /* This check is a sanity check but also ensures that bus address 0 + maps to virtual address 0 which is useful to detect null pointers + (the NCR driver is much simpler if NULL pointers are preserved). */ + address -= __direct_map_base; + virt = phys_to_virt(address); + return (long)address <= 0 ? NULL : virt; +} +#define isa_bus_to_virt bus_to_virt + +/* + * There are different chipsets to interface the Alpha CPUs to the world. + */ + +#define IO_CONCAT(a,b) _IO_CONCAT(a,b) +#define _IO_CONCAT(a,b) a ## _ ## b + +#ifdef CONFIG_ALPHA_GENERIC + +/* In a generic kernel, we always go through the machine vector. */ + +#define REMAP1(TYPE, NAME, QUAL) \ +static inline TYPE generic_##NAME(QUAL void __iomem *addr) \ +{ \ + return alpha_mv.mv_##NAME(addr); \ +} + +#define REMAP2(TYPE, NAME, QUAL) \ +static inline void generic_##NAME(TYPE b, QUAL void __iomem *addr) \ +{ \ + alpha_mv.mv_##NAME(b, addr); \ +} + +REMAP1(unsigned int, ioread8, /**/) +REMAP1(unsigned int, ioread16, /**/) +REMAP1(unsigned int, ioread32, /**/) +REMAP1(u8, readb, const volatile) +REMAP1(u16, readw, const volatile) +REMAP1(u32, readl, const volatile) +REMAP1(u64, readq, const volatile) + +REMAP2(u8, iowrite8, /**/) +REMAP2(u16, iowrite16, /**/) +REMAP2(u32, iowrite32, /**/) +REMAP2(u8, writeb, volatile) +REMAP2(u16, writew, volatile) +REMAP2(u32, writel, volatile) +REMAP2(u64, writeq, volatile) + +#undef REMAP1 +#undef REMAP2 + +extern inline void __iomem *generic_ioportmap(unsigned long a) +{ + return alpha_mv.mv_ioportmap(a); +} + +static inline void __iomem *generic_ioremap(unsigned long a, unsigned long s) +{ + return alpha_mv.mv_ioremap(a, s); +} + +static inline void generic_iounmap(volatile void __iomem *a) +{ + return alpha_mv.mv_iounmap(a); +} + +static inline int generic_is_ioaddr(unsigned long a) +{ + return alpha_mv.mv_is_ioaddr(a); +} + +static inline int generic_is_mmio(const volatile void __iomem *a) +{ + return alpha_mv.mv_is_mmio(a); +} + +#define __IO_PREFIX generic +#define generic_trivial_rw_bw 0 +#define generic_trivial_rw_lq 0 +#define generic_trivial_io_bw 0 +#define generic_trivial_io_lq 0 +#define generic_trivial_iounmap 0 + +#else + +#if defined(CONFIG_ALPHA_APECS) +# include <asm/core_apecs.h> +#elif defined(CONFIG_ALPHA_CIA) +# include <asm/core_cia.h> +#elif defined(CONFIG_ALPHA_IRONGATE) +# include <asm/core_irongate.h> +#elif defined(CONFIG_ALPHA_JENSEN) +# include <asm/jensen.h> +#elif defined(CONFIG_ALPHA_LCA) +# include <asm/core_lca.h> +#elif defined(CONFIG_ALPHA_MARVEL) +# include <asm/core_marvel.h> +#elif defined(CONFIG_ALPHA_MCPCIA) +# include <asm/core_mcpcia.h> +#elif defined(CONFIG_ALPHA_POLARIS) +# include <asm/core_polaris.h> +#elif defined(CONFIG_ALPHA_T2) +# include <asm/core_t2.h> +#elif defined(CONFIG_ALPHA_TSUNAMI) +# include <asm/core_tsunami.h> +#elif defined(CONFIG_ALPHA_TITAN) +# include <asm/core_titan.h> +#elif defined(CONFIG_ALPHA_WILDFIRE) +# include <asm/core_wildfire.h> +#else +#error "What system is this?" +#endif + +#endif /* GENERIC */ + +/* + * We always have external versions of these routines. + */ +extern u8 inb(unsigned long port); +extern u16 inw(unsigned long port); +extern u32 inl(unsigned long port); +extern void outb(u8 b, unsigned long port); +extern void outw(u16 b, unsigned long port); +extern void outl(u32 b, unsigned long port); + +extern u8 readb(const volatile void __iomem *addr); +extern u16 readw(const volatile void __iomem *addr); +extern u32 readl(const volatile void __iomem *addr); +extern u64 readq(const volatile void __iomem *addr); +extern void writeb(u8 b, volatile void __iomem *addr); +extern void writew(u16 b, volatile void __iomem *addr); +extern void writel(u32 b, volatile void __iomem *addr); +extern void writeq(u64 b, volatile void __iomem *addr); + +extern u8 __raw_readb(const volatile void __iomem *addr); +extern u16 __raw_readw(const volatile void __iomem *addr); +extern u32 __raw_readl(const volatile void __iomem *addr); +extern u64 __raw_readq(const volatile void __iomem *addr); +extern void __raw_writeb(u8 b, volatile void __iomem *addr); +extern void __raw_writew(u16 b, volatile void __iomem *addr); +extern void __raw_writel(u32 b, volatile void __iomem *addr); +extern void __raw_writeq(u64 b, volatile void __iomem *addr); + +/* + * Mapping from port numbers to __iomem space is pretty easy. + */ + +/* These two have to be extern inline because of the extern prototype from + <asm-generic/iomap.h>. It is not legal to mix "extern" and "static" for + the same declaration. */ +extern inline void __iomem *ioport_map(unsigned long port, unsigned int size) +{ + return IO_CONCAT(__IO_PREFIX,ioportmap) (port); +} + +extern inline void ioport_unmap(void __iomem *addr) +{ +} + +static inline void __iomem *ioremap(unsigned long port, unsigned long size) +{ + return IO_CONCAT(__IO_PREFIX,ioremap) (port, size); +} + +static inline void __iomem *__ioremap(unsigned long port, unsigned long size, + unsigned long flags) +{ + return ioremap(port, size); +} + +static inline void __iomem * ioremap_nocache(unsigned long offset, + unsigned long size) +{ + return ioremap(offset, size); +} + +static inline void iounmap(volatile void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,iounmap)(addr); +} + +static inline int __is_ioaddr(unsigned long addr) +{ + return IO_CONCAT(__IO_PREFIX,is_ioaddr)(addr); +} +#define __is_ioaddr(a) __is_ioaddr((unsigned long)(a)) + +static inline int __is_mmio(const volatile void __iomem *addr) +{ + return IO_CONCAT(__IO_PREFIX,is_mmio)(addr); +} + + +/* + * If the actual I/O bits are sufficiently trivial, then expand inline. + */ + +#if IO_CONCAT(__IO_PREFIX,trivial_io_bw) +extern inline unsigned int ioread8(void __iomem *addr) +{ + unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread8)(addr); + mb(); + return ret; +} + +extern inline unsigned int ioread16(void __iomem *addr) +{ + unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread16)(addr); + mb(); + return ret; +} + +extern inline void iowrite8(u8 b, void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr); + mb(); +} + +extern inline void iowrite16(u16 b, void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr); + mb(); +} + +extern inline u8 inb(unsigned long port) +{ + return ioread8(ioport_map(port, 1)); +} + +extern inline u16 inw(unsigned long port) +{ + return ioread16(ioport_map(port, 2)); +} + +extern inline void outb(u8 b, unsigned long port) +{ + iowrite8(b, ioport_map(port, 1)); +} + +extern inline void outw(u16 b, unsigned long port) +{ + iowrite16(b, ioport_map(port, 2)); +} +#endif + +#if IO_CONCAT(__IO_PREFIX,trivial_io_lq) +extern inline unsigned int ioread32(void __iomem *addr) +{ + unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread32)(addr); + mb(); + return ret; +} + +extern inline void iowrite32(u32 b, void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,iowrite32)(b, addr); + mb(); +} + +extern inline u32 inl(unsigned long port) +{ + return ioread32(ioport_map(port, 4)); +} + +extern inline void outl(u32 b, unsigned long port) +{ + iowrite32(b, ioport_map(port, 4)); +} +#endif + +#if IO_CONCAT(__IO_PREFIX,trivial_rw_bw) == 1 +extern inline u8 __raw_readb(const volatile void __iomem *addr) +{ + return IO_CONCAT(__IO_PREFIX,readb)(addr); +} + +extern inline u16 __raw_readw(const volatile void __iomem *addr) +{ + return IO_CONCAT(__IO_PREFIX,readw)(addr); +} + +extern inline void __raw_writeb(u8 b, volatile void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,writeb)(b, addr); +} + +extern inline void __raw_writew(u16 b, volatile void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,writew)(b, addr); +} + +extern inline u8 readb(const volatile void __iomem *addr) +{ + u8 ret = __raw_readb(addr); + mb(); + return ret; +} + +extern inline u16 readw(const volatile void __iomem *addr) +{ + u16 ret = __raw_readw(addr); + mb(); + return ret; +} + +extern inline void writeb(u8 b, volatile void __iomem *addr) +{ + __raw_writeb(b, addr); + mb(); +} + +extern inline void writew(u16 b, volatile void __iomem *addr) +{ + __raw_writew(b, addr); + mb(); +} +#endif + +#if IO_CONCAT(__IO_PREFIX,trivial_rw_lq) == 1 +extern inline u32 __raw_readl(const volatile void __iomem *addr) +{ + return IO_CONCAT(__IO_PREFIX,readl)(addr); +} + +extern inline u64 __raw_readq(const volatile void __iomem *addr) +{ + return IO_CONCAT(__IO_PREFIX,readq)(addr); +} + +extern inline void __raw_writel(u32 b, volatile void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,writel)(b, addr); +} + +extern inline void __raw_writeq(u64 b, volatile void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,writeq)(b, addr); +} + +extern inline u32 readl(const volatile void __iomem *addr) +{ + u32 ret = __raw_readl(addr); + mb(); + return ret; +} + +extern inline u64 readq(const volatile void __iomem *addr) +{ + u64 ret = __raw_readq(addr); + mb(); + return ret; +} + +extern inline void writel(u32 b, volatile void __iomem *addr) +{ + __raw_writel(b, addr); + mb(); +} + +extern inline void writeq(u64 b, volatile void __iomem *addr) +{ + __raw_writeq(b, addr); + mb(); +} +#endif + +#define inb_p inb +#define inw_p inw +#define inl_p inl +#define outb_p outb +#define outw_p outw +#define outl_p outl +#define readb_relaxed(addr) __raw_readb(addr) +#define readw_relaxed(addr) __raw_readw(addr) +#define readl_relaxed(addr) __raw_readl(addr) +#define readq_relaxed(addr) __raw_readq(addr) + +#define mmiowb() + +/* + * String version of IO memory access ops: + */ +extern void memcpy_fromio(void *, const volatile void __iomem *, long); +extern void memcpy_toio(volatile void __iomem *, const void *, long); +extern void _memset_c_io(volatile void __iomem *, unsigned long, long); + +static inline void memset_io(volatile void __iomem *addr, u8 c, long len) +{ + _memset_c_io(addr, 0x0101010101010101UL * c, len); +} + +#define __HAVE_ARCH_MEMSETW_IO +static inline void memsetw_io(volatile void __iomem *addr, u16 c, long len) +{ + _memset_c_io(addr, 0x0001000100010001UL * c, len); +} + +/* + * String versions of in/out ops: + */ +extern void insb (unsigned long port, void *dst, unsigned long count); +extern void insw (unsigned long port, void *dst, unsigned long count); +extern void insl (unsigned long port, void *dst, unsigned long count); +extern void outsb (unsigned long port, const void *src, unsigned long count); +extern void outsw (unsigned long port, const void *src, unsigned long count); +extern void outsl (unsigned long port, const void *src, unsigned long count); + +/* + * The Alpha Jensen hardware for some rather strange reason puts + * the RTC clock at 0x170 instead of 0x70. Probably due to some + * misguided idea about using 0x70 for NMI stuff. + * + * These defines will override the defaults when doing RTC queries + */ + +#ifdef CONFIG_ALPHA_GENERIC +# define RTC_PORT(x) ((x) + alpha_mv.rtc_port) +#else +# ifdef CONFIG_ALPHA_JENSEN +# define RTC_PORT(x) (0x170+(x)) +# else +# define RTC_PORT(x) (0x70 + (x)) +# endif +#endif +#define RTC_ALWAYS_BCD 0 + +/* + * Some mucking forons use if[n]def writeq to check if platform has it. + * It's a bloody bad idea and we probably want ARCH_HAS_WRITEQ for them + * to play with; for now just use cpp anti-recursion logics and make sure + * that damn thing is defined and expands to itself. + */ + +#define writeq writeq +#define readq readq + +/* + * Convert a physical pointer to a virtual kernel pointer for /dev/mem + * access + */ +#define xlate_dev_mem_ptr(p) __va(p) + +/* + * Convert a virtual cached pointer to an uncached pointer + */ +#define xlate_dev_kmem_ptr(p) p + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_IO_H */ diff --git a/arch/alpha/include/asm/io_trivial.h b/arch/alpha/include/asm/io_trivial.h new file mode 100644 index 00000000..1c77f10b --- /dev/null +++ b/arch/alpha/include/asm/io_trivial.h @@ -0,0 +1,131 @@ +/* Trivial implementations of basic i/o routines. Assumes that all + of the hard work has been done by ioremap and ioportmap, and that + access to i/o space is linear. */ + +/* This file may be included multiple times. */ + +#if IO_CONCAT(__IO_PREFIX,trivial_io_bw) +__EXTERN_INLINE unsigned int +IO_CONCAT(__IO_PREFIX,ioread8)(void __iomem *a) +{ + return __kernel_ldbu(*(volatile u8 __force *)a); +} + +__EXTERN_INLINE unsigned int +IO_CONCAT(__IO_PREFIX,ioread16)(void __iomem *a) +{ + return __kernel_ldwu(*(volatile u16 __force *)a); +} + +__EXTERN_INLINE void +IO_CONCAT(__IO_PREFIX,iowrite8)(u8 b, void __iomem *a) +{ + __kernel_stb(b, *(volatile u8 __force *)a); +} + +__EXTERN_INLINE void +IO_CONCAT(__IO_PREFIX,iowrite16)(u16 b, void __iomem *a) +{ + __kernel_stw(b, *(volatile u16 __force *)a); +} +#endif + +#if IO_CONCAT(__IO_PREFIX,trivial_io_lq) +__EXTERN_INLINE unsigned int +IO_CONCAT(__IO_PREFIX,ioread32)(void __iomem *a) +{ + return *(volatile u32 __force *)a; +} + +__EXTERN_INLINE void +IO_CONCAT(__IO_PREFIX,iowrite32)(u32 b, void __iomem *a) +{ + *(volatile u32 __force *)a = b; +} +#endif + +#if IO_CONCAT(__IO_PREFIX,trivial_rw_bw) == 1 +__EXTERN_INLINE u8 +IO_CONCAT(__IO_PREFIX,readb)(const volatile void __iomem *a) +{ + return __kernel_ldbu(*(const volatile u8 __force *)a); +} + +__EXTERN_INLINE u16 +IO_CONCAT(__IO_PREFIX,readw)(const volatile void __iomem *a) +{ + return __kernel_ldwu(*(const volatile u16 __force *)a); +} + +__EXTERN_INLINE void +IO_CONCAT(__IO_PREFIX,writeb)(u8 b, volatile void __iomem *a) +{ + __kernel_stb(b, *(volatile u8 __force *)a); +} + +__EXTERN_INLINE void +IO_CONCAT(__IO_PREFIX,writew)(u16 b, volatile void __iomem *a) +{ + __kernel_stw(b, *(volatile u16 __force *)a); +} +#elif IO_CONCAT(__IO_PREFIX,trivial_rw_bw) == 2 +__EXTERN_INLINE u8 +IO_CONCAT(__IO_PREFIX,readb)(const volatile void __iomem *a) +{ + void __iomem *addr = (void __iomem *)a; + return IO_CONCAT(__IO_PREFIX,ioread8)(addr); +} + +__EXTERN_INLINE u16 +IO_CONCAT(__IO_PREFIX,readw)(const volatile void __iomem *a) +{ + void __iomem *addr = (void __iomem *)a; + return IO_CONCAT(__IO_PREFIX,ioread16)(addr); +} + +__EXTERN_INLINE void +IO_CONCAT(__IO_PREFIX,writeb)(u8 b, volatile void __iomem *a) +{ + void __iomem *addr = (void __iomem *)a; + IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr); +} + +__EXTERN_INLINE void +IO_CONCAT(__IO_PREFIX,writew)(u16 b, volatile void __iomem *a) +{ + void __iomem *addr = (void __iomem *)a; + IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr); +} +#endif + +#if IO_CONCAT(__IO_PREFIX,trivial_rw_lq) == 1 +__EXTERN_INLINE u32 +IO_CONCAT(__IO_PREFIX,readl)(const volatile void __iomem *a) +{ + return *(const volatile u32 __force *)a; +} + +__EXTERN_INLINE u64 +IO_CONCAT(__IO_PREFIX,readq)(const volatile void __iomem *a) +{ + return *(const volatile u64 __force *)a; +} + +__EXTERN_INLINE void +IO_CONCAT(__IO_PREFIX,writel)(u32 b, volatile void __iomem *a) +{ + *(volatile u32 __force *)a = b; +} + +__EXTERN_INLINE void +IO_CONCAT(__IO_PREFIX,writeq)(u64 b, volatile void __iomem *a) +{ + *(volatile u64 __force *)a = b; +} +#endif + +#if IO_CONCAT(__IO_PREFIX,trivial_iounmap) +__EXTERN_INLINE void IO_CONCAT(__IO_PREFIX,iounmap)(volatile void __iomem *a) +{ +} +#endif diff --git a/arch/alpha/include/asm/ioctl.h b/arch/alpha/include/asm/ioctl.h new file mode 100644 index 00000000..fc63727f --- /dev/null +++ b/arch/alpha/include/asm/ioctl.h @@ -0,0 +1,66 @@ +#ifndef _ALPHA_IOCTL_H +#define _ALPHA_IOCTL_H + +/* + * The original linux ioctl numbering scheme was just a general + * "anything goes" setup, where more or less random numbers were + * assigned. Sorry, I was clueless when I started out on this. + * + * On the alpha, we'll try to clean it up a bit, using a more sane + * ioctl numbering, and also trying to be compatible with OSF/1 in + * the process. I'd like to clean it up for the i386 as well, but + * it's so painful recognizing both the new and the old numbers.. + */ + +#define _IOC_NRBITS 8 +#define _IOC_TYPEBITS 8 +#define _IOC_SIZEBITS 13 +#define _IOC_DIRBITS 3 + +#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) +#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) +#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) +#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) + +#define _IOC_NRSHIFT 0 +#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) +#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) +#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) + +/* + * Direction bits _IOC_NONE could be 0, but OSF/1 gives it a bit. + * And this turns out useful to catch old ioctl numbers in header + * files for us. + */ +#define _IOC_NONE 1U +#define _IOC_READ 2U +#define _IOC_WRITE 4U + +#define _IOC(dir,type,nr,size) \ + ((unsigned int) \ + (((dir) << _IOC_DIRSHIFT) | \ + ((type) << _IOC_TYPESHIFT) | \ + ((nr) << _IOC_NRSHIFT) | \ + ((size) << _IOC_SIZESHIFT))) + +/* used to create numbers */ +#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) +#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) +#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) +#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) + +/* used to decode them.. */ +#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) +#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) +#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) +#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) + +/* ...and for the drivers/sound files... */ + +#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT) +#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT) +#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT) +#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT) +#define IOCSIZE_SHIFT (_IOC_SIZESHIFT) + +#endif /* _ALPHA_IOCTL_H */ diff --git a/arch/alpha/include/asm/ioctls.h b/arch/alpha/include/asm/ioctls.h new file mode 100644 index 00000000..80e1cee9 --- /dev/null +++ b/arch/alpha/include/asm/ioctls.h @@ -0,0 +1,114 @@ +#ifndef _ASM_ALPHA_IOCTLS_H +#define _ASM_ALPHA_IOCTLS_H + +#include <asm/ioctl.h> + +#define FIOCLEX _IO('f', 1) +#define FIONCLEX _IO('f', 2) +#define FIOASYNC _IOW('f', 125, int) +#define FIONBIO _IOW('f', 126, int) +#define FIONREAD _IOR('f', 127, int) +#define TIOCINQ FIONREAD +#define FIOQSIZE _IOR('f', 128, loff_t) + +#define TIOCGETP _IOR('t', 8, struct sgttyb) +#define TIOCSETP _IOW('t', 9, struct sgttyb) +#define TIOCSETN _IOW('t', 10, struct sgttyb) /* TIOCSETP wo flush */ + +#define TIOCSETC _IOW('t', 17, struct tchars) +#define TIOCGETC _IOR('t', 18, struct tchars) +#define TCGETS _IOR('t', 19, struct termios) +#define TCSETS _IOW('t', 20, struct termios) +#define TCSETSW _IOW('t', 21, struct termios) +#define TCSETSF _IOW('t', 22, struct termios) + +#define TCGETA _IOR('t', 23, struct termio) +#define TCSETA _IOW('t', 24, struct termio) +#define TCSETAW _IOW('t', 25, struct termio) +#define TCSETAF _IOW('t', 28, struct termio) + +#define TCSBRK _IO('t', 29) +#define TCXONC _IO('t', 30) +#define TCFLSH _IO('t', 31) + +#define TIOCSWINSZ _IOW('t', 103, struct winsize) +#define TIOCGWINSZ _IOR('t', 104, struct winsize) +#define TIOCSTART _IO('t', 110) /* start output, like ^Q */ +#define TIOCSTOP _IO('t', 111) /* stop output, like ^S */ +#define TIOCOUTQ _IOR('t', 115, int) /* output queue size */ + +#define TIOCGLTC _IOR('t', 116, struct ltchars) +#define TIOCSLTC _IOW('t', 117, struct ltchars) +#define TIOCSPGRP _IOW('t', 118, int) +#define TIOCGPGRP _IOR('t', 119, int) + +#define TIOCEXCL 0x540C +#define TIOCNXCL 0x540D +#define TIOCSCTTY 0x540E + +#define TIOCSTI 0x5412 +#define TIOCMGET 0x5415 +#define TIOCMBIS 0x5416 +#define TIOCMBIC 0x5417 +#define TIOCMSET 0x5418 +# define TIOCM_LE 0x001 +# define TIOCM_DTR 0x002 +# define TIOCM_RTS 0x004 +# define TIOCM_ST 0x008 +# define TIOCM_SR 0x010 +# define TIOCM_CTS 0x020 +# define TIOCM_CAR 0x040 +# define TIOCM_RNG 0x080 +# define TIOCM_DSR 0x100 +# define TIOCM_CD TIOCM_CAR +# define TIOCM_RI TIOCM_RNG +# define TIOCM_OUT1 0x2000 +# define TIOCM_OUT2 0x4000 +# define TIOCM_LOOP 0x8000 + +#define TIOCGSOFTCAR 0x5419 +#define TIOCSSOFTCAR 0x541A +#define TIOCLINUX 0x541C +#define TIOCCONS 0x541D +#define TIOCGSERIAL 0x541E +#define TIOCSSERIAL 0x541F +#define TIOCPKT 0x5420 +# define TIOCPKT_DATA 0 +# define TIOCPKT_FLUSHREAD 1 +# define TIOCPKT_FLUSHWRITE 2 +# define TIOCPKT_STOP 4 +# define TIOCPKT_START 8 +# define TIOCPKT_NOSTOP 16 +# define TIOCPKT_DOSTOP 32 +# define TIOCPKT_IOCTL 64 + + +#define TIOCNOTTY 0x5422 +#define TIOCSETD 0x5423 +#define TIOCGETD 0x5424 +#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ +#define TIOCSBRK 0x5427 /* BSD compatibility */ +#define TIOCCBRK 0x5428 /* BSD compatibility */ +#define TIOCGSID 0x5429 /* Return the session ID of FD */ +#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ +#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */ +#define TIOCGDEV _IOR('T',0x32, unsigned int) /* Get primary device node of /dev/console */ +#define TIOCSIG _IOW('T',0x36, int) /* Generate signal on Pty slave */ +#define TIOCVHANGUP 0x5437 + +#define TIOCSERCONFIG 0x5453 +#define TIOCSERGWILD 0x5454 +#define TIOCSERSWILD 0x5455 +#define TIOCGLCKTRMIOS 0x5456 +#define TIOCSLCKTRMIOS 0x5457 +#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ +#define TIOCSERGETLSR 0x5459 /* Get line status register */ + /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ +# define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ +#define TIOCSERGETMULTI 0x545A /* Get multiport config */ +#define TIOCSERSETMULTI 0x545B /* Set multiport config */ + +#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ +#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ + +#endif /* _ASM_ALPHA_IOCTLS_H */ diff --git a/arch/alpha/include/asm/ipcbuf.h b/arch/alpha/include/asm/ipcbuf.h new file mode 100644 index 00000000..84c7e51c --- /dev/null +++ b/arch/alpha/include/asm/ipcbuf.h @@ -0,0 +1 @@ +#include <asm-generic/ipcbuf.h> diff --git a/arch/alpha/include/asm/irq.h b/arch/alpha/include/asm/irq.h new file mode 100644 index 00000000..06377400 --- /dev/null +++ b/arch/alpha/include/asm/irq.h @@ -0,0 +1,91 @@ +#ifndef _ALPHA_IRQ_H +#define _ALPHA_IRQ_H + +/* + * linux/include/alpha/irq.h + * + * (C) 1994 Linus Torvalds + */ + +#include <linux/linkage.h> + +#if defined(CONFIG_ALPHA_GENERIC) + +/* Here NR_IRQS is not exact, but rather an upper bound. This is used + many places throughout the kernel to size static arrays. That's ok, + we'll use alpha_mv.nr_irqs when we want the real thing. */ + +/* When LEGACY_START_ADDRESS is selected, we leave out: + TITAN + WILDFIRE + MARVEL + + This helps keep the kernel object size reasonable for the majority + of machines. +*/ + +# if defined(CONFIG_ALPHA_LEGACY_START_ADDRESS) +# define NR_IRQS (128) /* max is RAWHIDE/TAKARA */ +# else +# define NR_IRQS (32768 + 16) /* marvel - 32 pids */ +# endif + +#elif defined(CONFIG_ALPHA_CABRIOLET) || \ + defined(CONFIG_ALPHA_EB66P) || \ + defined(CONFIG_ALPHA_EB164) || \ + defined(CONFIG_ALPHA_PC164) || \ + defined(CONFIG_ALPHA_LX164) +# define NR_IRQS 35 + +#elif defined(CONFIG_ALPHA_EB66) || \ + defined(CONFIG_ALPHA_EB64P) || \ + defined(CONFIG_ALPHA_MIKASA) +# define NR_IRQS 32 + +#elif defined(CONFIG_ALPHA_ALCOR) || \ + defined(CONFIG_ALPHA_MIATA) || \ + defined(CONFIG_ALPHA_RUFFIAN) || \ + defined(CONFIG_ALPHA_RX164) || \ + defined(CONFIG_ALPHA_NORITAKE) +# define NR_IRQS 48 + +#elif defined(CONFIG_ALPHA_SABLE) || \ + defined(CONFIG_ALPHA_SX164) +# define NR_IRQS 40 + +#elif defined(CONFIG_ALPHA_DP264) || \ + defined(CONFIG_ALPHA_LYNX) || \ + defined(CONFIG_ALPHA_SHARK) || \ + defined(CONFIG_ALPHA_EIGER) +# define NR_IRQS 64 + +#elif defined(CONFIG_ALPHA_TITAN) +#define NR_IRQS 80 + +#elif defined(CONFIG_ALPHA_RAWHIDE) || \ + defined(CONFIG_ALPHA_TAKARA) +# define NR_IRQS 128 + +#elif defined(CONFIG_ALPHA_WILDFIRE) +# define NR_IRQS 2048 /* enuff for 8 QBBs */ + +#elif defined(CONFIG_ALPHA_MARVEL) +# define NR_IRQS (32768 + 16) /* marvel - 32 pids*/ + +#else /* everyone else */ +# define NR_IRQS 16 +#endif + +static __inline__ int irq_canonicalize(int irq) +{ + /* + * XXX is this true for all Alpha's? The old serial driver + * did it this way for years without any complaints, so.... + */ + return ((irq == 2) ? 9 : irq); +} + +struct pt_regs; +extern void (*perf_irq)(unsigned long, struct pt_regs *); + +#endif /* _ALPHA_IRQ_H */ diff --git a/arch/alpha/include/asm/irq_regs.h b/arch/alpha/include/asm/irq_regs.h new file mode 100644 index 00000000..3dd9c0b7 --- /dev/null +++ b/arch/alpha/include/asm/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/arch/alpha/include/asm/irqflags.h b/arch/alpha/include/asm/irqflags.h new file mode 100644 index 00000000..ffb17264 --- /dev/null +++ b/arch/alpha/include/asm/irqflags.h @@ -0,0 +1,67 @@ +#ifndef __ALPHA_IRQFLAGS_H +#define __ALPHA_IRQFLAGS_H + +#include <asm/pal.h> + +#define IPL_MIN 0 +#define IPL_SW0 1 +#define IPL_SW1 2 +#define IPL_DEV0 3 +#define IPL_DEV1 4 +#define IPL_TIMER 5 +#define IPL_PERF 6 +#define IPL_POWERFAIL 6 +#define IPL_MCHECK 7 +#define IPL_MAX 7 + +#ifdef CONFIG_ALPHA_BROKEN_IRQ_MASK +#undef IPL_MIN +#define IPL_MIN __min_ipl +extern int __min_ipl; +#endif + +#define getipl() (rdps() & 7) +#define setipl(ipl) ((void) swpipl(ipl)) + +static inline unsigned long arch_local_save_flags(void) +{ + return rdps(); +} + +static inline void arch_local_irq_disable(void) +{ + setipl(IPL_MAX); + barrier(); +} + +static inline unsigned long arch_local_irq_save(void) +{ + unsigned long flags = swpipl(IPL_MAX); + barrier(); + return flags; +} + +static inline void arch_local_irq_enable(void) +{ + barrier(); + setipl(IPL_MIN); +} + +static inline void arch_local_irq_restore(unsigned long flags) +{ + barrier(); + setipl(flags); + barrier(); +} + +static inline bool arch_irqs_disabled_flags(unsigned long flags) +{ + return flags == IPL_MAX; +} + +static inline bool arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(getipl()); +} + +#endif /* __ALPHA_IRQFLAGS_H */ diff --git a/arch/alpha/include/asm/jensen.h b/arch/alpha/include/asm/jensen.h new file mode 100644 index 00000000..964b06ea --- /dev/null +++ b/arch/alpha/include/asm/jensen.h @@ -0,0 +1,346 @@ +#ifndef __ALPHA_JENSEN_H +#define __ALPHA_JENSEN_H + +#include <asm/compiler.h> + +/* + * Defines for the AlphaPC EISA IO and memory address space. + */ + +/* + * NOTE! The memory operations do not set any memory barriers, as it's + * not needed for cases like a frame buffer that is essentially memory-like. + * You need to do them by hand if the operations depend on ordering. + * + * Similarly, the port IO operations do a "mb" only after a write operation: + * if an mb is needed before (as in the case of doing memory mapped IO + * first, and then a port IO operation to the same device), it needs to be + * done by hand. + * + * After the above has bitten me 100 times, I'll give up and just do the + * mb all the time, but right now I'm hoping this will work out. Avoiding + * mb's may potentially be a noticeable speed improvement, but I can't + * honestly say I've tested it. + * + * Handling interrupts that need to do mb's to synchronize to non-interrupts + * is another fun race area. Don't do it (because if you do, I'll have to + * do *everything* with interrupts disabled, ugh). + */ + +/* + * EISA Interrupt Acknowledge address + */ +#define EISA_INTA (IDENT_ADDR + 0x100000000UL) + +/* + * FEPROM addresses + */ +#define EISA_FEPROM0 (IDENT_ADDR + 0x180000000UL) +#define EISA_FEPROM1 (IDENT_ADDR + 0x1A0000000UL) + +/* + * VL82C106 base address + */ +#define EISA_VL82C106 (IDENT_ADDR + 0x1C0000000UL) + +/* + * EISA "Host Address Extension" address (bits 25-31 of the EISA address) + */ +#define EISA_HAE (IDENT_ADDR + 0x1D0000000UL) + +/* + * "SYSCTL" register address + */ +#define EISA_SYSCTL (IDENT_ADDR + 0x1E0000000UL) + +/* + * "spare" register address + */ +#define EISA_SPARE (IDENT_ADDR + 0x1F0000000UL) + +/* + * EISA memory address offset + */ +#define EISA_MEM (IDENT_ADDR + 0x200000000UL) + +/* + * EISA IO address offset + */ +#define EISA_IO (IDENT_ADDR + 0x300000000UL) + + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * Handle the "host address register". This needs to be set + * to the high 7 bits of the EISA address. This is also needed + * for EISA IO addresses, which are only 16 bits wide (the + * hae needs to be set to 0). + * + * HAE isn't needed for the local IO operations, though. + */ + +#define JENSEN_HAE_ADDRESS EISA_HAE +#define JENSEN_HAE_MASK 0x1ffffff + +__EXTERN_INLINE void jensen_set_hae(unsigned long addr) +{ + /* hae on the Jensen is bits 31:25 shifted right */ + addr >>= 25; + if (addr != alpha_mv.hae_cache) + set_hae(addr); +} + +#define vuip volatile unsigned int * + +/* + * IO functions + * + * The "local" functions are those that don't go out to the EISA bus, + * but instead act on the VL82C106 chip directly.. This is mainly the + * keyboard, RTC, printer and first two serial lines.. + * + * The local stuff makes for some complications, but it seems to be + * gone in the PCI version. I hope I can get DEC suckered^H^H^H^H^H^H^H^H + * convinced that I need one of the newer machines. + */ + +static inline unsigned int jensen_local_inb(unsigned long addr) +{ + return 0xff & *(vuip)((addr << 9) + EISA_VL82C106); +} + +static inline void jensen_local_outb(u8 b, unsigned long addr) +{ + *(vuip)((addr << 9) + EISA_VL82C106) = b; + mb(); +} + +static inline unsigned int jensen_bus_inb(unsigned long addr) +{ + long result; + + jensen_set_hae(0); + result = *(volatile int *)((addr << 7) + EISA_IO + 0x00); + return __kernel_extbl(result, addr & 3); +} + +static inline void jensen_bus_outb(u8 b, unsigned long addr) +{ + jensen_set_hae(0); + *(vuip)((addr << 7) + EISA_IO + 0x00) = b * 0x01010101; + mb(); +} + +/* + * It seems gcc is not very good at optimizing away logical + * operations that result in operations across inline functions. + * Which is why this is a macro. + */ + +#define jensen_is_local(addr) ( \ +/* keyboard */ (addr == 0x60 || addr == 0x64) || \ +/* RTC */ (addr == 0x170 || addr == 0x171) || \ +/* mb COM2 */ (addr >= 0x2f8 && addr <= 0x2ff) || \ +/* mb LPT1 */ (addr >= 0x3bc && addr <= 0x3be) || \ +/* mb COM2 */ (addr >= 0x3f8 && addr <= 0x3ff)) + +__EXTERN_INLINE u8 jensen_inb(unsigned long addr) +{ + if (jensen_is_local(addr)) + return jensen_local_inb(addr); + else + return jensen_bus_inb(addr); +} + +__EXTERN_INLINE void jensen_outb(u8 b, unsigned long addr) +{ + if (jensen_is_local(addr)) + jensen_local_outb(b, addr); + else + jensen_bus_outb(b, addr); +} + +__EXTERN_INLINE u16 jensen_inw(unsigned long addr) +{ + long result; + + jensen_set_hae(0); + result = *(volatile int *) ((addr << 7) + EISA_IO + 0x20); + result >>= (addr & 3) * 8; + return 0xffffUL & result; +} + +__EXTERN_INLINE u32 jensen_inl(unsigned long addr) +{ + jensen_set_hae(0); + return *(vuip) ((addr << 7) + EISA_IO + 0x60); +} + +__EXTERN_INLINE void jensen_outw(u16 b, unsigned long addr) +{ + jensen_set_hae(0); + *(vuip) ((addr << 7) + EISA_IO + 0x20) = b * 0x00010001; + mb(); +} + +__EXTERN_INLINE void jensen_outl(u32 b, unsigned long addr) +{ + jensen_set_hae(0); + *(vuip) ((addr << 7) + EISA_IO + 0x60) = b; + mb(); +} + +/* + * Memory functions. + */ + +__EXTERN_INLINE u8 jensen_readb(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + long result; + + jensen_set_hae(addr); + addr &= JENSEN_HAE_MASK; + result = *(volatile int *) ((addr << 7) + EISA_MEM + 0x00); + result >>= (addr & 3) * 8; + return 0xffUL & result; +} + +__EXTERN_INLINE u16 jensen_readw(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + long result; + + jensen_set_hae(addr); + addr &= JENSEN_HAE_MASK; + result = *(volatile int *) ((addr << 7) + EISA_MEM + 0x20); + result >>= (addr & 3) * 8; + return 0xffffUL & result; +} + +__EXTERN_INLINE u32 jensen_readl(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + jensen_set_hae(addr); + addr &= JENSEN_HAE_MASK; + return *(vuip) ((addr << 7) + EISA_MEM + 0x60); +} + +__EXTERN_INLINE u64 jensen_readq(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long r0, r1; + + jensen_set_hae(addr); + addr &= JENSEN_HAE_MASK; + addr = (addr << 7) + EISA_MEM + 0x60; + r0 = *(vuip) (addr); + r1 = *(vuip) (addr + (4 << 7)); + return r1 << 32 | r0; +} + +__EXTERN_INLINE void jensen_writeb(u8 b, volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + jensen_set_hae(addr); + addr &= JENSEN_HAE_MASK; + *(vuip) ((addr << 7) + EISA_MEM + 0x00) = b * 0x01010101; +} + +__EXTERN_INLINE void jensen_writew(u16 b, volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + jensen_set_hae(addr); + addr &= JENSEN_HAE_MASK; + *(vuip) ((addr << 7) + EISA_MEM + 0x20) = b * 0x00010001; +} + +__EXTERN_INLINE void jensen_writel(u32 b, volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + jensen_set_hae(addr); + addr &= JENSEN_HAE_MASK; + *(vuip) ((addr << 7) + EISA_MEM + 0x60) = b; +} + +__EXTERN_INLINE void jensen_writeq(u64 b, volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + jensen_set_hae(addr); + addr &= JENSEN_HAE_MASK; + addr = (addr << 7) + EISA_MEM + 0x60; + *(vuip) (addr) = b; + *(vuip) (addr + (4 << 7)) = b >> 32; +} + +__EXTERN_INLINE void __iomem *jensen_ioportmap(unsigned long addr) +{ + return (void __iomem *)addr; +} + +__EXTERN_INLINE void __iomem *jensen_ioremap(unsigned long addr, + unsigned long size) +{ + return (void __iomem *)(addr + 0x100000000ul); +} + +__EXTERN_INLINE int jensen_is_ioaddr(unsigned long addr) +{ + return (long)addr >= 0; +} + +__EXTERN_INLINE int jensen_is_mmio(const volatile void __iomem *addr) +{ + return (unsigned long)addr >= 0x100000000ul; +} + +/* New-style ioread interface. All the routines are so ugly for Jensen + that it doesn't make sense to merge them. */ + +#define IOPORT(OS, NS) \ +__EXTERN_INLINE unsigned int jensen_ioread##NS(void __iomem *xaddr) \ +{ \ + if (jensen_is_mmio(xaddr)) \ + return jensen_read##OS(xaddr - 0x100000000ul); \ + else \ + return jensen_in##OS((unsigned long)xaddr); \ +} \ +__EXTERN_INLINE void jensen_iowrite##NS(u##NS b, void __iomem *xaddr) \ +{ \ + if (jensen_is_mmio(xaddr)) \ + jensen_write##OS(b, xaddr - 0x100000000ul); \ + else \ + jensen_out##OS(b, (unsigned long)xaddr); \ +} + +IOPORT(b, 8) +IOPORT(w, 16) +IOPORT(l, 32) + +#undef IOPORT + +#undef vuip + +#undef __IO_PREFIX +#define __IO_PREFIX jensen +#define jensen_trivial_rw_bw 0 +#define jensen_trivial_rw_lq 0 +#define jensen_trivial_io_bw 0 +#define jensen_trivial_io_lq 0 +#define jensen_trivial_iounmap 1 +#include <asm/io_trivial.h> + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_JENSEN_H */ diff --git a/arch/alpha/include/asm/kdebug.h b/arch/alpha/include/asm/kdebug.h new file mode 100644 index 00000000..6ece1b03 --- /dev/null +++ b/arch/alpha/include/asm/kdebug.h @@ -0,0 +1 @@ +#include <asm-generic/kdebug.h> diff --git a/arch/alpha/include/asm/kmap_types.h b/arch/alpha/include/asm/kmap_types.h new file mode 100644 index 00000000..a8d4ec8e --- /dev/null +++ b/arch/alpha/include/asm/kmap_types.h @@ -0,0 +1,14 @@ +#ifndef _ASM_KMAP_TYPES_H +#define _ASM_KMAP_TYPES_H + +/* Dummy header just to define km_type. */ + +#ifdef CONFIG_DEBUG_HIGHMEM +#define __WITH_KM_FENCE +#endif + +#include <asm-generic/kmap_types.h> + +#undef __WITH_KM_FENCE + +#endif diff --git a/arch/alpha/include/asm/linkage.h b/arch/alpha/include/asm/linkage.h new file mode 100644 index 00000000..291c2d01 --- /dev/null +++ b/arch/alpha/include/asm/linkage.h @@ -0,0 +1,6 @@ +#ifndef __ASM_LINKAGE_H +#define __ASM_LINKAGE_H + +/* Nothing to see here... */ + +#endif diff --git a/arch/alpha/include/asm/local.h b/arch/alpha/include/asm/local.h new file mode 100644 index 00000000..9c94b845 --- /dev/null +++ b/arch/alpha/include/asm/local.h @@ -0,0 +1,101 @@ +#ifndef _ALPHA_LOCAL_H +#define _ALPHA_LOCAL_H + +#include <linux/percpu.h> +#include <linux/atomic.h> + +typedef struct +{ + atomic_long_t a; +} local_t; + +#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } +#define local_read(l) atomic_long_read(&(l)->a) +#define local_set(l,i) atomic_long_set(&(l)->a, (i)) +#define local_inc(l) atomic_long_inc(&(l)->a) +#define local_dec(l) atomic_long_dec(&(l)->a) +#define local_add(i,l) atomic_long_add((i),(&(l)->a)) +#define local_sub(i,l) atomic_long_sub((i),(&(l)->a)) + +static __inline__ long local_add_return(long i, local_t * l) +{ + long temp, result; + __asm__ __volatile__( + "1: ldq_l %0,%1\n" + " addq %0,%3,%2\n" + " addq %0,%3,%0\n" + " stq_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (l->a.counter), "=&r" (result) + :"Ir" (i), "m" (l->a.counter) : "memory"); + return result; +} + +static __inline__ long local_sub_return(long i, local_t * l) +{ + long temp, result; + __asm__ __volatile__( + "1: ldq_l %0,%1\n" + " subq %0,%3,%2\n" + " subq %0,%3,%0\n" + " stq_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (l->a.counter), "=&r" (result) + :"Ir" (i), "m" (l->a.counter) : "memory"); + return result; +} + +#define local_cmpxchg(l, o, n) \ + (cmpxchg_local(&((l)->a.counter), (o), (n))) +#define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n))) + +/** + * local_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to l... + * @u: ...unless l is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. + */ +#define local_add_unless(l, a, u) \ +({ \ + long c, old; \ + c = local_read(l); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = local_cmpxchg((l), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + +#define local_add_negative(a, l) (local_add_return((a), (l)) < 0) + +#define local_dec_return(l) local_sub_return(1,(l)) + +#define local_inc_return(l) local_add_return(1,(l)) + +#define local_sub_and_test(i,l) (local_sub_return((i), (l)) == 0) + +#define local_inc_and_test(l) (local_add_return(1, (l)) == 0) + +#define local_dec_and_test(l) (local_sub_return(1, (l)) == 0) + +/* Verify if faster than atomic ops */ +#define __local_inc(l) ((l)->a.counter++) +#define __local_dec(l) ((l)->a.counter++) +#define __local_add(i,l) ((l)->a.counter+=(i)) +#define __local_sub(i,l) ((l)->a.counter-=(i)) + +#endif /* _ALPHA_LOCAL_H */ diff --git a/arch/alpha/include/asm/local64.h b/arch/alpha/include/asm/local64.h new file mode 100644 index 00000000..36c93b5c --- /dev/null +++ b/arch/alpha/include/asm/local64.h @@ -0,0 +1 @@ +#include <asm-generic/local64.h> diff --git a/arch/alpha/include/asm/machvec.h b/arch/alpha/include/asm/machvec.h new file mode 100644 index 00000000..72dbf235 --- /dev/null +++ b/arch/alpha/include/asm/machvec.h @@ -0,0 +1,138 @@ +#ifndef __ALPHA_MACHVEC_H +#define __ALPHA_MACHVEC_H 1 + +#include <linux/types.h> + +/* + * This file gets pulled in by asm/io.h from user space. We don't + * want most of this escaping. + */ + +#ifdef __KERNEL__ + +/* The following structure vectors all of the I/O and IRQ manipulation + from the generic kernel to the hardware specific backend. */ + +struct task_struct; +struct mm_struct; +struct vm_area_struct; +struct linux_hose_info; +struct pci_dev; +struct pci_ops; +struct pci_controller; +struct _alpha_agp_info; +struct rtc_time; + +struct alpha_machine_vector +{ + /* This "belongs" down below with the rest of the runtime + variables, but it is convenient for entry.S if these + two slots are at the beginning of the struct. */ + unsigned long hae_cache; + unsigned long *hae_register; + + int nr_irqs; + int rtc_port; + unsigned int max_asn; + unsigned long max_isa_dma_address; + unsigned long irq_probe_mask; + unsigned long iack_sc; + unsigned long min_io_address; + unsigned long min_mem_address; + unsigned long pci_dac_offset; + + void (*mv_pci_tbi)(struct pci_controller *hose, + dma_addr_t start, dma_addr_t end); + + unsigned int (*mv_ioread8)(void __iomem *); + unsigned int (*mv_ioread16)(void __iomem *); + unsigned int (*mv_ioread32)(void __iomem *); + + void (*mv_iowrite8)(u8, void __iomem *); + void (*mv_iowrite16)(u16, void __iomem *); + void (*mv_iowrite32)(u32, void __iomem *); + + u8 (*mv_readb)(const volatile void __iomem *); + u16 (*mv_readw)(const volatile void __iomem *); + u32 (*mv_readl)(const volatile void __iomem *); + u64 (*mv_readq)(const volatile void __iomem *); + + void (*mv_writeb)(u8, volatile void __iomem *); + void (*mv_writew)(u16, volatile void __iomem *); + void (*mv_writel)(u32, volatile void __iomem *); + void (*mv_writeq)(u64, volatile void __iomem *); + + void __iomem *(*mv_ioportmap)(unsigned long); + void __iomem *(*mv_ioremap)(unsigned long, unsigned long); + void (*mv_iounmap)(volatile void __iomem *); + int (*mv_is_ioaddr)(unsigned long); + int (*mv_is_mmio)(const volatile void __iomem *); + + void (*mv_switch_mm)(struct mm_struct *, struct mm_struct *, + struct task_struct *); + void (*mv_activate_mm)(struct mm_struct *, struct mm_struct *); + + void (*mv_flush_tlb_current)(struct mm_struct *); + void (*mv_flush_tlb_current_page)(struct mm_struct * mm, + struct vm_area_struct *vma, + unsigned long addr); + + void (*update_irq_hw)(unsigned long, unsigned long, int); + void (*ack_irq)(unsigned long); + void (*device_interrupt)(unsigned long vector); + void (*machine_check)(unsigned long vector, unsigned long la); + + void (*smp_callin)(void); + void (*init_arch)(void); + void (*init_irq)(void); + void (*init_rtc)(void); + void (*init_pci)(void); + void (*kill_arch)(int); + + u8 (*pci_swizzle)(struct pci_dev *, u8 *); + int (*pci_map_irq)(const struct pci_dev *, u8, u8); + struct pci_ops *pci_ops; + + struct _alpha_agp_info *(*agp_info)(void); + + unsigned int (*rtc_get_time)(struct rtc_time *); + int (*rtc_set_time)(struct rtc_time *); + + const char *vector_name; + + /* NUMA information */ + int (*pa_to_nid)(unsigned long); + int (*cpuid_to_nid)(int); + unsigned long (*node_mem_start)(int); + unsigned long (*node_mem_size)(int); + + /* System specific parameters. */ + union { + struct { + unsigned long gru_int_req_bits; + } cia; + + struct { + unsigned long gamma_bias; + } t2; + + struct { + unsigned int route_tab; + } sio; + } sys; +}; + +extern struct alpha_machine_vector alpha_mv; + +#ifdef CONFIG_ALPHA_GENERIC +extern int alpha_using_srm; +#else +#ifdef CONFIG_ALPHA_SRM +#define alpha_using_srm 1 +#else +#define alpha_using_srm 0 +#endif +#endif /* GENERIC */ + +#endif +#endif /* __ALPHA_MACHVEC_H */ diff --git a/arch/alpha/include/asm/mc146818rtc.h b/arch/alpha/include/asm/mc146818rtc.h new file mode 100644 index 00000000..097703f1 --- /dev/null +++ b/arch/alpha/include/asm/mc146818rtc.h @@ -0,0 +1,27 @@ +/* + * Machine dependent access functions for RTC registers. + */ +#ifndef __ASM_ALPHA_MC146818RTC_H +#define __ASM_ALPHA_MC146818RTC_H + +#include <asm/io.h> + +#ifndef RTC_PORT +#define RTC_PORT(x) (0x70 + (x)) +#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */ +#endif + +/* + * The yet supported machines all access the RTC index register via + * an ISA port access but the way to access the date register differs ... + */ +#define CMOS_READ(addr) ({ \ +outb_p((addr),RTC_PORT(0)); \ +inb_p(RTC_PORT(1)); \ +}) +#define CMOS_WRITE(val, addr) ({ \ +outb_p((addr),RTC_PORT(0)); \ +outb_p((val),RTC_PORT(1)); \ +}) + +#endif /* __ASM_ALPHA_MC146818RTC_H */ diff --git a/arch/alpha/include/asm/mce.h b/arch/alpha/include/asm/mce.h new file mode 100644 index 00000000..660285b9 --- /dev/null +++ b/arch/alpha/include/asm/mce.h @@ -0,0 +1,83 @@ +#ifndef __ALPHA_MCE_H +#define __ALPHA_MCE_H + +/* + * This is the logout header that should be common to all platforms + * (assuming they are running OSF/1 PALcode, I guess). + */ +struct el_common { + unsigned int size; /* size in bytes of logout area */ + unsigned int sbz1 : 30; /* should be zero */ + unsigned int err2 : 1; /* second error */ + unsigned int retry : 1; /* retry flag */ + unsigned int proc_offset; /* processor-specific offset */ + unsigned int sys_offset; /* system-specific offset */ + unsigned int code; /* machine check code */ + unsigned int frame_rev; /* frame revision */ +}; + +/* Machine Check Frame for uncorrectable errors (Large format) + * --- This is used to log uncorrectable errors such as + * double bit ECC errors. + * --- These errors are detected by both processor and systems. + */ +struct el_common_EV5_uncorrectable_mcheck { + unsigned long shadow[8]; /* Shadow reg. 8-14, 25 */ + unsigned long paltemp[24]; /* PAL TEMP REGS. */ + unsigned long exc_addr; /* Address of excepting instruction*/ + unsigned long exc_sum; /* Summary of arithmetic traps. */ + unsigned long exc_mask; /* Exception mask (from exc_sum). */ + unsigned long pal_base; /* Base address for PALcode. */ + unsigned long isr; /* Interrupt Status Reg. */ + unsigned long icsr; /* CURRENT SETUP OF EV5 IBOX */ + unsigned long ic_perr_stat; /* I-CACHE Reg. <11> set Data parity + <12> set TAG parity*/ + unsigned long dc_perr_stat; /* D-CACHE error Reg. Bits set to 1: + <2> Data error in bank 0 + <3> Data error in bank 1 + <4> Tag error in bank 0 + <5> Tag error in bank 1 */ + unsigned long va; /* Effective VA of fault or miss. */ + unsigned long mm_stat; /* Holds the reason for D-stream + fault or D-cache parity errors */ + unsigned long sc_addr; /* Address that was being accessed + when EV5 detected Secondary cache + failure. */ + unsigned long sc_stat; /* Helps determine if the error was + TAG/Data parity(Secondary Cache)*/ + unsigned long bc_tag_addr; /* Contents of EV5 BC_TAG_ADDR */ + unsigned long ei_addr; /* Physical address of any transfer + that is logged in EV5 EI_STAT */ + unsigned long fill_syndrome; /* For correcting ECC errors. */ + unsigned long ei_stat; /* Helps identify reason of any + processor uncorrectable error + at its external interface. */ + unsigned long ld_lock; /* Contents of EV5 LD_LOCK register*/ +}; + +struct el_common_EV6_mcheck { + unsigned int FrameSize; /* Bytes, including this field */ + unsigned int FrameFlags; /* <31> = Retry, <30> = Second Error */ + unsigned int CpuOffset; /* Offset to CPU-specific info */ + unsigned int SystemOffset; /* Offset to system-specific info */ + unsigned int MCHK_Code; + unsigned int MCHK_Frame_Rev; + unsigned long I_STAT; /* EV6 Internal Processor Registers */ + unsigned long DC_STAT; /* (See the 21264 Spec) */ + unsigned long C_ADDR; + unsigned long DC1_SYNDROME; + unsigned long DC0_SYNDROME; + unsigned long C_STAT; + unsigned long C_STS; + unsigned long MM_STAT; + unsigned long EXC_ADDR; + unsigned long IER_CM; + unsigned long ISUM; + unsigned long RESERVED0; + unsigned long PAL_BASE; + unsigned long I_CTL; + unsigned long PCTX; +}; + + +#endif /* __ALPHA_MCE_H */ diff --git a/arch/alpha/include/asm/mman.h b/arch/alpha/include/asm/mman.h new file mode 100644 index 00000000..cbeb3616 --- /dev/null +++ b/arch/alpha/include/asm/mman.h @@ -0,0 +1,66 @@ +#ifndef __ALPHA_MMAN_H__ +#define __ALPHA_MMAN_H__ + +#define PROT_READ 0x1 /* page can be read */ +#define PROT_WRITE 0x2 /* page can be written */ +#define PROT_EXEC 0x4 /* page can be executed */ +#define PROT_SEM 0x8 /* page may be used for atomic ops */ +#define PROT_NONE 0x0 /* page can not be accessed */ +#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ +#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ + +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_TYPE 0x0f /* Mask for type of mapping (OSF/1 is _wrong_) */ +#define MAP_FIXED 0x100 /* Interpret addr exactly */ +#define MAP_ANONYMOUS 0x10 /* don't use a file */ + +/* not used by linux, but here to make sure we don't clash with OSF/1 defines */ +#define _MAP_HASSEMAPHORE 0x0200 +#define _MAP_INHERIT 0x0400 +#define _MAP_UNALIGNED 0x0800 + +/* These are linux-specific */ +#define MAP_GROWSDOWN 0x01000 /* stack-like segment */ +#define MAP_DENYWRITE 0x02000 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x04000 /* mark it as an executable */ +#define MAP_LOCKED 0x08000 /* lock the mapping */ +#define MAP_NORESERVE 0x10000 /* don't check for reservations */ +#define MAP_POPULATE 0x20000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x40000 /* do not block on IO */ +#define MAP_STACK 0x80000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x100000 /* create a huge page mapping */ + +#define MS_ASYNC 1 /* sync memory asynchronously */ +#define MS_SYNC 2 /* synchronous memory sync */ +#define MS_INVALIDATE 4 /* invalidate the caches */ + +#define MCL_CURRENT 8192 /* lock all currently mapped pages */ +#define MCL_FUTURE 16384 /* lock all additions to address space */ + +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_SPACEAVAIL 5 /* ensure resources are available */ +#define MADV_DONTNEED 6 /* don't need these pages */ + +/* common/generic parameters */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ + +#define MADV_MERGEABLE 12 /* KSM may merge identical pages */ +#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ + +#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ +#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ + +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + +/* compatibility flags */ +#define MAP_FILE 0 + +#endif /* __ALPHA_MMAN_H__ */ diff --git a/arch/alpha/include/asm/mmu.h b/arch/alpha/include/asm/mmu.h new file mode 100644 index 00000000..3dc12777 --- /dev/null +++ b/arch/alpha/include/asm/mmu.h @@ -0,0 +1,7 @@ +#ifndef __ALPHA_MMU_H +#define __ALPHA_MMU_H + +/* The alpha MMU context is one "unsigned long" bitmap per CPU */ +typedef unsigned long mm_context_t[NR_CPUS]; + +#endif diff --git a/arch/alpha/include/asm/mmu_context.h b/arch/alpha/include/asm/mmu_context.h new file mode 100644 index 00000000..4c51c053 --- /dev/null +++ b/arch/alpha/include/asm/mmu_context.h @@ -0,0 +1,259 @@ +#ifndef __ALPHA_MMU_CONTEXT_H +#define __ALPHA_MMU_CONTEXT_H + +/* + * get a new mmu context.. + * + * Copyright (C) 1996, Linus Torvalds + */ + +#include <asm/machvec.h> +#include <asm/compiler.h> +#include <asm-generic/mm_hooks.h> + +/* + * Force a context reload. This is needed when we change the page + * table pointer or when we update the ASN of the current process. + */ + +/* Don't get into trouble with dueling __EXTERN_INLINEs. */ +#ifndef __EXTERN_INLINE +#include <asm/io.h> +#endif + + +static inline unsigned long +__reload_thread(struct pcb_struct *pcb) +{ + register unsigned long a0 __asm__("$16"); + register unsigned long v0 __asm__("$0"); + + a0 = virt_to_phys(pcb); + __asm__ __volatile__( + "call_pal %2 #__reload_thread" + : "=r"(v0), "=r"(a0) + : "i"(PAL_swpctx), "r"(a0) + : "$1", "$22", "$23", "$24", "$25"); + + return v0; +} + + +/* + * The maximum ASN's the processor supports. On the EV4 this is 63 + * but the PAL-code doesn't actually use this information. On the + * EV5 this is 127, and EV6 has 255. + * + * On the EV4, the ASNs are more-or-less useless anyway, as they are + * only used as an icache tag, not for TB entries. On the EV5 and EV6, + * ASN's also validate the TB entries, and thus make a lot more sense. + * + * The EV4 ASN's don't even match the architecture manual, ugh. And + * I quote: "If a processor implements address space numbers (ASNs), + * and the old PTE has the Address Space Match (ASM) bit clear (ASNs + * in use) and the Valid bit set, then entries can also effectively be + * made coherent by assigning a new, unused ASN to the currently + * running process and not reusing the previous ASN before calling the + * appropriate PALcode routine to invalidate the translation buffer (TB)". + * + * In short, the EV4 has a "kind of" ASN capability, but it doesn't actually + * work correctly and can thus not be used (explaining the lack of PAL-code + * support). + */ +#define EV4_MAX_ASN 63 +#define EV5_MAX_ASN 127 +#define EV6_MAX_ASN 255 + +#ifdef CONFIG_ALPHA_GENERIC +# define MAX_ASN (alpha_mv.max_asn) +#else +# ifdef CONFIG_ALPHA_EV4 +# define MAX_ASN EV4_MAX_ASN +# elif defined(CONFIG_ALPHA_EV5) +# define MAX_ASN EV5_MAX_ASN +# else +# define MAX_ASN EV6_MAX_ASN +# endif +#endif + +/* + * cpu_last_asn(processor): + * 63 0 + * +-------------+----------------+--------------+ + * | asn version | this processor | hardware asn | + * +-------------+----------------+--------------+ + */ + +#include <asm/smp.h> +#ifdef CONFIG_SMP +#define cpu_last_asn(cpuid) (cpu_data[cpuid].last_asn) +#else +extern unsigned long last_asn; +#define cpu_last_asn(cpuid) last_asn +#endif /* CONFIG_SMP */ + +#define WIDTH_HARDWARE_ASN 8 +#define ASN_FIRST_VERSION (1UL << WIDTH_HARDWARE_ASN) +#define HARDWARE_ASN_MASK ((1UL << WIDTH_HARDWARE_ASN) - 1) + +/* + * NOTE! The way this is set up, the high bits of the "asn_cache" (and + * the "mm->context") are the ASN _version_ code. A version of 0 is + * always considered invalid, so to invalidate another process you only + * need to do "p->mm->context = 0". + * + * If we need more ASN's than the processor has, we invalidate the old + * user TLB's (tbiap()) and start a new ASN version. That will automatically + * force a new asn for any other processes the next time they want to + * run. + */ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __MMU_EXTERN_INLINE +#endif + +extern inline unsigned long +__get_new_mm_context(struct mm_struct *mm, long cpu) +{ + unsigned long asn = cpu_last_asn(cpu); + unsigned long next = asn + 1; + + if ((asn & HARDWARE_ASN_MASK) >= MAX_ASN) { + tbiap(); + imb(); + next = (asn & ~HARDWARE_ASN_MASK) + ASN_FIRST_VERSION; + } + cpu_last_asn(cpu) = next; + return next; +} + +__EXTERN_INLINE void +ev5_switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, + struct task_struct *next) +{ + /* Check if our ASN is of an older version, and thus invalid. */ + unsigned long asn; + unsigned long mmc; + long cpu = smp_processor_id(); + +#ifdef CONFIG_SMP + cpu_data[cpu].asn_lock = 1; + barrier(); +#endif + asn = cpu_last_asn(cpu); + mmc = next_mm->context[cpu]; + if ((mmc ^ asn) & ~HARDWARE_ASN_MASK) { + mmc = __get_new_mm_context(next_mm, cpu); + next_mm->context[cpu] = mmc; + } +#ifdef CONFIG_SMP + else + cpu_data[cpu].need_new_asn = 1; +#endif + + /* Always update the PCB ASN. Another thread may have allocated + a new mm->context (via flush_tlb_mm) without the ASN serial + number wrapping. We have no way to detect when this is needed. */ + task_thread_info(next)->pcb.asn = mmc & HARDWARE_ASN_MASK; +} + +__EXTERN_INLINE void +ev4_switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, + struct task_struct *next) +{ + /* As described, ASN's are broken for TLB usage. But we can + optimize for switching between threads -- if the mm is + unchanged from current we needn't flush. */ + /* ??? May not be needed because EV4 PALcode recognizes that + ASN's are broken and does a tbiap itself on swpctx, under + the "Must set ASN or flush" rule. At least this is true + for a 1992 SRM, reports Joseph Martin (jmartin@hlo.dec.com). + I'm going to leave this here anyway, just to Be Sure. -- r~ */ + if (prev_mm != next_mm) + tbiap(); + + /* Do continue to allocate ASNs, because we can still use them + to avoid flushing the icache. */ + ev5_switch_mm(prev_mm, next_mm, next); +} + +extern void __load_new_mm_context(struct mm_struct *); + +#ifdef CONFIG_SMP +#define check_mmu_context() \ +do { \ + int cpu = smp_processor_id(); \ + cpu_data[cpu].asn_lock = 0; \ + barrier(); \ + if (cpu_data[cpu].need_new_asn) { \ + struct mm_struct * mm = current->active_mm; \ + cpu_data[cpu].need_new_asn = 0; \ + if (!mm->context[cpu]) \ + __load_new_mm_context(mm); \ + } \ +} while(0) +#else +#define check_mmu_context() do { } while(0) +#endif + +__EXTERN_INLINE void +ev5_activate_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm) +{ + __load_new_mm_context(next_mm); +} + +__EXTERN_INLINE void +ev4_activate_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm) +{ + __load_new_mm_context(next_mm); + tbiap(); +} + +#define deactivate_mm(tsk,mm) do { } while (0) + +#ifdef CONFIG_ALPHA_GENERIC +# define switch_mm(a,b,c) alpha_mv.mv_switch_mm((a),(b),(c)) +# define activate_mm(x,y) alpha_mv.mv_activate_mm((x),(y)) +#else +# ifdef CONFIG_ALPHA_EV4 +# define switch_mm(a,b,c) ev4_switch_mm((a),(b),(c)) +# define activate_mm(x,y) ev4_activate_mm((x),(y)) +# else +# define switch_mm(a,b,c) ev5_switch_mm((a),(b),(c)) +# define activate_mm(x,y) ev5_activate_mm((x),(y)) +# endif +#endif + +static inline int +init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + int i; + + for_each_online_cpu(i) + mm->context[i] = 0; + if (tsk != current) + task_thread_info(tsk)->pcb.ptbr + = ((unsigned long)mm->pgd - IDENT_ADDR) >> PAGE_SHIFT; + return 0; +} + +extern inline void +destroy_context(struct mm_struct *mm) +{ + /* Nothing to do. */ +} + +static inline void +enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ + task_thread_info(tsk)->pcb.ptbr + = ((unsigned long)mm->pgd - IDENT_ADDR) >> PAGE_SHIFT; +} + +#ifdef __MMU_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __MMU_EXTERN_INLINE +#endif + +#endif /* __ALPHA_MMU_CONTEXT_H */ diff --git a/arch/alpha/include/asm/mmzone.h b/arch/alpha/include/asm/mmzone.h new file mode 100644 index 00000000..445dc42e --- /dev/null +++ b/arch/alpha/include/asm/mmzone.h @@ -0,0 +1,114 @@ +/* + * Written by Kanoj Sarcar (kanoj@sgi.com) Aug 99 + * Adapted for the alpha wildfire architecture Jan 2001. + */ +#ifndef _ASM_MMZONE_H_ +#define _ASM_MMZONE_H_ + +#include <asm/smp.h> + +struct bootmem_data_t; /* stupid forward decl. */ + +/* + * Following are macros that are specific to this numa platform. + */ + +extern pg_data_t node_data[]; + +#define alpha_pa_to_nid(pa) \ + (alpha_mv.pa_to_nid \ + ? alpha_mv.pa_to_nid(pa) \ + : (0)) +#define node_mem_start(nid) \ + (alpha_mv.node_mem_start \ + ? alpha_mv.node_mem_start(nid) \ + : (0UL)) +#define node_mem_size(nid) \ + (alpha_mv.node_mem_size \ + ? alpha_mv.node_mem_size(nid) \ + : ((nid) ? (0UL) : (~0UL))) + +#define pa_to_nid(pa) alpha_pa_to_nid(pa) +#define NODE_DATA(nid) (&node_data[(nid)]) + +#define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn) + +#if 1 +#define PLAT_NODE_DATA_LOCALNR(p, n) \ + (((p) >> PAGE_SHIFT) - PLAT_NODE_DATA(n)->gendata.node_start_pfn) +#else +static inline unsigned long +PLAT_NODE_DATA_LOCALNR(unsigned long p, int n) +{ + unsigned long temp; + temp = p >> PAGE_SHIFT; + return temp - PLAT_NODE_DATA(n)->gendata.node_start_pfn; +} +#endif + +#ifdef CONFIG_DISCONTIGMEM + +/* + * Following are macros that each numa implementation must define. + */ + +/* + * Given a kernel address, find the home node of the underlying memory. + */ +#define kvaddr_to_nid(kaddr) pa_to_nid(__pa(kaddr)) + +/* + * Given a kaddr, LOCAL_BASE_ADDR finds the owning node of the memory + * and returns the kaddr corresponding to first physical page in the + * node's mem_map. + */ +#define LOCAL_BASE_ADDR(kaddr) \ + ((unsigned long)__va(NODE_DATA(kvaddr_to_nid(kaddr))->node_start_pfn \ + << PAGE_SHIFT)) + +/* XXX: FIXME -- wli */ +#define kern_addr_valid(kaddr) (0) + +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) + +#define VALID_PAGE(page) (((page) - mem_map) < max_mapnr) + +#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> 32)) +#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> 32)) +#define pte_pfn(pte) (pte_val(pte) >> 32) + +#define mk_pte(page, pgprot) \ +({ \ + pte_t pte; \ + unsigned long pfn; \ + \ + pfn = page_to_pfn(page) << 32; \ + pte_val(pte) = pfn | pgprot_val(pgprot); \ + \ + pte; \ +}) + +#define pte_page(x) \ +({ \ + unsigned long kvirt; \ + struct page * __xx; \ + \ + kvirt = (unsigned long)__va(pte_val(x) >> (32-PAGE_SHIFT)); \ + __xx = virt_to_page(kvirt); \ + \ + __xx; \ +}) + +#define page_to_pa(page) \ + (page_to_pfn(page) << PAGE_SHIFT) + +#define pfn_to_nid(pfn) pa_to_nid(((u64)(pfn) << PAGE_SHIFT)) +#define pfn_valid(pfn) \ + (((pfn) - node_start_pfn(pfn_to_nid(pfn))) < \ + node_spanned_pages(pfn_to_nid(pfn))) \ + +#define virt_addr_valid(kaddr) pfn_valid((__pa(kaddr) >> PAGE_SHIFT)) + +#endif /* CONFIG_DISCONTIGMEM */ + +#endif /* _ASM_MMZONE_H_ */ diff --git a/arch/alpha/include/asm/module.h b/arch/alpha/include/asm/module.h new file mode 100644 index 00000000..7b63743c --- /dev/null +++ b/arch/alpha/include/asm/module.h @@ -0,0 +1,23 @@ +#ifndef _ALPHA_MODULE_H +#define _ALPHA_MODULE_H + +struct mod_arch_specific +{ + unsigned int gotsecindex; +}; + +#define Elf_Sym Elf64_Sym +#define Elf_Shdr Elf64_Shdr +#define Elf_Ehdr Elf64_Ehdr +#define Elf_Phdr Elf64_Phdr +#define Elf_Dyn Elf64_Dyn +#define Elf_Rel Elf64_Rel +#define Elf_Rela Elf64_Rela + +#define ARCH_SHF_SMALL SHF_ALPHA_GPREL + +#ifdef MODULE +asm(".section .got,\"aws\",@progbits; .align 3; .previous"); +#endif + +#endif /*_ALPHA_MODULE_H*/ diff --git a/arch/alpha/include/asm/msgbuf.h b/arch/alpha/include/asm/msgbuf.h new file mode 100644 index 00000000..98496501 --- /dev/null +++ b/arch/alpha/include/asm/msgbuf.h @@ -0,0 +1,27 @@ +#ifndef _ALPHA_MSGBUF_H +#define _ALPHA_MSGBUF_H + +/* + * The msqid64_ds structure for alpha architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 2 miscellaneous 64-bit values + */ + +struct msqid64_ds { + struct ipc64_perm msg_perm; + __kernel_time_t msg_stime; /* last msgsnd time */ + __kernel_time_t msg_rtime; /* last msgrcv time */ + __kernel_time_t msg_ctime; /* last change time */ + unsigned long msg_cbytes; /* current number of bytes on queue */ + unsigned long msg_qnum; /* number of messages in queue */ + unsigned long msg_qbytes; /* max number of bytes on queue */ + __kernel_pid_t msg_lspid; /* pid of last msgsnd */ + __kernel_pid_t msg_lrpid; /* last receive pid */ + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* _ALPHA_MSGBUF_H */ diff --git a/arch/alpha/include/asm/mutex.h b/arch/alpha/include/asm/mutex.h new file mode 100644 index 00000000..458c1f7f --- /dev/null +++ b/arch/alpha/include/asm/mutex.h @@ -0,0 +1,9 @@ +/* + * Pull in the generic implementation for the mutex fastpath. + * + * TODO: implement optimized primitives instead, or leave the generic + * implementation in place, or pick the atomic_xchg() based generic + * implementation. (see asm-generic/mutex-xchg.h for details) + */ + +#include <asm-generic/mutex-dec.h> diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h new file mode 100644 index 00000000..07af0625 --- /dev/null +++ b/arch/alpha/include/asm/page.h @@ -0,0 +1,98 @@ +#ifndef _ALPHA_PAGE_H +#define _ALPHA_PAGE_H + +#include <linux/const.h> +#include <asm/pal.h> + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 13 +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#ifndef __ASSEMBLY__ + +#define STRICT_MM_TYPECHECKS + +extern void clear_page(void *page); +#define clear_user_page(page, vaddr, pg) clear_page(page) + +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vmaddr) +#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + +extern void copy_page(void * _to, void * _from); +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +#ifdef STRICT_MM_TYPECHECKS +/* + * These are used to make use of C type-checking.. + */ +typedef struct { unsigned long pte; } pte_t; +typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pgd; } pgd_t; +typedef struct { unsigned long pgprot; } pgprot_t; + +#define pte_val(x) ((x).pte) +#define pmd_val(x) ((x).pmd) +#define pgd_val(x) ((x).pgd) +#define pgprot_val(x) ((x).pgprot) + +#define __pte(x) ((pte_t) { (x) } ) +#define __pmd(x) ((pmd_t) { (x) } ) +#define __pgd(x) ((pgd_t) { (x) } ) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +#else +/* + * .. while these make it easier on the compiler + */ +typedef unsigned long pte_t; +typedef unsigned long pmd_t; +typedef unsigned long pgd_t; +typedef unsigned long pgprot_t; + +#define pte_val(x) (x) +#define pmd_val(x) (x) +#define pgd_val(x) (x) +#define pgprot_val(x) (x) + +#define __pte(x) (x) +#define __pgd(x) (x) +#define __pgprot(x) (x) + +#endif /* STRICT_MM_TYPECHECKS */ + +typedef struct page *pgtable_t; + +#ifdef USE_48_BIT_KSEG +#define PAGE_OFFSET 0xffff800000000000UL +#else +#define PAGE_OFFSET 0xfffffc0000000000UL +#endif + +#else + +#ifdef USE_48_BIT_KSEG +#define PAGE_OFFSET 0xffff800000000000 +#else +#define PAGE_OFFSET 0xfffffc0000000000 +#endif + +#endif /* !__ASSEMBLY__ */ + +#define __pa(x) ((unsigned long) (x) - PAGE_OFFSET) +#define __va(x) ((void *)((unsigned long) (x) + PAGE_OFFSET)) +#ifndef CONFIG_DISCONTIGMEM +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) + +#define pfn_valid(pfn) ((pfn) < max_mapnr) +#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#endif /* CONFIG_DISCONTIGMEM */ + +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#include <asm-generic/memory_model.h> +#include <asm-generic/getorder.h> + +#endif /* _ALPHA_PAGE_H */ diff --git a/arch/alpha/include/asm/pal.h b/arch/alpha/include/asm/pal.h new file mode 100644 index 00000000..6699ee58 --- /dev/null +++ b/arch/alpha/include/asm/pal.h @@ -0,0 +1,163 @@ +#ifndef __ALPHA_PAL_H +#define __ALPHA_PAL_H + +/* + * Common PAL-code + */ +#define PAL_halt 0 +#define PAL_cflush 1 +#define PAL_draina 2 +#define PAL_bpt 128 +#define PAL_bugchk 129 +#define PAL_chmk 131 +#define PAL_callsys 131 +#define PAL_imb 134 +#define PAL_rduniq 158 +#define PAL_wruniq 159 +#define PAL_gentrap 170 +#define PAL_nphalt 190 + +/* + * VMS specific PAL-code + */ +#define PAL_swppal 10 +#define PAL_mfpr_vptb 41 + +/* + * OSF specific PAL-code + */ +#define PAL_cserve 9 +#define PAL_wripir 13 +#define PAL_rdmces 16 +#define PAL_wrmces 17 +#define PAL_wrfen 43 +#define PAL_wrvptptr 45 +#define PAL_jtopal 46 +#define PAL_swpctx 48 +#define PAL_wrval 49 +#define PAL_rdval 50 +#define PAL_tbi 51 +#define PAL_wrent 52 +#define PAL_swpipl 53 +#define PAL_rdps 54 +#define PAL_wrkgp 55 +#define PAL_wrusp 56 +#define PAL_wrperfmon 57 +#define PAL_rdusp 58 +#define PAL_whami 60 +#define PAL_retsys 61 +#define PAL_rti 63 + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + +extern void halt(void) __attribute__((noreturn)); +#define __halt() __asm__ __volatile__ ("call_pal %0 #halt" : : "i" (PAL_halt)) + +#define imb() \ +__asm__ __volatile__ ("call_pal %0 #imb" : : "i" (PAL_imb) : "memory") + +#define draina() \ +__asm__ __volatile__ ("call_pal %0 #draina" : : "i" (PAL_draina) : "memory") + +#define __CALL_PAL_R0(NAME, TYPE) \ +extern inline TYPE NAME(void) \ +{ \ + register TYPE __r0 __asm__("$0"); \ + __asm__ __volatile__( \ + "call_pal %1 # " #NAME \ + :"=r" (__r0) \ + :"i" (PAL_ ## NAME) \ + :"$1", "$16", "$22", "$23", "$24", "$25"); \ + return __r0; \ +} + +#define __CALL_PAL_W1(NAME, TYPE0) \ +extern inline void NAME(TYPE0 arg0) \ +{ \ + register TYPE0 __r16 __asm__("$16") = arg0; \ + __asm__ __volatile__( \ + "call_pal %1 # "#NAME \ + : "=r"(__r16) \ + : "i"(PAL_ ## NAME), "0"(__r16) \ + : "$1", "$22", "$23", "$24", "$25"); \ +} + +#define __CALL_PAL_W2(NAME, TYPE0, TYPE1) \ +extern inline void NAME(TYPE0 arg0, TYPE1 arg1) \ +{ \ + register TYPE0 __r16 __asm__("$16") = arg0; \ + register TYPE1 __r17 __asm__("$17") = arg1; \ + __asm__ __volatile__( \ + "call_pal %2 # "#NAME \ + : "=r"(__r16), "=r"(__r17) \ + : "i"(PAL_ ## NAME), "0"(__r16), "1"(__r17) \ + : "$1", "$22", "$23", "$24", "$25"); \ +} + +#define __CALL_PAL_RW1(NAME, RTYPE, TYPE0) \ +extern inline RTYPE NAME(TYPE0 arg0) \ +{ \ + register RTYPE __r0 __asm__("$0"); \ + register TYPE0 __r16 __asm__("$16") = arg0; \ + __asm__ __volatile__( \ + "call_pal %2 # "#NAME \ + : "=r"(__r16), "=r"(__r0) \ + : "i"(PAL_ ## NAME), "0"(__r16) \ + : "$1", "$22", "$23", "$24", "$25"); \ + return __r0; \ +} + +#define __CALL_PAL_RW2(NAME, RTYPE, TYPE0, TYPE1) \ +extern inline RTYPE NAME(TYPE0 arg0, TYPE1 arg1) \ +{ \ + register RTYPE __r0 __asm__("$0"); \ + register TYPE0 __r16 __asm__("$16") = arg0; \ + register TYPE1 __r17 __asm__("$17") = arg1; \ + __asm__ __volatile__( \ + "call_pal %3 # "#NAME \ + : "=r"(__r16), "=r"(__r17), "=r"(__r0) \ + : "i"(PAL_ ## NAME), "0"(__r16), "1"(__r17) \ + : "$1", "$22", "$23", "$24", "$25"); \ + return __r0; \ +} + +__CALL_PAL_W1(cflush, unsigned long); +__CALL_PAL_R0(rdmces, unsigned long); +__CALL_PAL_R0(rdps, unsigned long); +__CALL_PAL_R0(rdusp, unsigned long); +__CALL_PAL_RW1(swpipl, unsigned long, unsigned long); +__CALL_PAL_R0(whami, unsigned long); +__CALL_PAL_W2(wrent, void*, unsigned long); +__CALL_PAL_W1(wripir, unsigned long); +__CALL_PAL_W1(wrkgp, unsigned long); +__CALL_PAL_W1(wrmces, unsigned long); +__CALL_PAL_RW2(wrperfmon, unsigned long, unsigned long, unsigned long); +__CALL_PAL_W1(wrusp, unsigned long); +__CALL_PAL_W1(wrvptptr, unsigned long); + +/* + * TB routines.. + */ +#define __tbi(nr,arg,arg1...) \ +({ \ + register unsigned long __r16 __asm__("$16") = (nr); \ + register unsigned long __r17 __asm__("$17"); arg; \ + __asm__ __volatile__( \ + "call_pal %3 #__tbi" \ + :"=r" (__r16),"=r" (__r17) \ + :"0" (__r16),"i" (PAL_tbi) ,##arg1 \ + :"$0", "$1", "$22", "$23", "$24", "$25"); \ +}) + +#define tbi(x,y) __tbi(x,__r17=(y),"1" (__r17)) +#define tbisi(x) __tbi(1,__r17=(x),"1" (__r17)) +#define tbisd(x) __tbi(2,__r17=(x),"1" (__r17)) +#define tbis(x) __tbi(3,__r17=(x),"1" (__r17)) +#define tbiap() __tbi(-1, /* no second argument */) +#define tbia() __tbi(-2, /* no second argument */) + +#endif /* !__ASSEMBLY__ */ +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_PAL_H */ diff --git a/arch/alpha/include/asm/param.h b/arch/alpha/include/asm/param.h new file mode 100644 index 00000000..e691ecfe --- /dev/null +++ b/arch/alpha/include/asm/param.h @@ -0,0 +1,27 @@ +#ifndef _ASM_ALPHA_PARAM_H +#define _ASM_ALPHA_PARAM_H + +/* ??? Gross. I don't want to parameterize this, and supposedly the + hardware ignores reprogramming. We also need userland buy-in to the + change in HZ, since this is visible in the wait4 resources etc. */ + +#ifdef __KERNEL__ +#define HZ CONFIG_HZ +#define USER_HZ HZ +#else +#define HZ 1024 +#endif + +#define EXEC_PAGESIZE 8192 + +#ifndef NOGROUP +#define NOGROUP (-1) +#endif + +#define MAXHOSTNAMELEN 64 /* max length of hostname */ + +#ifdef __KERNEL__ +# define CLOCKS_PER_SEC HZ /* frequency at which times() counts */ +#endif + +#endif /* _ASM_ALPHA_PARAM_H */ diff --git a/arch/alpha/include/asm/parport.h b/arch/alpha/include/asm/parport.h new file mode 100644 index 00000000..c5ee7cbb --- /dev/null +++ b/arch/alpha/include/asm/parport.h @@ -0,0 +1,18 @@ +/* + * parport.h: platform-specific PC-style parport initialisation + * + * Copyright (C) 1999, 2000 Tim Waugh <tim@cyberelk.demon.co.uk> + * + * This file should only be included by drivers/parport/parport_pc.c. + */ + +#ifndef _ASM_AXP_PARPORT_H +#define _ASM_AXP_PARPORT_H 1 + +static int __devinit parport_pc_find_isa_ports (int autoirq, int autodma); +static int __devinit parport_pc_find_nonpci_ports (int autoirq, int autodma) +{ + return parport_pc_find_isa_ports (autoirq, autodma); +} + +#endif /* !(_ASM_AXP_PARPORT_H) */ diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h new file mode 100644 index 00000000..d01afb78 --- /dev/null +++ b/arch/alpha/include/asm/pci.h @@ -0,0 +1,138 @@ +#ifndef __ALPHA_PCI_H +#define __ALPHA_PCI_H + +#ifdef __KERNEL__ + +#include <linux/spinlock.h> +#include <linux/dma-mapping.h> +#include <asm/scatterlist.h> +#include <asm/machvec.h> +#include <asm-generic/pci-bridge.h> + +/* + * The following structure is used to manage multiple PCI busses. + */ + +struct pci_dev; +struct pci_bus; +struct resource; +struct pci_iommu_arena; +struct page; + +/* A controller. Used to manage multiple PCI busses. */ + +struct pci_controller { + struct pci_controller *next; + struct pci_bus *bus; + struct resource *io_space; + struct resource *mem_space; + + /* The following are for reporting to userland. The invariant is + that if we report a BWX-capable dense memory, we do not report + a sparse memory at all, even if it exists. */ + unsigned long sparse_mem_base; + unsigned long dense_mem_base; + unsigned long sparse_io_base; + unsigned long dense_io_base; + + /* This one's for the kernel only. It's in KSEG somewhere. */ + unsigned long config_space_base; + + unsigned int index; + /* For compatibility with current (as of July 2003) pciutils + and XFree86. Eventually will be removed. */ + unsigned int need_domain_info; + + struct pci_iommu_arena *sg_pci; + struct pci_iommu_arena *sg_isa; + + void *sysdata; +}; + +/* Override the logic in pci_scan_bus for skipping already-configured + bus numbers. */ + +#define pcibios_assign_all_busses() 1 + +#define PCIBIOS_MIN_IO alpha_mv.min_io_address +#define PCIBIOS_MIN_MEM alpha_mv.min_mem_address + +extern void pcibios_set_master(struct pci_dev *dev); + +extern inline void pcibios_penalize_isa_irq(int irq, int active) +{ + /* We don't do dynamic PCI IRQ allocation */ +} + +/* IOMMU controls. */ + +/* The PCI address space does not equal the physical memory address space. + The networking and block device layers use this boolean for bounce buffer + decisions. */ +#define PCI_DMA_BUS_IS_PHYS 0 + +#ifdef CONFIG_PCI + +/* implement the pci_ DMA API in terms of the generic device dma_ one */ +#include <asm-generic/pci-dma-compat.h> + +static inline void pci_dma_burst_advice(struct pci_dev *pdev, + enum pci_dma_burst_strategy *strat, + unsigned long *strategy_parameter) +{ + unsigned long cacheline_size; + u8 byte; + + pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte); + if (byte == 0) + cacheline_size = 1024; + else + cacheline_size = (int) byte * 4; + + *strat = PCI_DMA_BURST_BOUNDARY; + *strategy_parameter = cacheline_size; +} +#endif + +/* TODO: integrate with include/asm-generic/pci.h ? */ +static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) +{ + return channel ? 15 : 14; +} + +#define pci_domain_nr(bus) ((struct pci_controller *)(bus)->sysdata)->index + +static inline int pci_proc_domain(struct pci_bus *bus) +{ + struct pci_controller *hose = bus->sysdata; + return hose->need_domain_info; +} + +#endif /* __KERNEL__ */ + +/* Values for the `which' argument to sys_pciconfig_iobase. */ +#define IOBASE_HOSE 0 +#define IOBASE_SPARSE_MEM 1 +#define IOBASE_DENSE_MEM 2 +#define IOBASE_SPARSE_IO 3 +#define IOBASE_DENSE_IO 4 +#define IOBASE_ROOT_BUS 5 +#define IOBASE_FROM_HOSE 0x10000 + +extern struct pci_dev *isa_bridge; + +extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, + size_t count); +extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, + size_t count); +extern int pci_mmap_legacy_page_range(struct pci_bus *bus, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_state); +extern void pci_adjust_legacy_attr(struct pci_bus *bus, + enum pci_mmap_state mmap_type); +#define HAVE_PCI_LEGACY 1 + +extern int pci_create_resource_files(struct pci_dev *dev); +extern void pci_remove_resource_files(struct pci_dev *dev); + +#endif /* __ALPHA_PCI_H */ diff --git a/arch/alpha/include/asm/percpu.h b/arch/alpha/include/asm/percpu.h new file mode 100644 index 00000000..2c12378e --- /dev/null +++ b/arch/alpha/include/asm/percpu.h @@ -0,0 +1,18 @@ +#ifndef __ALPHA_PERCPU_H +#define __ALPHA_PERCPU_H + +/* + * To calculate addresses of locally defined variables, GCC uses + * 32-bit displacement from the GP. Which doesn't work for per cpu + * variables in modules, as an offset to the kernel per cpu area is + * way above 4G. + * + * Always use weak definitions for percpu variables in modules. + */ +#if defined(MODULE) && defined(CONFIG_SMP) +#define ARCH_NEEDS_WEAK_PER_CPU +#endif + +#include <asm-generic/percpu.h> + +#endif /* __ALPHA_PERCPU_H */ diff --git a/arch/alpha/include/asm/perf_event.h b/arch/alpha/include/asm/perf_event.h new file mode 100644 index 00000000..5996e7a6 --- /dev/null +++ b/arch/alpha/include/asm/perf_event.h @@ -0,0 +1,4 @@ +#ifndef __ASM_ALPHA_PERF_EVENT_H +#define __ASM_ALPHA_PERF_EVENT_H + +#endif /* __ASM_ALPHA_PERF_EVENT_H */ diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h new file mode 100644 index 00000000..bc2a0daf --- /dev/null +++ b/arch/alpha/include/asm/pgalloc.h @@ -0,0 +1,88 @@ +#ifndef _ALPHA_PGALLOC_H +#define _ALPHA_PGALLOC_H + +#include <linux/mm.h> +#include <linux/mmzone.h> + +/* + * Allocate and free page tables. The xxx_kernel() versions are + * used to allocate a kernel page table - this turns on ASN bits + * if any. + */ + +static inline void +pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) +{ + pmd_set(pmd, (pte_t *)(page_to_pa(pte) + PAGE_OFFSET)); +} +#define pmd_pgtable(pmd) pmd_page(pmd) + +static inline void +pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) +{ + pmd_set(pmd, pte); +} + +static inline void +pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) +{ + pgd_set(pgd, pmd); +} + +extern pgd_t *pgd_alloc(struct mm_struct *mm); + +static inline void +pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + free_page((unsigned long)pgd); +} + +static inline pmd_t * +pmd_alloc_one(struct mm_struct *mm, unsigned long address) +{ + pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + return ret; +} + +static inline void +pmd_free(struct mm_struct *mm, pmd_t *pmd) +{ + free_page((unsigned long)pmd); +} + +static inline pte_t * +pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +{ + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + return pte; +} + +static inline void +pte_free_kernel(struct mm_struct *mm, pte_t *pte) +{ + free_page((unsigned long)pte); +} + +static inline pgtable_t +pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + pte_t *pte = pte_alloc_one_kernel(mm, address); + struct page *page; + + if (!pte) + return NULL; + page = virt_to_page(pte); + pgtable_page_ctor(page); + return page; +} + +static inline void +pte_free(struct mm_struct *mm, pgtable_t page) +{ + pgtable_page_dtor(page); + __free_page(page); +} + +#define check_pgt_cache() do { } while (0) + +#endif /* _ALPHA_PGALLOC_H */ diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h new file mode 100644 index 00000000..81a4342d --- /dev/null +++ b/arch/alpha/include/asm/pgtable.h @@ -0,0 +1,379 @@ +#ifndef _ALPHA_PGTABLE_H +#define _ALPHA_PGTABLE_H + +#include <asm-generic/4level-fixup.h> + +/* + * This file contains the functions and defines necessary to modify and use + * the Alpha page table tree. + * + * This hopefully works with any standard Alpha page-size, as defined + * in <asm/page.h> (currently 8192). + */ +#include <linux/mmzone.h> + +#include <asm/page.h> +#include <asm/processor.h> /* For TASK_SIZE */ +#include <asm/machvec.h> +#include <asm/setup.h> + +struct mm_struct; +struct vm_area_struct; + +/* Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval)) +#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) + +/* PMD_SHIFT determines the size of the area a second-level page table can map */ +#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3)) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) + +/* PGDIR_SHIFT determines what a third-level page table entry can map */ +#define PGDIR_SHIFT (PAGE_SHIFT + 2*(PAGE_SHIFT-3)) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* + * Entries per page directory level: the Alpha is three-level, with + * all levels having a one-page page table. + */ +#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3)) +#define PTRS_PER_PMD (1UL << (PAGE_SHIFT-3)) +#define PTRS_PER_PGD (1UL << (PAGE_SHIFT-3)) +#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) +#define FIRST_USER_ADDRESS 0 + +/* Number of pointers that fit on a page: this will go away. */ +#define PTRS_PER_PAGE (1UL << (PAGE_SHIFT-3)) + +#ifdef CONFIG_ALPHA_LARGE_VMALLOC +#define VMALLOC_START 0xfffffe0000000000 +#else +#define VMALLOC_START (-2*PGDIR_SIZE) +#endif +#define VMALLOC_END (-PGDIR_SIZE) + +/* + * OSF/1 PAL-code-imposed page table bits + */ +#define _PAGE_VALID 0x0001 +#define _PAGE_FOR 0x0002 /* used for page protection (fault on read) */ +#define _PAGE_FOW 0x0004 /* used for page protection (fault on write) */ +#define _PAGE_FOE 0x0008 /* used for page protection (fault on exec) */ +#define _PAGE_ASM 0x0010 +#define _PAGE_KRE 0x0100 /* xxx - see below on the "accessed" bit */ +#define _PAGE_URE 0x0200 /* xxx */ +#define _PAGE_KWE 0x1000 /* used to do the dirty bit in software */ +#define _PAGE_UWE 0x2000 /* used to do the dirty bit in software */ + +/* .. and these are ours ... */ +#define _PAGE_DIRTY 0x20000 +#define _PAGE_ACCESSED 0x40000 +#define _PAGE_FILE 0x80000 /* set:pagecache, unset:swap */ + +/* + * NOTE! The "accessed" bit isn't necessarily exact: it can be kept exactly + * by software (use the KRE/URE/KWE/UWE bits appropriately), but I'll fake it. + * Under Linux/AXP, the "accessed" bit just means "read", and I'll just use + * the KRE/URE bits to watch for it. That way we don't need to overload the + * KWE/UWE bits with both handling dirty and accessed. + * + * Note that the kernel uses the accessed bit just to check whether to page + * out a page or not, so it doesn't have to be exact anyway. + */ + +#define __DIRTY_BITS (_PAGE_DIRTY | _PAGE_KWE | _PAGE_UWE) +#define __ACCESS_BITS (_PAGE_ACCESSED | _PAGE_KRE | _PAGE_URE) + +#define _PFN_MASK 0xFFFFFFFF00000000UL + +#define _PAGE_TABLE (_PAGE_VALID | __DIRTY_BITS | __ACCESS_BITS) +#define _PAGE_CHG_MASK (_PFN_MASK | __DIRTY_BITS | __ACCESS_BITS) + +/* + * All the normal masks have the "page accessed" bits on, as any time they are used, + * the page is accessed. They are cleared only by the page-out routines + */ +#define PAGE_NONE __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOR | _PAGE_FOW | _PAGE_FOE) +#define PAGE_SHARED __pgprot(_PAGE_VALID | __ACCESS_BITS) +#define PAGE_COPY __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW) +#define PAGE_READONLY __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW) +#define PAGE_KERNEL __pgprot(_PAGE_VALID | _PAGE_ASM | _PAGE_KRE | _PAGE_KWE) + +#define _PAGE_NORMAL(x) __pgprot(_PAGE_VALID | __ACCESS_BITS | (x)) + +#define _PAGE_P(x) _PAGE_NORMAL((x) | (((x) & _PAGE_FOW)?0:_PAGE_FOW)) +#define _PAGE_S(x) _PAGE_NORMAL(x) + +/* + * The hardware can handle write-only mappings, but as the Alpha + * architecture does byte-wide writes with a read-modify-write + * sequence, it's not practical to have write-without-read privs. + * Thus the "-w- -> rw-" and "-wx -> rwx" mapping here (and in + * arch/alpha/mm/fault.c) + */ + /* xwr */ +#define __P000 _PAGE_P(_PAGE_FOE | _PAGE_FOW | _PAGE_FOR) +#define __P001 _PAGE_P(_PAGE_FOE | _PAGE_FOW) +#define __P010 _PAGE_P(_PAGE_FOE) +#define __P011 _PAGE_P(_PAGE_FOE) +#define __P100 _PAGE_P(_PAGE_FOW | _PAGE_FOR) +#define __P101 _PAGE_P(_PAGE_FOW) +#define __P110 _PAGE_P(0) +#define __P111 _PAGE_P(0) + +#define __S000 _PAGE_S(_PAGE_FOE | _PAGE_FOW | _PAGE_FOR) +#define __S001 _PAGE_S(_PAGE_FOE | _PAGE_FOW) +#define __S010 _PAGE_S(_PAGE_FOE) +#define __S011 _PAGE_S(_PAGE_FOE) +#define __S100 _PAGE_S(_PAGE_FOW | _PAGE_FOR) +#define __S101 _PAGE_S(_PAGE_FOW) +#define __S110 _PAGE_S(0) +#define __S111 _PAGE_S(0) + +/* + * pgprot_noncached() is only for infiniband pci support, and a real + * implementation for RAM would be more complicated. + */ +#define pgprot_noncached(prot) (prot) + +/* + * BAD_PAGETABLE is used when we need a bogus page-table, while + * BAD_PAGE is used for a bogus page. + * + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern pte_t __bad_page(void); +extern pmd_t * __bad_pagetable(void); + +extern unsigned long __zero_page(void); + +#define BAD_PAGETABLE __bad_pagetable() +#define BAD_PAGE __bad_page() +#define ZERO_PAGE(vaddr) (virt_to_page(ZERO_PGE)) + +/* number of bits that fit into a memory pointer */ +#define BITS_PER_PTR (8*sizeof(unsigned long)) + +/* to align the pointer to a pointer address */ +#define PTR_MASK (~(sizeof(void*)-1)) + +/* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */ +#define SIZEOF_PTR_LOG2 3 + +/* to find an entry in a page-table */ +#define PAGE_PTR(address) \ + ((unsigned long)(address)>>(PAGE_SHIFT-SIZEOF_PTR_LOG2)&PTR_MASK&~PAGE_MASK) + +/* + * On certain platforms whose physical address space can overlap KSEG, + * namely EV6 and above, we must re-twiddle the physaddr to restore the + * correct high-order bits. + * + * This is extremely confusing until you realize that this is actually + * just working around a userspace bug. The X server was intending to + * provide the physical address but instead provided the KSEG address. + * Or tried to, except it's not representable. + * + * On Tsunami there's nothing meaningful at 0x40000000000, so this is + * a safe thing to do. Come the first core logic that does put something + * in this area -- memory or whathaveyou -- then this hack will have + * to go away. So be prepared! + */ + +#if defined(CONFIG_ALPHA_GENERIC) && defined(USE_48_BIT_KSEG) +#error "EV6-only feature in a generic kernel" +#endif +#if defined(CONFIG_ALPHA_GENERIC) || \ + (defined(CONFIG_ALPHA_EV6) && !defined(USE_48_BIT_KSEG)) +#define KSEG_PFN (0xc0000000000UL >> PAGE_SHIFT) +#define PHYS_TWIDDLE(pfn) \ + ((((pfn) & KSEG_PFN) == (0x40000000000UL >> PAGE_SHIFT)) \ + ? ((pfn) ^= KSEG_PFN) : (pfn)) +#else +#define PHYS_TWIDDLE(pfn) (pfn) +#endif + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ +#ifndef CONFIG_DISCONTIGMEM +#define page_to_pa(page) (((page) - mem_map) << PAGE_SHIFT) + +#define pte_pfn(pte) (pte_val(pte) >> 32) +#define pte_page(pte) pfn_to_page(pte_pfn(pte)) +#define mk_pte(page, pgprot) \ +({ \ + pte_t pte; \ + \ + pte_val(pte) = (page_to_pfn(page) << 32) | pgprot_val(pgprot); \ + pte; \ +}) +#endif + +extern inline pte_t pfn_pte(unsigned long physpfn, pgprot_t pgprot) +{ pte_t pte; pte_val(pte) = (PHYS_TWIDDLE(physpfn) << 32) | pgprot_val(pgprot); return pte; } + +extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); return pte; } + +extern inline void pmd_set(pmd_t * pmdp, pte_t * ptep) +{ pmd_val(*pmdp) = _PAGE_TABLE | ((((unsigned long) ptep) - PAGE_OFFSET) << (32-PAGE_SHIFT)); } + +extern inline void pgd_set(pgd_t * pgdp, pmd_t * pmdp) +{ pgd_val(*pgdp) = _PAGE_TABLE | ((((unsigned long) pmdp) - PAGE_OFFSET) << (32-PAGE_SHIFT)); } + + +extern inline unsigned long +pmd_page_vaddr(pmd_t pmd) +{ + return ((pmd_val(pmd) & _PFN_MASK) >> (32-PAGE_SHIFT)) + PAGE_OFFSET; +} + +#ifndef CONFIG_DISCONTIGMEM +#define pmd_page(pmd) (mem_map + ((pmd_val(pmd) & _PFN_MASK) >> 32)) +#define pgd_page(pgd) (mem_map + ((pgd_val(pgd) & _PFN_MASK) >> 32)) +#endif + +extern inline unsigned long pgd_page_vaddr(pgd_t pgd) +{ return PAGE_OFFSET + ((pgd_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); } + +extern inline int pte_none(pte_t pte) { return !pte_val(pte); } +extern inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_VALID; } +extern inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pte_val(*ptep) = 0; +} + +extern inline int pmd_none(pmd_t pmd) { return !pmd_val(pmd); } +extern inline int pmd_bad(pmd_t pmd) { return (pmd_val(pmd) & ~_PFN_MASK) != _PAGE_TABLE; } +extern inline int pmd_present(pmd_t pmd) { return pmd_val(pmd) & _PAGE_VALID; } +extern inline void pmd_clear(pmd_t * pmdp) { pmd_val(*pmdp) = 0; } + +extern inline int pgd_none(pgd_t pgd) { return !pgd_val(pgd); } +extern inline int pgd_bad(pgd_t pgd) { return (pgd_val(pgd) & ~_PFN_MASK) != _PAGE_TABLE; } +extern inline int pgd_present(pgd_t pgd) { return pgd_val(pgd) & _PAGE_VALID; } +extern inline void pgd_clear(pgd_t * pgdp) { pgd_val(*pgdp) = 0; } + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +extern inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_FOW); } +extern inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } +extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } +extern inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } +extern inline int pte_special(pte_t pte) { return 0; } + +extern inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_FOW; return pte; } +extern inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~(__DIRTY_BITS); return pte; } +extern inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~(__ACCESS_BITS); return pte; } +extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) &= ~_PAGE_FOW; return pte; } +extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; return pte; } +extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; return pte; } +extern inline pte_t pte_mkspecial(pte_t pte) { return pte; } + +#define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address)) + +/* to find an entry in a kernel page-table-directory */ +#define pgd_offset_k(address) pgd_offset(&init_mm, (address)) + +/* to find an entry in a page-table-directory. */ +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) +#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address)) + +/* + * The smp_read_barrier_depends() in the following functions are required to + * order the load of *dir (the pointer in the top level page table) with any + * subsequent load of the returned pmd_t *ret (ret is data dependent on *dir). + * + * If this ordering is not enforced, the CPU might load an older value of + * *ret, which may be uninitialized data. See mm/memory.c:__pte_alloc for + * more details. + * + * Note that we never change the mm->pgd pointer after the task is running, so + * pgd_offset does not require such a barrier. + */ + +/* Find an entry in the second-level page table.. */ +extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) +{ + pmd_t *ret = (pmd_t *) pgd_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1)); + smp_read_barrier_depends(); /* see above */ + return ret; +} + +/* Find an entry in the third-level page table.. */ +extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address) +{ + pte_t *ret = (pte_t *) pmd_page_vaddr(*dir) + + ((address >> PAGE_SHIFT) & (PTRS_PER_PAGE - 1)); + smp_read_barrier_depends(); /* see above */ + return ret; +} + +#define pte_offset_map(dir,addr) pte_offset_kernel((dir),(addr)) +#define pte_unmap(pte) do { } while (0) + +extern pgd_t swapper_pg_dir[1024]; + +/* + * The Alpha doesn't have any external MMU info: the kernel page + * tables contain all the necessary information. + */ +extern inline void update_mmu_cache(struct vm_area_struct * vma, + unsigned long address, pte_t *ptep) +{ +} + +/* + * Non-present pages: high 24 bits are offset, next 8 bits type, + * low 32 bits zero. + */ +extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) +{ pte_t pte; pte_val(pte) = (type << 32) | (offset << 40); return pte; } + +#define __swp_type(x) (((x).val >> 32) & 0xff) +#define __swp_offset(x) ((x).val >> 40) +#define __swp_entry(type, off) ((swp_entry_t) { pte_val(mk_swap_pte((type), (off))) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +#define pte_to_pgoff(pte) (pte_val(pte) >> 32) +#define pgoff_to_pte(off) ((pte_t) { ((off) << 32) | _PAGE_FILE }) + +#define PTE_FILE_MAX_BITS 32 + +#ifndef CONFIG_DISCONTIGMEM +#define kern_addr_valid(addr) (1) +#endif + +#define io_remap_pfn_range(vma, start, pfn, size, prot) \ + remap_pfn_range(vma, start, pfn, size, prot) + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e)) + +extern void paging_init(void); + +#include <asm-generic/pgtable.h> + +/* + * No page table caches to initialise + */ +#define pgtable_cache_init() do { } while (0) + +/* We have our own get_unmapped_area to cope with ADDR_LIMIT_32BIT. */ +#define HAVE_ARCH_UNMAPPED_AREA + +#endif /* _ALPHA_PGTABLE_H */ diff --git a/arch/alpha/include/asm/poll.h b/arch/alpha/include/asm/poll.h new file mode 100644 index 00000000..c98509d3 --- /dev/null +++ b/arch/alpha/include/asm/poll.h @@ -0,0 +1 @@ +#include <asm-generic/poll.h> diff --git a/arch/alpha/include/asm/posix_types.h b/arch/alpha/include/asm/posix_types.h new file mode 100644 index 00000000..24779fc9 --- /dev/null +++ b/arch/alpha/include/asm/posix_types.h @@ -0,0 +1,20 @@ +#ifndef _ALPHA_POSIX_TYPES_H +#define _ALPHA_POSIX_TYPES_H + +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. + */ + +typedef unsigned int __kernel_ino_t; +#define __kernel_ino_t __kernel_ino_t + +typedef unsigned int __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t + +typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ + +#include <asm-generic/posix_types.h> + +#endif /* _ALPHA_POSIX_TYPES_H */ diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h new file mode 100644 index 00000000..94afe585 --- /dev/null +++ b/arch/alpha/include/asm/processor.h @@ -0,0 +1,93 @@ +/* + * include/asm-alpha/processor.h + * + * Copyright (C) 1994 Linus Torvalds + */ + +#ifndef __ASM_ALPHA_PROCESSOR_H +#define __ASM_ALPHA_PROCESSOR_H + +#include <linux/personality.h> /* for ADDR_LIMIT_32BIT */ + +/* + * Returns current instruction pointer ("program counter"). + */ +#define current_text_addr() \ + ({ void *__pc; __asm__ ("br %0,.+4" : "=r"(__pc)); __pc; }) + +/* + * We have a 42-bit user address space: 4TB user VM... + */ +#define TASK_SIZE (0x40000000000UL) + +#define STACK_TOP \ + (current->personality & ADDR_LIMIT_32BIT ? 0x80000000 : 0x00120000000UL) + +#define STACK_TOP_MAX 0x00120000000UL + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE \ + ((current->personality & ADDR_LIMIT_32BIT) ? 0x40000000 : TASK_SIZE / 2) + +typedef struct { + unsigned long seg; +} mm_segment_t; + +/* This is dead. Everything has been moved to thread_info. */ +struct thread_struct { }; +#define INIT_THREAD { } + +/* Return saved PC of a blocked thread. */ +struct task_struct; +extern unsigned long thread_saved_pc(struct task_struct *); + +/* Do necessary setup to start up a newly executed thread. */ +extern void start_thread(struct pt_regs *, unsigned long, unsigned long); + +/* Free all resources held by a thread. */ +extern void release_thread(struct task_struct *); + +/* Prepare to copy thread state - unlazy all lazy status */ +#define prepare_to_copy(tsk) do { } while (0) + +/* Create a kernel thread without removing it from tasklists. */ +extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); + +unsigned long get_wchan(struct task_struct *p); + +#define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc) + +#define KSTK_ESP(tsk) \ + ((tsk) == current ? rdusp() : task_thread_info(tsk)->pcb.usp) + +#define cpu_relax() barrier() + +#define ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +#ifndef CONFIG_SMP +/* Nothing to prefetch. */ +#define spin_lock_prefetch(lock) do { } while (0) +#endif + +extern inline void prefetch(const void *ptr) +{ + __builtin_prefetch(ptr, 0, 3); +} + +extern inline void prefetchw(const void *ptr) +{ + __builtin_prefetch(ptr, 1, 3); +} + +#ifdef CONFIG_SMP +extern inline void spin_lock_prefetch(const void *ptr) +{ + __builtin_prefetch(ptr, 1, 3); +} +#endif + +#endif /* __ASM_ALPHA_PROCESSOR_H */ diff --git a/arch/alpha/include/asm/ptrace.h b/arch/alpha/include/asm/ptrace.h new file mode 100644 index 00000000..fd698a17 --- /dev/null +++ b/arch/alpha/include/asm/ptrace.h @@ -0,0 +1,83 @@ +#ifndef _ASMAXP_PTRACE_H +#define _ASMAXP_PTRACE_H + + +/* + * This struct defines the way the registers are stored on the + * kernel stack during a system call or other kernel entry + * + * NOTE! I want to minimize the overhead of system calls, so this + * struct has as little information as possible. I does not have + * + * - floating point regs: the kernel doesn't change those + * - r9-15: saved by the C compiler + * + * This makes "fork()" and "exec()" a bit more complex, but should + * give us low system call latency. + */ + +struct pt_regs { + unsigned long r0; + unsigned long r1; + unsigned long r2; + unsigned long r3; + unsigned long r4; + unsigned long r5; + unsigned long r6; + unsigned long r7; + unsigned long r8; + unsigned long r19; + unsigned long r20; + unsigned long r21; + unsigned long r22; + unsigned long r23; + unsigned long r24; + unsigned long r25; + unsigned long r26; + unsigned long r27; + unsigned long r28; + unsigned long hae; +/* JRP - These are the values provided to a0-a2 by PALcode */ + unsigned long trap_a0; + unsigned long trap_a1; + unsigned long trap_a2; +/* These are saved by PAL-code: */ + unsigned long ps; + unsigned long pc; + unsigned long gp; + unsigned long r16; + unsigned long r17; + unsigned long r18; +}; + +/* + * This is the extended stack used by signal handlers and the context + * switcher: it's pushed after the normal "struct pt_regs". + */ +struct switch_stack { + unsigned long r9; + unsigned long r10; + unsigned long r11; + unsigned long r12; + unsigned long r13; + unsigned long r14; + unsigned long r15; + unsigned long r26; + unsigned long fp[32]; /* fp[31] is fpcr */ +}; + +#ifdef __KERNEL__ + +#define arch_has_single_step() (1) +#define user_mode(regs) (((regs)->ps & 8) != 0) +#define instruction_pointer(regs) ((regs)->pc) +#define profile_pc(regs) instruction_pointer(regs) + +#define task_pt_regs(task) \ + ((struct pt_regs *) (task_stack_page(task) + 2*PAGE_SIZE) - 1) + +#define force_successful_syscall_return() (task_pt_regs(current)->r0 = 0) + +#endif + +#endif diff --git a/arch/alpha/include/asm/reg.h b/arch/alpha/include/asm/reg.h new file mode 100644 index 00000000..86ff916f --- /dev/null +++ b/arch/alpha/include/asm/reg.h @@ -0,0 +1,52 @@ +#ifndef __reg_h__ +#define __reg_h__ + +/* + * Exception frame offsets. + */ +#define EF_V0 0 +#define EF_T0 1 +#define EF_T1 2 +#define EF_T2 3 +#define EF_T3 4 +#define EF_T4 5 +#define EF_T5 6 +#define EF_T6 7 +#define EF_T7 8 +#define EF_S0 9 +#define EF_S1 10 +#define EF_S2 11 +#define EF_S3 12 +#define EF_S4 13 +#define EF_S5 14 +#define EF_S6 15 +#define EF_A3 16 +#define EF_A4 17 +#define EF_A5 18 +#define EF_T8 19 +#define EF_T9 20 +#define EF_T10 21 +#define EF_T11 22 +#define EF_RA 23 +#define EF_T12 24 +#define EF_AT 25 +#define EF_SP 26 +#define EF_PS 27 +#define EF_PC 28 +#define EF_GP 29 +#define EF_A0 30 +#define EF_A1 31 +#define EF_A2 32 + +#define EF_SIZE (33*8) +#define HWEF_SIZE (6*8) /* size of PAL frame (PS-A2) */ + +#define EF_SSIZE (EF_SIZE - HWEF_SIZE) + +/* + * Map register number into core file offset. + */ +#define CORE_REG(reg, ubase) \ + (((unsigned long *)((unsigned long)(ubase)))[reg]) + +#endif /* __reg_h__ */ diff --git a/arch/alpha/include/asm/regdef.h b/arch/alpha/include/asm/regdef.h new file mode 100644 index 00000000..142df9c4 --- /dev/null +++ b/arch/alpha/include/asm/regdef.h @@ -0,0 +1,44 @@ +#ifndef __alpha_regdef_h__ +#define __alpha_regdef_h__ + +#define v0 $0 /* function return value */ + +#define t0 $1 /* temporary registers (caller-saved) */ +#define t1 $2 +#define t2 $3 +#define t3 $4 +#define t4 $5 +#define t5 $6 +#define t6 $7 +#define t7 $8 + +#define s0 $9 /* saved-registers (callee-saved registers) */ +#define s1 $10 +#define s2 $11 +#define s3 $12 +#define s4 $13 +#define s5 $14 +#define s6 $15 +#define fp s6 /* frame-pointer (s6 in frame-less procedures) */ + +#define a0 $16 /* argument registers (caller-saved) */ +#define a1 $17 +#define a2 $18 +#define a3 $19 +#define a4 $20 +#define a5 $21 + +#define t8 $22 /* more temps (caller-saved) */ +#define t9 $23 +#define t10 $24 +#define t11 $25 +#define ra $26 /* return address register */ +#define t12 $27 + +#define pv t12 /* procedure-variable register */ +#define AT $at /* assembler temporary */ +#define gp $29 /* global pointer */ +#define sp $30 /* stack pointer */ +#define zero $31 /* reads as zero, writes are noops */ + +#endif /* __alpha_regdef_h__ */ diff --git a/arch/alpha/include/asm/resource.h b/arch/alpha/include/asm/resource.h new file mode 100644 index 00000000..c10874ff --- /dev/null +++ b/arch/alpha/include/asm/resource.h @@ -0,0 +1,22 @@ +#ifndef _ALPHA_RESOURCE_H +#define _ALPHA_RESOURCE_H + +/* + * Alpha/Linux-specific ordering of these four resource limit IDs, + * the rest comes from the generic header: + */ +#define RLIMIT_NOFILE 6 /* max number of open files */ +#define RLIMIT_AS 7 /* address space limit */ +#define RLIMIT_NPROC 8 /* max number of processes */ +#define RLIMIT_MEMLOCK 9 /* max locked-in-memory address space */ + +/* + * SuS says limits have to be unsigned. Fine, it's unsigned, but + * we retain the old value for compatibility, especially with DU. + * When you run into the 2^63 barrier, you call me. + */ +#define RLIM_INFINITY 0x7ffffffffffffffful + +#include <asm-generic/resource.h> + +#endif /* _ALPHA_RESOURCE_H */ diff --git a/arch/alpha/include/asm/rtc.h b/arch/alpha/include/asm/rtc.h new file mode 100644 index 00000000..d70408d3 --- /dev/null +++ b/arch/alpha/include/asm/rtc.h @@ -0,0 +1,12 @@ +#ifndef _ALPHA_RTC_H +#define _ALPHA_RTC_H + +#if defined(CONFIG_ALPHA_MARVEL) && defined(CONFIG_SMP) \ + || defined(CONFIG_ALPHA_GENERIC) +# define get_rtc_time alpha_mv.rtc_get_time +# define set_rtc_time alpha_mv.rtc_set_time +#endif + +#include <asm-generic/rtc.h> + +#endif diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h new file mode 100644 index 00000000..a83bbea6 --- /dev/null +++ b/arch/alpha/include/asm/rwsem.h @@ -0,0 +1,223 @@ +#ifndef _ALPHA_RWSEM_H +#define _ALPHA_RWSEM_H + +/* + * Written by Ivan Kokshaysky <ink@jurassic.park.msu.ru>, 2001. + * Based on asm-alpha/semaphore.h and asm-i386/rwsem.h + */ + +#ifndef _LINUX_RWSEM_H +#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" +#endif + +#ifdef __KERNEL__ + +#include <linux/compiler.h> + +#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L +#define RWSEM_ACTIVE_BIAS 0x0000000000000001L +#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL +#define RWSEM_WAITING_BIAS (-0x0000000100000000L) +#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS +#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) + +static inline void __down_read(struct rw_semaphore *sem) +{ + long oldcount; +#ifndef CONFIG_SMP + oldcount = sem->count; + sem->count += RWSEM_ACTIVE_READ_BIAS; +#else + long temp; + __asm__ __volatile__( + "1: ldq_l %0,%1\n" + " addq %0,%3,%2\n" + " stq_c %2,%1\n" + " beq %2,2f\n" + " mb\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp) + :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory"); +#endif + if (unlikely(oldcount < 0)) + rwsem_down_read_failed(sem); +} + +/* + * trylock for reading -- returns 1 if successful, 0 if contention + */ +static inline int __down_read_trylock(struct rw_semaphore *sem) +{ + long old, new, res; + + res = sem->count; + do { + new = res + RWSEM_ACTIVE_READ_BIAS; + if (new <= 0) + break; + old = res; + res = cmpxchg(&sem->count, old, new); + } while (res != old); + return res >= 0 ? 1 : 0; +} + +static inline void __down_write(struct rw_semaphore *sem) +{ + long oldcount; +#ifndef CONFIG_SMP + oldcount = sem->count; + sem->count += RWSEM_ACTIVE_WRITE_BIAS; +#else + long temp; + __asm__ __volatile__( + "1: ldq_l %0,%1\n" + " addq %0,%3,%2\n" + " stq_c %2,%1\n" + " beq %2,2f\n" + " mb\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp) + :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory"); +#endif + if (unlikely(oldcount)) + rwsem_down_write_failed(sem); +} + +/* + * trylock for writing -- returns 1 if successful, 0 if contention + */ +static inline int __down_write_trylock(struct rw_semaphore *sem) +{ + long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, + RWSEM_ACTIVE_WRITE_BIAS); + if (ret == RWSEM_UNLOCKED_VALUE) + return 1; + return 0; +} + +static inline void __up_read(struct rw_semaphore *sem) +{ + long oldcount; +#ifndef CONFIG_SMP + oldcount = sem->count; + sem->count -= RWSEM_ACTIVE_READ_BIAS; +#else + long temp; + __asm__ __volatile__( + " mb\n" + "1: ldq_l %0,%1\n" + " subq %0,%3,%2\n" + " stq_c %2,%1\n" + " beq %2,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp) + :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory"); +#endif + if (unlikely(oldcount < 0)) + if ((int)oldcount - RWSEM_ACTIVE_READ_BIAS == 0) + rwsem_wake(sem); +} + +static inline void __up_write(struct rw_semaphore *sem) +{ + long count; +#ifndef CONFIG_SMP + sem->count -= RWSEM_ACTIVE_WRITE_BIAS; + count = sem->count; +#else + long temp; + __asm__ __volatile__( + " mb\n" + "1: ldq_l %0,%1\n" + " subq %0,%3,%2\n" + " stq_c %2,%1\n" + " beq %2,2f\n" + " subq %0,%3,%0\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (count), "=m" (sem->count), "=&r" (temp) + :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory"); +#endif + if (unlikely(count)) + if ((int)count == 0) + rwsem_wake(sem); +} + +/* + * downgrade write lock to read lock + */ +static inline void __downgrade_write(struct rw_semaphore *sem) +{ + long oldcount; +#ifndef CONFIG_SMP + oldcount = sem->count; + sem->count -= RWSEM_WAITING_BIAS; +#else + long temp; + __asm__ __volatile__( + "1: ldq_l %0,%1\n" + " addq %0,%3,%2\n" + " stq_c %2,%1\n" + " beq %2,2f\n" + " mb\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp) + :"Ir" (-RWSEM_WAITING_BIAS), "m" (sem->count) : "memory"); +#endif + if (unlikely(oldcount < 0)) + rwsem_downgrade_wake(sem); +} + +static inline void rwsem_atomic_add(long val, struct rw_semaphore *sem) +{ +#ifndef CONFIG_SMP + sem->count += val; +#else + long temp; + __asm__ __volatile__( + "1: ldq_l %0,%1\n" + " addq %0,%2,%0\n" + " stq_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (sem->count) + :"Ir" (val), "m" (sem->count)); +#endif +} + +static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem) +{ +#ifndef CONFIG_SMP + sem->count += val; + return sem->count; +#else + long ret, temp; + __asm__ __volatile__( + "1: ldq_l %0,%1\n" + " addq %0,%3,%2\n" + " addq %0,%3,%0\n" + " stq_c %2,%1\n" + " beq %2,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (ret), "=m" (sem->count), "=&r" (temp) + :"Ir" (val), "m" (sem->count)); + + return ret; +#endif +} + +#endif /* __KERNEL__ */ +#endif /* _ALPHA_RWSEM_H */ diff --git a/arch/alpha/include/asm/scatterlist.h b/arch/alpha/include/asm/scatterlist.h new file mode 100644 index 00000000..017d7471 --- /dev/null +++ b/arch/alpha/include/asm/scatterlist.h @@ -0,0 +1,6 @@ +#ifndef _ALPHA_SCATTERLIST_H +#define _ALPHA_SCATTERLIST_H + +#include <asm-generic/scatterlist.h> + +#endif /* !(_ALPHA_SCATTERLIST_H) */ diff --git a/arch/alpha/include/asm/sections.h b/arch/alpha/include/asm/sections.h new file mode 100644 index 00000000..43b40edd --- /dev/null +++ b/arch/alpha/include/asm/sections.h @@ -0,0 +1,7 @@ +#ifndef _ALPHA_SECTIONS_H +#define _ALPHA_SECTIONS_H + +/* nothing to see, move along */ +#include <asm-generic/sections.h> + +#endif diff --git a/arch/alpha/include/asm/segment.h b/arch/alpha/include/asm/segment.h new file mode 100644 index 00000000..0453d97d --- /dev/null +++ b/arch/alpha/include/asm/segment.h @@ -0,0 +1,6 @@ +#ifndef __ALPHA_SEGMENT_H +#define __ALPHA_SEGMENT_H + +/* Only here because we have some old header files that expect it.. */ + +#endif diff --git a/arch/alpha/include/asm/sembuf.h b/arch/alpha/include/asm/sembuf.h new file mode 100644 index 00000000..7b38b153 --- /dev/null +++ b/arch/alpha/include/asm/sembuf.h @@ -0,0 +1,22 @@ +#ifndef _ALPHA_SEMBUF_H +#define _ALPHA_SEMBUF_H + +/* + * The semid64_ds structure for alpha architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 2 miscellaneous 64-bit values + */ + +struct semid64_ds { + struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ + __kernel_time_t sem_otime; /* last semop time */ + __kernel_time_t sem_ctime; /* last change time */ + unsigned long sem_nsems; /* no. of semaphores in array */ + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* _ALPHA_SEMBUF_H */ diff --git a/arch/alpha/include/asm/serial.h b/arch/alpha/include/asm/serial.h new file mode 100644 index 00000000..9d263e8d --- /dev/null +++ b/arch/alpha/include/asm/serial.h @@ -0,0 +1,29 @@ +/* + * include/asm-alpha/serial.h + */ + + +/* + * This assumes you have a 1.8432 MHz clock for your UART. + * + * It'd be nice if someone built a serial card with a 24.576 MHz + * clock, since the 16550A is capable of handling a top speed of 1.5 + * megabits/second; but this requires the faster clock. + */ +#define BASE_BAUD ( 1843200 / 16 ) + +/* Standard COM flags (except for COM4, because of the 8514 problem) */ +#ifdef CONFIG_SERIAL_DETECT_IRQ +#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ) +#define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_AUTO_IRQ) +#else +#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST) +#define STD_COM4_FLAGS ASYNC_BOOT_AUTOCONF +#endif + +#define SERIAL_PORT_DFNS \ + /* UART CLK PORT IRQ FLAGS */ \ + { 0, BASE_BAUD, 0x3F8, 4, STD_COM_FLAGS }, /* ttyS0 */ \ + { 0, BASE_BAUD, 0x2F8, 3, STD_COM_FLAGS }, /* ttyS1 */ \ + { 0, BASE_BAUD, 0x3E8, 4, STD_COM_FLAGS }, /* ttyS2 */ \ + { 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS }, /* ttyS3 */ diff --git a/arch/alpha/include/asm/setup.h b/arch/alpha/include/asm/setup.h new file mode 100644 index 00000000..b50014b3 --- /dev/null +++ b/arch/alpha/include/asm/setup.h @@ -0,0 +1,42 @@ +#ifndef __ALPHA_SETUP_H +#define __ALPHA_SETUP_H + +#define COMMAND_LINE_SIZE 256 + +/* + * We leave one page for the initial stack page, and one page for + * the initial process structure. Also, the console eats 3 MB for + * the initial bootloader (one of which we can reclaim later). + */ +#define BOOT_PCB 0x20000000 +#define BOOT_ADDR 0x20000000 +/* Remove when official MILO sources have ELF support: */ +#define BOOT_SIZE (16*1024) + +#ifdef CONFIG_ALPHA_LEGACY_START_ADDRESS +#define KERNEL_START_PHYS 0x300000 /* Old bootloaders hardcoded this. */ +#else +#define KERNEL_START_PHYS 0x1000000 /* required: Wildfire/Titan/Marvel */ +#endif + +#define KERNEL_START (PAGE_OFFSET+KERNEL_START_PHYS) +#define SWAPPER_PGD KERNEL_START +#define INIT_STACK (PAGE_OFFSET+KERNEL_START_PHYS+0x02000) +#define EMPTY_PGT (PAGE_OFFSET+KERNEL_START_PHYS+0x04000) +#define EMPTY_PGE (PAGE_OFFSET+KERNEL_START_PHYS+0x08000) +#define ZERO_PGE (PAGE_OFFSET+KERNEL_START_PHYS+0x0A000) + +#define START_ADDR (PAGE_OFFSET+KERNEL_START_PHYS+0x10000) + +/* + * This is setup by the secondary bootstrap loader. Because + * the zero page is zeroed out as soon as the vm system is + * initialized, we need to copy things out into a more permanent + * place. + */ +#define PARAM ZERO_PGE +#define COMMAND_LINE ((char*)(PARAM + 0x0000)) +#define INITRD_START (*(unsigned long *) (PARAM+0x100)) +#define INITRD_SIZE (*(unsigned long *) (PARAM+0x108)) + +#endif diff --git a/arch/alpha/include/asm/sfp-machine.h b/arch/alpha/include/asm/sfp-machine.h new file mode 100644 index 00000000..5fe63afb --- /dev/null +++ b/arch/alpha/include/asm/sfp-machine.h @@ -0,0 +1,82 @@ +/* Machine-dependent software floating-point definitions. + Alpha kernel version. + Copyright (C) 1997,1998,1999 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com), + Jakub Jelinek (jakub@redhat.com) and + David S. Miller (davem@redhat.com). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifndef _SFP_MACHINE_H +#define _SFP_MACHINE_H + +#define _FP_W_TYPE_SIZE 64 +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_imm(_FP_WFRACBITS_S,R,X,Y) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_S _FP_QNANBIT_S +#define _FP_NANFRAC_D _FP_QNANBIT_D +#define _FP_NANFRAC_Q _FP_QNANBIT_Q +#define _FP_NANSIGN_S 1 +#define _FP_NANSIGN_D 1 +#define _FP_NANSIGN_Q 1 + +#define _FP_KEEPNANFRACP 1 + +/* Alpha Architecture Handbook, 4.7.10.4 sais that + * we should prefer any type of NaN in Fb, then Fa. + */ +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + R##_c = FP_CLS_NAN; \ + } while (0) + +/* Obtain the current rounding mode. */ +#define FP_ROUNDMODE mode +#define FP_RND_NEAREST (FPCR_DYN_NORMAL >> FPCR_DYN_SHIFT) +#define FP_RND_ZERO (FPCR_DYN_CHOPPED >> FPCR_DYN_SHIFT) +#define FP_RND_PINF (FPCR_DYN_PLUS >> FPCR_DYN_SHIFT) +#define FP_RND_MINF (FPCR_DYN_MINUS >> FPCR_DYN_SHIFT) + +/* Exception flags. */ +#define FP_EX_INVALID IEEE_TRAP_ENABLE_INV +#define FP_EX_OVERFLOW IEEE_TRAP_ENABLE_OVF +#define FP_EX_UNDERFLOW IEEE_TRAP_ENABLE_UNF +#define FP_EX_DIVZERO IEEE_TRAP_ENABLE_DZE +#define FP_EX_INEXACT IEEE_TRAP_ENABLE_INE +#define FP_EX_DENORM IEEE_TRAP_ENABLE_DNO + +#define FP_DENORM_ZERO (swcr & IEEE_MAP_DMZ) + +/* We write the results always */ +#define FP_INHIBIT_RESULTS 0 + +#endif diff --git a/arch/alpha/include/asm/shmbuf.h b/arch/alpha/include/asm/shmbuf.h new file mode 100644 index 00000000..37ee84f0 --- /dev/null +++ b/arch/alpha/include/asm/shmbuf.h @@ -0,0 +1,38 @@ +#ifndef _ALPHA_SHMBUF_H +#define _ALPHA_SHMBUF_H + +/* + * The shmid64_ds structure for alpha architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 2 miscellaneous 64-bit values + */ + +struct shmid64_ds { + struct ipc64_perm shm_perm; /* operation perms */ + size_t shm_segsz; /* size of segment (bytes) */ + __kernel_time_t shm_atime; /* last attach time */ + __kernel_time_t shm_dtime; /* last detach time */ + __kernel_time_t shm_ctime; /* last change time */ + __kernel_pid_t shm_cpid; /* pid of creator */ + __kernel_pid_t shm_lpid; /* pid of last operator */ + unsigned long shm_nattch; /* no. of current attaches */ + unsigned long __unused1; + unsigned long __unused2; +}; + +struct shminfo64 { + unsigned long shmmax; + unsigned long shmmin; + unsigned long shmmni; + unsigned long shmseg; + unsigned long shmall; + unsigned long __unused1; + unsigned long __unused2; + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* _ALPHA_SHMBUF_H */ diff --git a/arch/alpha/include/asm/shmparam.h b/arch/alpha/include/asm/shmparam.h new file mode 100644 index 00000000..cc901d58 --- /dev/null +++ b/arch/alpha/include/asm/shmparam.h @@ -0,0 +1,6 @@ +#ifndef _ASMAXP_SHMPARAM_H +#define _ASMAXP_SHMPARAM_H + +#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ + +#endif /* _ASMAXP_SHMPARAM_H */ diff --git a/arch/alpha/include/asm/sigcontext.h b/arch/alpha/include/asm/sigcontext.h new file mode 100644 index 00000000..323cdb02 --- /dev/null +++ b/arch/alpha/include/asm/sigcontext.h @@ -0,0 +1,34 @@ +#ifndef _ASMAXP_SIGCONTEXT_H +#define _ASMAXP_SIGCONTEXT_H + +struct sigcontext { + /* + * What should we have here? I'd probably better use the same + * stack layout as OSF/1, just in case we ever want to try + * running their binaries.. + * + * This is the basic layout, but I don't know if we'll ever + * actually fill in all the values.. + */ + long sc_onstack; + long sc_mask; + long sc_pc; + long sc_ps; + long sc_regs[32]; + long sc_ownedfp; + long sc_fpregs[32]; + unsigned long sc_fpcr; + unsigned long sc_fp_control; + unsigned long sc_reserved1, sc_reserved2; + unsigned long sc_ssize; + char * sc_sbase; + unsigned long sc_traparg_a0; + unsigned long sc_traparg_a1; + unsigned long sc_traparg_a2; + unsigned long sc_fp_trap_pc; + unsigned long sc_fp_trigger_sum; + unsigned long sc_fp_trigger_inst; +}; + + +#endif diff --git a/arch/alpha/include/asm/siginfo.h b/arch/alpha/include/asm/siginfo.h new file mode 100644 index 00000000..9822362a --- /dev/null +++ b/arch/alpha/include/asm/siginfo.h @@ -0,0 +1,9 @@ +#ifndef _ALPHA_SIGINFO_H +#define _ALPHA_SIGINFO_H + +#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +#define __ARCH_SI_TRAPNO + +#include <asm-generic/siginfo.h> + +#endif diff --git a/arch/alpha/include/asm/signal.h b/arch/alpha/include/asm/signal.h new file mode 100644 index 00000000..a9388300 --- /dev/null +++ b/arch/alpha/include/asm/signal.h @@ -0,0 +1,172 @@ +#ifndef _ASMAXP_SIGNAL_H +#define _ASMAXP_SIGNAL_H + +#include <linux/types.h> + +/* Avoid too many header ordering problems. */ +struct siginfo; + +#ifdef __KERNEL__ +/* Digital Unix defines 64 signals. Most things should be clean enough + to redefine this at will, if care is taken to make libc match. */ + +#define _NSIG 64 +#define _NSIG_BPW 64 +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + +typedef unsigned long old_sigset_t; /* at least 32 bits */ + +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; + +#else +/* Here we must cater to libcs that poke about in kernel headers. */ + +#define NSIG 32 +typedef unsigned long sigset_t; + +#endif /* __KERNEL__ */ + + +/* + * Linux/AXP has different signal numbers that Linux/i386: I'm trying + * to make it OSF/1 binary compatible, at least for normal binaries. + */ +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGEMT 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGBUS 10 +#define SIGSEGV 11 +#define SIGSYS 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGURG 16 +#define SIGSTOP 17 +#define SIGTSTP 18 +#define SIGCONT 19 +#define SIGCHLD 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGIO 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGINFO 29 +#define SIGUSR1 30 +#define SIGUSR2 31 + +#define SIGPOLL SIGIO +#define SIGPWR SIGINFO +#define SIGIOT SIGABRT + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX _NSIG + +/* + * SA_FLAGS values: + * + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. + */ + +#define SA_ONSTACK 0x00000001 +#define SA_RESTART 0x00000002 +#define SA_NOCLDSTOP 0x00000004 +#define SA_NODEFER 0x00000008 +#define SA_RESETHAND 0x00000010 +#define SA_NOCLDWAIT 0x00000020 +#define SA_SIGINFO 0x00000040 + +#define SA_ONESHOT SA_RESETHAND +#define SA_NOMASK SA_NODEFER + +/* + * sigaltstack controls + */ +#define SS_ONSTACK 1 +#define SS_DISABLE 2 + +#define MINSIGSTKSZ 4096 +#define SIGSTKSZ 16384 + +#define SIG_BLOCK 1 /* for blocking signals */ +#define SIG_UNBLOCK 2 /* for unblocking signals */ +#define SIG_SETMASK 3 /* for setting the signal mask */ + +#include <asm-generic/signal-defs.h> + +#ifdef __KERNEL__ +struct osf_sigaction { + __sighandler_t sa_handler; + old_sigset_t sa_mask; + int sa_flags; +}; + +struct sigaction { + __sighandler_t sa_handler; + unsigned long sa_flags; + sigset_t sa_mask; /* mask last for extensibility */ +}; + +struct k_sigaction { + struct sigaction sa; + __sigrestore_t ka_restorer; +}; +#else +/* Here we must cater to libcs that poke about in kernel headers. */ + +struct sigaction { + union { + __sighandler_t _sa_handler; + void (*_sa_sigaction)(int, struct siginfo *, void *); + } _u; + sigset_t sa_mask; + int sa_flags; +}; + +#define sa_handler _u._sa_handler +#define sa_sigaction _u._sa_sigaction + +#endif /* __KERNEL__ */ + +typedef struct sigaltstack { + void __user *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + +/* sigstack(2) is deprecated, and will be withdrawn in a future version + of the X/Open CAE Specification. Use sigaltstack instead. It is only + implemented here for OSF/1 compatibility. */ + +struct sigstack { + void __user *ss_sp; + int ss_onstack; +}; + +#ifdef __KERNEL__ +#include <asm/sigcontext.h> + +#define ptrace_signal_deliver(regs, cookie) do { } while (0) + +#endif + +#endif diff --git a/arch/alpha/include/asm/smp.h b/arch/alpha/include/asm/smp.h new file mode 100644 index 00000000..c46e714a --- /dev/null +++ b/arch/alpha/include/asm/smp.h @@ -0,0 +1,59 @@ +#ifndef __ASM_SMP_H +#define __ASM_SMP_H + +#include <linux/threads.h> +#include <linux/cpumask.h> +#include <linux/bitops.h> +#include <asm/pal.h> + +/* HACK: Cabrio WHAMI return value is bogus if more than 8 bits used.. :-( */ + +static __inline__ unsigned char +__hard_smp_processor_id(void) +{ + register unsigned char __r0 __asm__("$0"); + __asm__ __volatile__( + "call_pal %1 #whami" + : "=r"(__r0) + :"i" (PAL_whami) + : "$1", "$22", "$23", "$24", "$25"); + return __r0; +} + +#ifdef CONFIG_SMP + +#include <asm/irq.h> + +struct cpuinfo_alpha { + unsigned long loops_per_jiffy; + unsigned long last_asn; + int need_new_asn; + int asn_lock; + unsigned long ipi_count; + unsigned long prof_multiplier; + unsigned long prof_counter; + unsigned char mcheck_expected; + unsigned char mcheck_taken; + unsigned char mcheck_extra; +} __attribute__((aligned(64))); + +extern struct cpuinfo_alpha cpu_data[NR_CPUS]; + +#define hard_smp_processor_id() __hard_smp_processor_id() +#define raw_smp_processor_id() (current_thread_info()->cpu) + +extern int smp_num_cpus; + +extern void arch_send_call_function_single_ipi(int cpu); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); + +#else /* CONFIG_SMP */ + +#define hard_smp_processor_id() 0 +#define smp_call_function_on_cpu(func,info,wait,cpu) ({ 0; }) + +#endif /* CONFIG_SMP */ + +#define NO_PROC_ID (-1) + +#endif diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h new file mode 100644 index 00000000..dcb221a4 --- /dev/null +++ b/arch/alpha/include/asm/socket.h @@ -0,0 +1,84 @@ +#ifndef _ASM_SOCKET_H +#define _ASM_SOCKET_H + +#include <asm/sockios.h> + +/* For setsockopt(2) */ +/* + * Note: we only bother about making the SOL_SOCKET options + * same as OSF/1, as that's all that "normal" programs are + * likely to set. We don't necessarily want to be binary + * compatible with _everything_. + */ +#define SOL_SOCKET 0xffff + +#define SO_DEBUG 0x0001 +#define SO_REUSEADDR 0x0004 +#define SO_KEEPALIVE 0x0008 +#define SO_DONTROUTE 0x0010 +#define SO_BROADCAST 0x0020 +#define SO_LINGER 0x0080 +#define SO_OOBINLINE 0x0100 +/* To add :#define SO_REUSEPORT 0x0200 */ + +#define SO_TYPE 0x1008 +#define SO_ERROR 0x1007 +#define SO_SNDBUF 0x1001 +#define SO_RCVBUF 0x1002 +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b +#define SO_RCVLOWAT 0x1010 +#define SO_SNDLOWAT 0x1011 +#define SO_RCVTIMEO 0x1012 +#define SO_SNDTIMEO 0x1013 +#define SO_ACCEPTCONN 0x1014 +#define SO_PROTOCOL 0x1028 +#define SO_DOMAIN 0x1029 + +/* linux-specific, might as well be the same as on i386 */ +#define SO_NO_CHECK 11 +#define SO_PRIORITY 12 +#define SO_BSDCOMPAT 14 + +#define SO_PASSCRED 17 +#define SO_PEERCRED 18 +#define SO_BINDTODEVICE 25 + +/* Socket filtering */ +#define SO_ATTACH_FILTER 26 +#define SO_DETACH_FILTER 27 + +#define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP + +#define SO_PEERSEC 30 +#define SO_PASSSEC 34 +#define SO_TIMESTAMPNS 35 +#define SCM_TIMESTAMPNS SO_TIMESTAMPNS + +/* Security levels - as per NRL IPv6 - don't actually do anything */ +#define SO_SECURITY_AUTHENTICATION 19 +#define SO_SECURITY_ENCRYPTION_TRANSPORT 20 +#define SO_SECURITY_ENCRYPTION_NETWORK 21 + +#define SO_MARK 36 + +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + +#define SO_RXQ_OVFL 40 + +#define SO_WIFI_STATUS 41 +#define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 + +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + +/* O_NONBLOCK clashes with the bits used for socket types. Therefore we + * have to define SOCK_NONBLOCK to a different value here. + */ +#define SOCK_NONBLOCK 0x40000000 + +#endif /* _ASM_SOCKET_H */ diff --git a/arch/alpha/include/asm/sockios.h b/arch/alpha/include/asm/sockios.h new file mode 100644 index 00000000..7932c7ab --- /dev/null +++ b/arch/alpha/include/asm/sockios.h @@ -0,0 +1,16 @@ +#ifndef _ASM_ALPHA_SOCKIOS_H +#define _ASM_ALPHA_SOCKIOS_H + +/* Socket-level I/O control calls. */ + +#define FIOGETOWN _IOR('f', 123, int) +#define FIOSETOWN _IOW('f', 124, int) + +#define SIOCATMARK _IOR('s', 7, int) +#define SIOCSPGRP _IOW('s', 8, pid_t) +#define SIOCGPGRP _IOR('s', 9, pid_t) + +#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ +#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ + +#endif /* _ASM_ALPHA_SOCKIOS_H */ diff --git a/arch/alpha/include/asm/special_insns.h b/arch/alpha/include/asm/special_insns.h new file mode 100644 index 00000000..88d3452b --- /dev/null +++ b/arch/alpha/include/asm/special_insns.h @@ -0,0 +1,41 @@ +#ifndef __ALPHA_SPECIAL_INSNS_H +#define __ALPHA_SPECIAL_INSNS_H + +enum implver_enum { + IMPLVER_EV4, + IMPLVER_EV5, + IMPLVER_EV6 +}; + +#ifdef CONFIG_ALPHA_GENERIC +#define implver() \ +({ unsigned long __implver; \ + __asm__ ("implver %0" : "=r"(__implver)); \ + (enum implver_enum) __implver; }) +#else +/* Try to eliminate some dead code. */ +#ifdef CONFIG_ALPHA_EV4 +#define implver() IMPLVER_EV4 +#endif +#ifdef CONFIG_ALPHA_EV5 +#define implver() IMPLVER_EV5 +#endif +#if defined(CONFIG_ALPHA_EV6) +#define implver() IMPLVER_EV6 +#endif +#endif + +enum amask_enum { + AMASK_BWX = (1UL << 0), + AMASK_FIX = (1UL << 1), + AMASK_CIX = (1UL << 2), + AMASK_MAX = (1UL << 8), + AMASK_PRECISE_TRAP = (1UL << 9), +}; + +#define amask(mask) \ +({ unsigned long __amask, __input = (mask); \ + __asm__ ("amask %1,%0" : "=r"(__amask) : "rI"(__input)); \ + __amask; }) + +#endif /* __ALPHA_SPECIAL_INSNS_H */ diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h new file mode 100644 index 00000000..3bba21e4 --- /dev/null +++ b/arch/alpha/include/asm/spinlock.h @@ -0,0 +1,175 @@ +#ifndef _ALPHA_SPINLOCK_H +#define _ALPHA_SPINLOCK_H + +#include <linux/kernel.h> +#include <asm/current.h> + +/* + * Simple spin lock operations. There are two variants, one clears IRQ's + * on the local processor, one does not. + * + * We make no fairness assumptions. They have a cost. + */ + +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) +#define arch_spin_is_locked(x) ((x)->lock != 0) +#define arch_spin_unlock_wait(x) \ + do { cpu_relax(); } while ((x)->lock) + +static inline void arch_spin_unlock(arch_spinlock_t * lock) +{ + mb(); + lock->lock = 0; +} + +static inline void arch_spin_lock(arch_spinlock_t * lock) +{ + long tmp; + + __asm__ __volatile__( + "1: ldl_l %0,%1\n" + " bne %0,2f\n" + " lda %0,1\n" + " stl_c %0,%1\n" + " beq %0,2f\n" + " mb\n" + ".subsection 2\n" + "2: ldl %0,%1\n" + " bne %0,2b\n" + " br 1b\n" + ".previous" + : "=&r" (tmp), "=m" (lock->lock) + : "m"(lock->lock) : "memory"); +} + +static inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + return !test_and_set_bit(0, &lock->lock); +} + +/***********************************************************/ + +static inline int arch_read_can_lock(arch_rwlock_t *lock) +{ + return (lock->lock & 1) == 0; +} + +static inline int arch_write_can_lock(arch_rwlock_t *lock) +{ + return lock->lock == 0; +} + +static inline void arch_read_lock(arch_rwlock_t *lock) +{ + long regx; + + __asm__ __volatile__( + "1: ldl_l %1,%0\n" + " blbs %1,6f\n" + " subl %1,2,%1\n" + " stl_c %1,%0\n" + " beq %1,6f\n" + " mb\n" + ".subsection 2\n" + "6: ldl %1,%0\n" + " blbs %1,6b\n" + " br 1b\n" + ".previous" + : "=m" (*lock), "=&r" (regx) + : "m" (*lock) : "memory"); +} + +static inline void arch_write_lock(arch_rwlock_t *lock) +{ + long regx; + + __asm__ __volatile__( + "1: ldl_l %1,%0\n" + " bne %1,6f\n" + " lda %1,1\n" + " stl_c %1,%0\n" + " beq %1,6f\n" + " mb\n" + ".subsection 2\n" + "6: ldl %1,%0\n" + " bne %1,6b\n" + " br 1b\n" + ".previous" + : "=m" (*lock), "=&r" (regx) + : "m" (*lock) : "memory"); +} + +static inline int arch_read_trylock(arch_rwlock_t * lock) +{ + long regx; + int success; + + __asm__ __volatile__( + "1: ldl_l %1,%0\n" + " lda %2,0\n" + " blbs %1,2f\n" + " subl %1,2,%2\n" + " stl_c %2,%0\n" + " beq %2,6f\n" + "2: mb\n" + ".subsection 2\n" + "6: br 1b\n" + ".previous" + : "=m" (*lock), "=&r" (regx), "=&r" (success) + : "m" (*lock) : "memory"); + + return success; +} + +static inline int arch_write_trylock(arch_rwlock_t * lock) +{ + long regx; + int success; + + __asm__ __volatile__( + "1: ldl_l %1,%0\n" + " lda %2,0\n" + " bne %1,2f\n" + " lda %2,1\n" + " stl_c %2,%0\n" + " beq %2,6f\n" + "2: mb\n" + ".subsection 2\n" + "6: br 1b\n" + ".previous" + : "=m" (*lock), "=&r" (regx), "=&r" (success) + : "m" (*lock) : "memory"); + + return success; +} + +static inline void arch_read_unlock(arch_rwlock_t * lock) +{ + long regx; + __asm__ __volatile__( + " mb\n" + "1: ldl_l %1,%0\n" + " addl %1,2,%1\n" + " stl_c %1,%0\n" + " beq %1,6f\n" + ".subsection 2\n" + "6: br 1b\n" + ".previous" + : "=m" (*lock), "=&r" (regx) + : "m" (*lock) : "memory"); +} + +static inline void arch_write_unlock(arch_rwlock_t * lock) +{ + mb(); + lock->lock = 0; +} + +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) + +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() + +#endif /* _ALPHA_SPINLOCK_H */ diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h new file mode 100644 index 00000000..54c2afce --- /dev/null +++ b/arch/alpha/include/asm/spinlock_types.h @@ -0,0 +1,20 @@ +#ifndef _ALPHA_SPINLOCK_TYPES_H +#define _ALPHA_SPINLOCK_TYPES_H + +#ifndef __LINUX_SPINLOCK_TYPES_H +# error "please don't include this file directly" +#endif + +typedef struct { + volatile unsigned int lock; +} arch_spinlock_t; + +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } + +typedef struct { + volatile unsigned int lock; +} arch_rwlock_t; + +#define __ARCH_RW_LOCK_UNLOCKED { 0 } + +#endif diff --git a/arch/alpha/include/asm/stat.h b/arch/alpha/include/asm/stat.h new file mode 100644 index 00000000..07ad3e6b --- /dev/null +++ b/arch/alpha/include/asm/stat.h @@ -0,0 +1,48 @@ +#ifndef _ALPHA_STAT_H +#define _ALPHA_STAT_H + +struct stat { + unsigned int st_dev; + unsigned int st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + unsigned int st_rdev; + long st_size; + unsigned long st_atime; + unsigned long st_mtime; + unsigned long st_ctime; + unsigned int st_blksize; + unsigned int st_blocks; + unsigned int st_flags; + unsigned int st_gen; +}; + +/* The stat64 structure increases the size of dev_t, blkcnt_t, adds + nanosecond resolution times, and padding for expansion. */ + +struct stat64 { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_rdev; + long st_size; + unsigned long st_blocks; + + unsigned int st_mode; + unsigned int st_uid; + unsigned int st_gid; + unsigned int st_blksize; + unsigned int st_nlink; + unsigned int __pad0; + + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + long __unused[3]; +}; + +#endif diff --git a/arch/alpha/include/asm/statfs.h b/arch/alpha/include/asm/statfs.h new file mode 100644 index 00000000..ccd2e186 --- /dev/null +++ b/arch/alpha/include/asm/statfs.h @@ -0,0 +1,12 @@ +#ifndef _ALPHA_STATFS_H +#define _ALPHA_STATFS_H + +#include <linux/types.h> + +/* Alpha is the only 64-bit platform with 32-bit statfs. And doesn't + even seem to implement statfs64 */ +#define __statfs_word __u32 + +#include <asm-generic/statfs.h> + +#endif diff --git a/arch/alpha/include/asm/string.h b/arch/alpha/include/asm/string.h new file mode 100644 index 00000000..b02b8a28 --- /dev/null +++ b/arch/alpha/include/asm/string.h @@ -0,0 +1,66 @@ +#ifndef __ALPHA_STRING_H__ +#define __ALPHA_STRING_H__ + +#ifdef __KERNEL__ + +/* + * GCC of any recent vintage doesn't do stupid things with bcopy. + * EGCS 1.1 knows all about expanding memcpy inline, others don't. + * + * Similarly for a memset with data = 0. + */ + +#define __HAVE_ARCH_MEMCPY +extern void * memcpy(void *, const void *, size_t); +#define __HAVE_ARCH_MEMMOVE +extern void * memmove(void *, const void *, size_t); + +/* For backward compatibility with modules. Unused otherwise. */ +extern void * __memcpy(void *, const void *, size_t); + +#define memcpy __builtin_memcpy + +#define __HAVE_ARCH_MEMSET +extern void * __constant_c_memset(void *, unsigned long, size_t); +extern void * __memset(void *, int, size_t); +extern void * memset(void *, int, size_t); + +#define memset(s, c, n) \ +(__builtin_constant_p(c) \ + ? (__builtin_constant_p(n) && (c) == 0 \ + ? __builtin_memset((s),0,(n)) \ + : __constant_c_memset((s),0x0101010101010101UL*(unsigned char)(c),(n))) \ + : __memset((s),(c),(n))) + +#define __HAVE_ARCH_STRCPY +extern char * strcpy(char *,const char *); +#define __HAVE_ARCH_STRNCPY +extern char * strncpy(char *, const char *, size_t); +#define __HAVE_ARCH_STRCAT +extern char * strcat(char *, const char *); +#define __HAVE_ARCH_STRNCAT +extern char * strncat(char *, const char *, size_t); +#define __HAVE_ARCH_STRCHR +extern char * strchr(const char *,int); +#define __HAVE_ARCH_STRRCHR +extern char * strrchr(const char *,int); +#define __HAVE_ARCH_STRLEN +extern size_t strlen(const char *); +#define __HAVE_ARCH_MEMCHR +extern void * memchr(const void *, int, size_t); + +/* The following routine is like memset except that it writes 16-bit + aligned values. The DEST and COUNT parameters must be even for + correct operation. */ + +#define __HAVE_ARCH_MEMSETW +extern void * __memsetw(void *dest, unsigned short, size_t count); + +#define memsetw(s, c, n) \ +(__builtin_constant_p(c) \ + ? __constant_c_memset((s),0x0001000100010001UL*(unsigned short)(c),(n)) \ + : __memsetw((s),(c),(n))) + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_STRING_H__ */ diff --git a/arch/alpha/include/asm/swab.h b/arch/alpha/include/asm/swab.h new file mode 100644 index 00000000..4d682b16 --- /dev/null +++ b/arch/alpha/include/asm/swab.h @@ -0,0 +1,42 @@ +#ifndef _ALPHA_SWAB_H +#define _ALPHA_SWAB_H + +#include <linux/types.h> +#include <linux/compiler.h> +#include <asm/compiler.h> + +#ifdef __GNUC__ + +static inline __attribute_const__ __u32 __arch_swab32(__u32 x) +{ + /* + * Unfortunately, we can't use the 6 instruction sequence + * on ev6 since the latency of the UNPKBW is 3, which is + * pretty hard to hide. Just in case a future implementation + * has a lower latency, here's the sequence (also by Mike Burrows) + * + * UNPKBW a0, v0 v0: 00AA00BB00CC00DD + * SLL v0, 24, a0 a0: BB00CC00DD000000 + * BIS v0, a0, a0 a0: BBAACCBBDDCC00DD + * EXTWL a0, 6, v0 v0: 000000000000BBAA + * ZAP a0, 0xf3, a0 a0: 00000000DDCC0000 + * ADDL a0, v0, v0 v0: ssssssssDDCCBBAA + */ + + __u64 t0, t1, t2, t3; + + t0 = __kernel_inslh(x, 7); /* t0 : 0000000000AABBCC */ + t1 = __kernel_inswl(x, 3); /* t1 : 000000CCDD000000 */ + t1 |= t0; /* t1 : 000000CCDDAABBCC */ + t2 = t1 >> 16; /* t2 : 0000000000CCDDAA */ + t0 = t1 & 0xFF00FF00; /* t0 : 00000000DD00BB00 */ + t3 = t2 & 0x00FF00FF; /* t3 : 0000000000CC00AA */ + t1 = t0 + t3; /* t1 : ssssssssDDCCBBAA */ + + return t1; +} +#define __arch_swab32 __arch_swab32 + +#endif /* __GNUC__ */ + +#endif /* _ALPHA_SWAB_H */ diff --git a/arch/alpha/include/asm/switch_to.h b/arch/alpha/include/asm/switch_to.h new file mode 100644 index 00000000..44c0d4f2 --- /dev/null +++ b/arch/alpha/include/asm/switch_to.h @@ -0,0 +1,14 @@ +#ifndef __ALPHA_SWITCH_TO_H +#define __ALPHA_SWITCH_TO_H + + +struct task_struct; +extern struct task_struct *alpha_switch_to(unsigned long, struct task_struct *); + +#define switch_to(P,N,L) \ + do { \ + (L) = alpha_switch_to(virt_to_phys(&task_thread_info(N)->pcb), (P)); \ + check_mmu_context(); \ + } while (0) + +#endif /* __ALPHA_SWITCH_TO_H */ diff --git a/arch/alpha/include/asm/sysinfo.h b/arch/alpha/include/asm/sysinfo.h new file mode 100644 index 00000000..e77d77cd --- /dev/null +++ b/arch/alpha/include/asm/sysinfo.h @@ -0,0 +1,30 @@ +/* + * include/asm-alpha/sysinfo.h + */ + +#ifndef __ASM_ALPHA_SYSINFO_H +#define __ASM_ALPHA_SYSINFO_H + +/* This defines the subset of the OSF/1 getsysinfo/setsysinfo calls + that we support. */ + +#define GSI_UACPROC 8 +#define GSI_IEEE_FP_CONTROL 45 +#define GSI_IEEE_STATE_AT_SIGNAL 46 +#define GSI_PROC_TYPE 60 +#define GSI_GET_HWRPB 101 + +#define SSI_NVPAIRS 1 +#define SSI_IEEE_FP_CONTROL 14 +#define SSI_IEEE_STATE_AT_SIGNAL 15 +#define SSI_IEEE_IGNORE_STATE_AT_SIGNAL 16 +#define SSI_IEEE_RAISE_EXCEPTION 1001 /* linux specific */ + +#define SSIN_UACPROC 6 + +#define UAC_BITMASK 7 +#define UAC_NOPRINT 1 +#define UAC_NOFIX 2 +#define UAC_SIGBUS 4 + +#endif /* __ASM_ALPHA_SYSINFO_H */ diff --git a/arch/alpha/include/asm/termbits.h b/arch/alpha/include/asm/termbits.h new file mode 100644 index 00000000..879dd358 --- /dev/null +++ b/arch/alpha/include/asm/termbits.h @@ -0,0 +1,201 @@ +#ifndef _ALPHA_TERMBITS_H +#define _ALPHA_TERMBITS_H + +#include <linux/posix_types.h> + +typedef unsigned char cc_t; +typedef unsigned int speed_t; +typedef unsigned int tcflag_t; + +/* + * termios type and macro definitions. Be careful about adding stuff + * to this file since it's used in GNU libc and there are strict rules + * concerning namespace pollution. + */ + +#define NCCS 19 +struct termios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_cc[NCCS]; /* control characters */ + cc_t c_line; /* line discipline (== c_cc[19]) */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +/* Alpha has matching termios and ktermios */ + +struct ktermios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_cc[NCCS]; /* control characters */ + cc_t c_line; /* line discipline (== c_cc[19]) */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +/* c_cc characters */ +#define VEOF 0 +#define VEOL 1 +#define VEOL2 2 +#define VERASE 3 +#define VWERASE 4 +#define VKILL 5 +#define VREPRINT 6 +#define VSWTC 7 +#define VINTR 8 +#define VQUIT 9 +#define VSUSP 10 +#define VSTART 12 +#define VSTOP 13 +#define VLNEXT 14 +#define VDISCARD 15 +#define VMIN 16 +#define VTIME 17 + +/* c_iflag bits */ +#define IGNBRK 0000001 +#define BRKINT 0000002 +#define IGNPAR 0000004 +#define PARMRK 0000010 +#define INPCK 0000020 +#define ISTRIP 0000040 +#define INLCR 0000100 +#define IGNCR 0000200 +#define ICRNL 0000400 +#define IXON 0001000 +#define IXOFF 0002000 +#define IXANY 0004000 +#define IUCLC 0010000 +#define IMAXBEL 0020000 +#define IUTF8 0040000 + +/* c_oflag bits */ +#define OPOST 0000001 +#define ONLCR 0000002 +#define OLCUC 0000004 + +#define OCRNL 0000010 +#define ONOCR 0000020 +#define ONLRET 0000040 + +#define OFILL 00000100 +#define OFDEL 00000200 +#define NLDLY 00001400 +#define NL0 00000000 +#define NL1 00000400 +#define NL2 00001000 +#define NL3 00001400 +#define TABDLY 00006000 +#define TAB0 00000000 +#define TAB1 00002000 +#define TAB2 00004000 +#define TAB3 00006000 +#define CRDLY 00030000 +#define CR0 00000000 +#define CR1 00010000 +#define CR2 00020000 +#define CR3 00030000 +#define FFDLY 00040000 +#define FF0 00000000 +#define FF1 00040000 +#define BSDLY 00100000 +#define BS0 00000000 +#define BS1 00100000 +#define VTDLY 00200000 +#define VT0 00000000 +#define VT1 00200000 +#define XTABS 01000000 /* Hmm.. Linux/i386 considers this part of TABDLY.. */ + +/* c_cflag bit meaning */ +#define CBAUD 0000037 +#define B0 0000000 /* hang up */ +#define B50 0000001 +#define B75 0000002 +#define B110 0000003 +#define B134 0000004 +#define B150 0000005 +#define B200 0000006 +#define B300 0000007 +#define B600 0000010 +#define B1200 0000011 +#define B1800 0000012 +#define B2400 0000013 +#define B4800 0000014 +#define B9600 0000015 +#define B19200 0000016 +#define B38400 0000017 +#define EXTA B19200 +#define EXTB B38400 +#define CBAUDEX 0000000 +#define B57600 00020 +#define B115200 00021 +#define B230400 00022 +#define B460800 00023 +#define B500000 00024 +#define B576000 00025 +#define B921600 00026 +#define B1000000 00027 +#define B1152000 00030 +#define B1500000 00031 +#define B2000000 00032 +#define B2500000 00033 +#define B3000000 00034 +#define B3500000 00035 +#define B4000000 00036 + +#define CSIZE 00001400 +#define CS5 00000000 +#define CS6 00000400 +#define CS7 00001000 +#define CS8 00001400 + +#define CSTOPB 00002000 +#define CREAD 00004000 +#define PARENB 00010000 +#define PARODD 00020000 +#define HUPCL 00040000 + +#define CLOCAL 00100000 +#define CMSPAR 010000000000 /* mark or space (stick) parity */ +#define CRTSCTS 020000000000 /* flow control */ + +/* c_lflag bits */ +#define ISIG 0x00000080 +#define ICANON 0x00000100 +#define XCASE 0x00004000 +#define ECHO 0x00000008 +#define ECHOE 0x00000002 +#define ECHOK 0x00000004 +#define ECHONL 0x00000010 +#define NOFLSH 0x80000000 +#define TOSTOP 0x00400000 +#define ECHOCTL 0x00000040 +#define ECHOPRT 0x00000020 +#define ECHOKE 0x00000001 +#define FLUSHO 0x00800000 +#define PENDIN 0x20000000 +#define IEXTEN 0x00000400 +#define EXTPROC 0x10000000 + +/* Values for the ACTION argument to `tcflow'. */ +#define TCOOFF 0 +#define TCOON 1 +#define TCIOFF 2 +#define TCION 3 + +/* Values for the QUEUE_SELECTOR argument to `tcflush'. */ +#define TCIFLUSH 0 +#define TCOFLUSH 1 +#define TCIOFLUSH 2 + +/* Values for the OPTIONAL_ACTIONS argument to `tcsetattr'. */ +#define TCSANOW 0 +#define TCSADRAIN 1 +#define TCSAFLUSH 2 + +#endif /* _ALPHA_TERMBITS_H */ diff --git a/arch/alpha/include/asm/termios.h b/arch/alpha/include/asm/termios.h new file mode 100644 index 00000000..fa13716a --- /dev/null +++ b/arch/alpha/include/asm/termios.h @@ -0,0 +1,146 @@ +#ifndef _ALPHA_TERMIOS_H +#define _ALPHA_TERMIOS_H + +#include <asm/ioctls.h> +#include <asm/termbits.h> + +struct sgttyb { + char sg_ispeed; + char sg_ospeed; + char sg_erase; + char sg_kill; + short sg_flags; +}; + +struct tchars { + char t_intrc; + char t_quitc; + char t_startc; + char t_stopc; + char t_eofc; + char t_brkc; +}; + +struct ltchars { + char t_suspc; + char t_dsuspc; + char t_rprntc; + char t_flushc; + char t_werasc; + char t_lnextc; +}; + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* + * c_cc characters in the termio structure. Oh, how I love being + * backwardly compatible. Notice that character 4 and 5 are + * interpreted differently depending on whether ICANON is set in + * c_lflag. If it's set, they are used as _VEOF and _VEOL, otherwise + * as _VMIN and V_TIME. This is for compatibility with OSF/1 (which + * is compatible with sysV)... + */ +#define _VINTR 0 +#define _VQUIT 1 +#define _VERASE 2 +#define _VKILL 3 +#define _VEOF 4 +#define _VMIN 4 +#define _VEOL 5 +#define _VTIME 5 +#define _VEOL2 6 +#define _VSWTC 7 + +#ifdef __KERNEL__ +/* eof=^D eol=\0 eol2=\0 erase=del + werase=^W kill=^U reprint=^R sxtc=\0 + intr=^C quit=^\ susp=^Z <OSF/1 VDSUSP> + start=^Q stop=^S lnext=^V discard=^U + vmin=\1 vtime=\0 +*/ +#define INIT_C_CC "\004\000\000\177\027\025\022\000\003\034\032\000\021\023\026\025\001\000" + +/* + * Translate a "termio" structure into a "termios". Ugh. + */ + +#define user_termio_to_kernel_termios(a_termios, u_termio) \ +({ \ + struct ktermios *k_termios = (a_termios); \ + struct termio k_termio; \ + int canon, ret; \ + \ + ret = copy_from_user(&k_termio, u_termio, sizeof(k_termio)); \ + if (!ret) { \ + /* Overwrite only the low bits. */ \ + *(unsigned short *)&k_termios->c_iflag = k_termio.c_iflag; \ + *(unsigned short *)&k_termios->c_oflag = k_termio.c_oflag; \ + *(unsigned short *)&k_termios->c_cflag = k_termio.c_cflag; \ + *(unsigned short *)&k_termios->c_lflag = k_termio.c_lflag; \ + canon = k_termio.c_lflag & ICANON; \ + \ + k_termios->c_cc[VINTR] = k_termio.c_cc[_VINTR]; \ + k_termios->c_cc[VQUIT] = k_termio.c_cc[_VQUIT]; \ + k_termios->c_cc[VERASE] = k_termio.c_cc[_VERASE]; \ + k_termios->c_cc[VKILL] = k_termio.c_cc[_VKILL]; \ + k_termios->c_cc[VEOL2] = k_termio.c_cc[_VEOL2]; \ + k_termios->c_cc[VSWTC] = k_termio.c_cc[_VSWTC]; \ + k_termios->c_cc[canon ? VEOF : VMIN] = k_termio.c_cc[_VEOF]; \ + k_termios->c_cc[canon ? VEOL : VTIME] = k_termio.c_cc[_VEOL]; \ + } \ + ret; \ +}) + +/* + * Translate a "termios" structure into a "termio". Ugh. + * + * Note the "fun" _VMIN overloading. + */ +#define kernel_termios_to_user_termio(u_termio, a_termios) \ +({ \ + struct ktermios *k_termios = (a_termios); \ + struct termio k_termio; \ + int canon; \ + \ + k_termio.c_iflag = k_termios->c_iflag; \ + k_termio.c_oflag = k_termios->c_oflag; \ + k_termio.c_cflag = k_termios->c_cflag; \ + canon = (k_termio.c_lflag = k_termios->c_lflag) & ICANON; \ + \ + k_termio.c_line = k_termios->c_line; \ + k_termio.c_cc[_VINTR] = k_termios->c_cc[VINTR]; \ + k_termio.c_cc[_VQUIT] = k_termios->c_cc[VQUIT]; \ + k_termio.c_cc[_VERASE] = k_termios->c_cc[VERASE]; \ + k_termio.c_cc[_VKILL] = k_termios->c_cc[VKILL]; \ + k_termio.c_cc[_VEOF] = k_termios->c_cc[canon ? VEOF : VMIN]; \ + k_termio.c_cc[_VEOL] = k_termios->c_cc[canon ? VEOL : VTIME]; \ + k_termio.c_cc[_VEOL2] = k_termios->c_cc[VEOL2]; \ + k_termio.c_cc[_VSWTC] = k_termios->c_cc[VSWTC]; \ + \ + copy_to_user(u_termio, &k_termio, sizeof(k_termio)); \ +}) + +#define user_termios_to_kernel_termios(k, u) \ + copy_from_user(k, u, sizeof(struct termios)) + +#define kernel_termios_to_user_termios(u, k) \ + copy_to_user(u, k, sizeof(struct termios)) + +#endif /* __KERNEL__ */ + +#endif /* _ALPHA_TERMIOS_H */ diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h new file mode 100644 index 00000000..28335bd4 --- /dev/null +++ b/arch/alpha/include/asm/thread_info.h @@ -0,0 +1,121 @@ +#ifndef _ALPHA_THREAD_INFO_H +#define _ALPHA_THREAD_INFO_H + +#ifdef __KERNEL__ + +#ifndef __ASSEMBLY__ +#include <asm/processor.h> +#include <asm/types.h> +#include <asm/hwrpb.h> +#endif + +#ifndef __ASSEMBLY__ +struct thread_info { + struct pcb_struct pcb; /* palcode state */ + + struct task_struct *task; /* main task structure */ + unsigned int flags; /* low level flags */ + unsigned int ieee_state; /* see fpu.h */ + + struct exec_domain *exec_domain; /* execution domain */ + mm_segment_t addr_limit; /* thread address space */ + unsigned cpu; /* current CPU */ + int preempt_count; /* 0 => preemptable, <0 => BUG */ + + int bpt_nsaved; + unsigned long bpt_addr[2]; /* breakpoint handling */ + unsigned int bpt_insn[2]; + + struct restart_block restart_block; +}; + +/* + * Macros/functions for gaining access to the thread information structure. + */ +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .addr_limit = KERNEL_DS, \ + .preempt_count = INIT_PREEMPT_COUNT, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + +/* How to get the thread information struct from C. */ +register struct thread_info *__current_thread_info __asm__("$8"); +#define current_thread_info() __current_thread_info + +#endif /* __ASSEMBLY__ */ + +/* Thread information allocation. */ +#define THREAD_SIZE_ORDER 1 +#define THREAD_SIZE (2*PAGE_SIZE) + +#define PREEMPT_ACTIVE 0x40000000 + +/* + * Thread information flags: + * - these are process state flags and used from assembly + * - pending work-to-be-done flags come first and must be assigned to be + * within bits 0 to 7 to fit in and immediate operand. + * - ALPHA_UAC_SHIFT below must be kept consistent with the unaligned + * control flags. + * + * TIF_SYSCALL_TRACE is known to be 0 via blbs. + */ +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_POLLING_NRFLAG 8 /* poll_idle is polling NEED_RESCHED */ +#define TIF_DIE_IF_KERNEL 9 /* dik recursion lock */ +#define TIF_UAC_NOPRINT 10 /* ! Preserve sequence of following */ +#define TIF_UAC_NOFIX 11 /* ! flags as they match */ +#define TIF_UAC_SIGBUS 12 /* ! userspace part of 'osf_sysinfo' */ +#define TIF_MEMDIE 13 /* is terminating due to OOM killer */ +#define TIF_RESTORE_SIGMASK 14 /* restore signal mask in do_signal */ + +#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) +#define _TIF_SIGPENDING (1<<TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) +#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) +#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) +#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) + +/* Work to do on interrupt/exception return. */ +#define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ + _TIF_NOTIFY_RESUME) + +/* Work to do on any return to userspace. */ +#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK \ + | _TIF_SYSCALL_TRACE) + +#define ALPHA_UAC_SHIFT TIF_UAC_NOPRINT +#define ALPHA_UAC_MASK (1 << TIF_UAC_NOPRINT | 1 << TIF_UAC_NOFIX | \ + 1 << TIF_UAC_SIGBUS) + +#define SET_UNALIGN_CTL(task,value) ({ \ + task_thread_info(task)->flags = ((task_thread_info(task)->flags & \ + ~ALPHA_UAC_MASK) \ + | (((value) << ALPHA_UAC_SHIFT) & (1<<TIF_UAC_NOPRINT))\ + | (((value) << (ALPHA_UAC_SHIFT + 1)) & (1<<TIF_UAC_SIGBUS)) \ + | (((value) << (ALPHA_UAC_SHIFT - 1)) & (1<<TIF_UAC_NOFIX)));\ + 0; }) + +#define GET_UNALIGN_CTL(task,value) ({ \ + put_user((task_thread_info(task)->flags & (1 << TIF_UAC_NOPRINT))\ + >> ALPHA_UAC_SHIFT \ + | (task_thread_info(task)->flags & (1 << TIF_UAC_SIGBUS))\ + >> (ALPHA_UAC_SHIFT + 1) \ + | (task_thread_info(task)->flags & (1 << TIF_UAC_NOFIX))\ + >> (ALPHA_UAC_SHIFT - 1), \ + (int __user *)(value)); \ + }) + +#endif /* __KERNEL__ */ +#endif /* _ALPHA_THREAD_INFO_H */ diff --git a/arch/alpha/include/asm/timex.h b/arch/alpha/include/asm/timex.h new file mode 100644 index 00000000..afa0c45e --- /dev/null +++ b/arch/alpha/include/asm/timex.h @@ -0,0 +1,31 @@ +/* + * linux/include/asm-alpha/timex.h + * + * ALPHA architecture timex specifications + */ +#ifndef _ASMALPHA_TIMEX_H +#define _ASMALPHA_TIMEX_H + +/* With only one or two oddballs, we use the RTC as the ticker, selecting + the 32.768kHz reference clock, which nicely divides down to our HZ. */ +#define CLOCK_TICK_RATE 32768 + +/* + * Standard way to access the cycle counter. + * Currently only used on SMP for scheduling. + * + * Only the low 32 bits are available as a continuously counting entity. + * But this only means we'll force a reschedule every 8 seconds or so, + * which isn't an evil thing. + */ + +typedef unsigned int cycles_t; + +static inline cycles_t get_cycles (void) +{ + cycles_t ret; + __asm__ __volatile__ ("rpcc %0" : "=r"(ret)); + return ret; +} + +#endif diff --git a/arch/alpha/include/asm/tlb.h b/arch/alpha/include/asm/tlb.h new file mode 100644 index 00000000..42866759 --- /dev/null +++ b/arch/alpha/include/asm/tlb.h @@ -0,0 +1,15 @@ +#ifndef _ALPHA_TLB_H +#define _ALPHA_TLB_H + +#define tlb_start_vma(tlb, vma) do { } while (0) +#define tlb_end_vma(tlb, vma) do { } while (0) +#define __tlb_remove_tlb_entry(tlb, pte, addr) do { } while (0) + +#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) + +#include <asm-generic/tlb.h> + +#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte) +#define __pmd_free_tlb(tlb, pmd, address) pmd_free((tlb)->mm, pmd) + +#endif diff --git a/arch/alpha/include/asm/tlbflush.h b/arch/alpha/include/asm/tlbflush.h new file mode 100644 index 00000000..e89e0c2e --- /dev/null +++ b/arch/alpha/include/asm/tlbflush.h @@ -0,0 +1,152 @@ +#ifndef _ALPHA_TLBFLUSH_H +#define _ALPHA_TLBFLUSH_H + +#include <linux/mm.h> +#include <linux/sched.h> +#include <asm/compiler.h> +#include <asm/pgalloc.h> + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __MMU_EXTERN_INLINE +#endif + +extern void __load_new_mm_context(struct mm_struct *); + + +/* Use a few helper functions to hide the ugly broken ASN + numbers on early Alphas (ev4 and ev45). */ + +__EXTERN_INLINE void +ev4_flush_tlb_current(struct mm_struct *mm) +{ + __load_new_mm_context(mm); + tbiap(); +} + +__EXTERN_INLINE void +ev5_flush_tlb_current(struct mm_struct *mm) +{ + __load_new_mm_context(mm); +} + +/* Flush just one page in the current TLB set. We need to be very + careful about the icache here, there is no way to invalidate a + specific icache page. */ + +__EXTERN_INLINE void +ev4_flush_tlb_current_page(struct mm_struct * mm, + struct vm_area_struct *vma, + unsigned long addr) +{ + int tbi_flag = 2; + if (vma->vm_flags & VM_EXEC) { + __load_new_mm_context(mm); + tbi_flag = 3; + } + tbi(tbi_flag, addr); +} + +__EXTERN_INLINE void +ev5_flush_tlb_current_page(struct mm_struct * mm, + struct vm_area_struct *vma, + unsigned long addr) +{ + if (vma->vm_flags & VM_EXEC) + __load_new_mm_context(mm); + else + tbi(2, addr); +} + + +#ifdef CONFIG_ALPHA_GENERIC +# define flush_tlb_current alpha_mv.mv_flush_tlb_current +# define flush_tlb_current_page alpha_mv.mv_flush_tlb_current_page +#else +# ifdef CONFIG_ALPHA_EV4 +# define flush_tlb_current ev4_flush_tlb_current +# define flush_tlb_current_page ev4_flush_tlb_current_page +# else +# define flush_tlb_current ev5_flush_tlb_current +# define flush_tlb_current_page ev5_flush_tlb_current_page +# endif +#endif + +#ifdef __MMU_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __MMU_EXTERN_INLINE +#endif + +/* Flush current user mapping. */ +static inline void +flush_tlb(void) +{ + flush_tlb_current(current->active_mm); +} + +/* Flush someone else's user mapping. */ +static inline void +flush_tlb_other(struct mm_struct *mm) +{ + unsigned long *mmc = &mm->context[smp_processor_id()]; + /* Check it's not zero first to avoid cacheline ping pong + when possible. */ + if (*mmc) *mmc = 0; +} + +#ifndef CONFIG_SMP +/* Flush everything (kernel mapping may also have changed + due to vmalloc/vfree). */ +static inline void flush_tlb_all(void) +{ + tbia(); +} + +/* Flush a specified user mapping. */ +static inline void +flush_tlb_mm(struct mm_struct *mm) +{ + if (mm == current->active_mm) + flush_tlb_current(mm); + else + flush_tlb_other(mm); +} + +/* Page-granular tlb flush. */ +static inline void +flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) +{ + struct mm_struct *mm = vma->vm_mm; + + if (mm == current->active_mm) + flush_tlb_current_page(mm, vma, addr); + else + flush_tlb_other(mm); +} + +/* Flush a specified range of user mapping. On the Alpha we flush + the whole user tlb. */ +static inline void +flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + flush_tlb_mm(vma->vm_mm); +} + +#else /* CONFIG_SMP */ + +extern void flush_tlb_all(void); +extern void flush_tlb_mm(struct mm_struct *); +extern void flush_tlb_page(struct vm_area_struct *, unsigned long); +extern void flush_tlb_range(struct vm_area_struct *, unsigned long, + unsigned long); + +#endif /* CONFIG_SMP */ + +static inline void flush_tlb_kernel_range(unsigned long start, + unsigned long end) +{ + flush_tlb_all(); +} + +#endif /* _ALPHA_TLBFLUSH_H */ diff --git a/arch/alpha/include/asm/topology.h b/arch/alpha/include/asm/topology.h new file mode 100644 index 00000000..9251e13e --- /dev/null +++ b/arch/alpha/include/asm/topology.h @@ -0,0 +1,49 @@ +#ifndef _ASM_ALPHA_TOPOLOGY_H +#define _ASM_ALPHA_TOPOLOGY_H + +#include <linux/smp.h> +#include <linux/threads.h> +#include <asm/machvec.h> + +#ifdef CONFIG_NUMA +static inline int cpu_to_node(int cpu) +{ + int node; + + if (!alpha_mv.cpuid_to_nid) + return 0; + + node = alpha_mv.cpuid_to_nid(cpu); + +#ifdef DEBUG_NUMA + BUG_ON(node < 0); +#endif + + return node; +} + +extern struct cpumask node_to_cpumask_map[]; +/* FIXME: This is dumb, recalculating every time. But simple. */ +static const struct cpumask *cpumask_of_node(int node) +{ + int cpu; + + if (node == -1) + return cpu_all_mask; + + cpumask_clear(&node_to_cpumask_map[node]); + + for_each_online_cpu(cpu) { + if (cpu_to_node(cpu) == node) + cpumask_set_cpu(cpu, node_to_cpumask_map[node]); + } + + return &node_to_cpumask_map[node]; +} + +#define cpumask_of_pcibus(bus) (cpu_online_mask) + +#endif /* !CONFIG_NUMA */ +# include <asm-generic/topology.h> + +#endif /* _ASM_ALPHA_TOPOLOGY_H */ diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h new file mode 100644 index 00000000..0a057907 --- /dev/null +++ b/arch/alpha/include/asm/types.h @@ -0,0 +1,18 @@ +#ifndef _ALPHA_TYPES_H +#define _ALPHA_TYPES_H + +/* + * This file is never included by application software unless + * explicitly requested (e.g., via linux/types.h) in which case the + * application is Linux specific so (user-) name space pollution is + * not a major issue. However, for interoperability, libraries still + * need to be careful to avoid a name clashes. + */ + +#ifdef __KERNEL__ +#include <asm-generic/int-ll64.h> +#else +#include <asm-generic/int-l64.h> +#endif + +#endif /* _ALPHA_TYPES_H */ diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h new file mode 100644 index 00000000..b49ec2f8 --- /dev/null +++ b/arch/alpha/include/asm/uaccess.h @@ -0,0 +1,513 @@ +#ifndef __ALPHA_UACCESS_H +#define __ALPHA_UACCESS_H + +#include <linux/errno.h> +#include <linux/sched.h> + + +/* + * The fs value determines whether argument validity checking should be + * performed or not. If get_fs() == USER_DS, checking is performed, with + * get_fs() == KERNEL_DS, checking is bypassed. + * + * Or at least it did once upon a time. Nowadays it is a mask that + * defines which bits of the address space are off limits. This is a + * wee bit faster than the above. + * + * For historical reasons, these macros are grossly misnamed. + */ + +#define KERNEL_DS ((mm_segment_t) { 0UL }) +#define USER_DS ((mm_segment_t) { -0x40000000000UL }) + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +#define get_fs() (current_thread_info()->addr_limit) +#define get_ds() (KERNEL_DS) +#define set_fs(x) (current_thread_info()->addr_limit = (x)) + +#define segment_eq(a,b) ((a).seg == (b).seg) + +/* + * Is a address valid? This does a straightforward calculation rather + * than tests. + * + * Address valid if: + * - "addr" doesn't have any high-bits set + * - AND "size" doesn't have any high-bits set + * - AND "addr+size" doesn't have any high-bits set + * - OR we are in kernel mode. + */ +#define __access_ok(addr,size,segment) \ + (((segment).seg & (addr | size | (addr+size))) == 0) + +#define access_ok(type,addr,size) \ +({ \ + __chk_user_ptr(addr); \ + __access_ok(((unsigned long)(addr)),(size),get_fs()); \ +}) + +/* + * These are the main single-value transfer routines. They automatically + * use the right size if we just have the right pointer type. + * + * As the alpha uses the same address space for kernel and user + * data, we can just do these as direct assignments. (Of course, the + * exception handling means that it's no longer "just"...) + * + * Careful to not + * (a) re-use the arguments for side effects (sizeof/typeof is ok) + * (b) require any knowledge of processes at this stage + */ +#define put_user(x,ptr) \ + __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)),get_fs()) +#define get_user(x,ptr) \ + __get_user_check((x),(ptr),sizeof(*(ptr)),get_fs()) + +/* + * The "__xxx" versions do not do address space checking, useful when + * doing multiple accesses to the same area (the programmer has to do the + * checks by hand with "access_ok()") + */ +#define __put_user(x,ptr) \ + __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) +#define __get_user(x,ptr) \ + __get_user_nocheck((x),(ptr),sizeof(*(ptr))) + +/* + * The "lda %1, 2b-1b(%0)" bits are magic to get the assembler to + * encode the bits we need for resolving the exception. See the + * more extensive comments with fixup_inline_exception below for + * more information. + */ + +extern void __get_user_unknown(void); + +#define __get_user_nocheck(x,ptr,size) \ +({ \ + long __gu_err = 0; \ + unsigned long __gu_val; \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: __get_user_8(ptr); break; \ + case 2: __get_user_16(ptr); break; \ + case 4: __get_user_32(ptr); break; \ + case 8: __get_user_64(ptr); break; \ + default: __get_user_unknown(); break; \ + } \ + (x) = (__typeof__(*(ptr))) __gu_val; \ + __gu_err; \ +}) + +#define __get_user_check(x,ptr,size,segment) \ +({ \ + long __gu_err = -EFAULT; \ + unsigned long __gu_val = 0; \ + const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + if (__access_ok((unsigned long)__gu_addr,size,segment)) { \ + __gu_err = 0; \ + switch (size) { \ + case 1: __get_user_8(__gu_addr); break; \ + case 2: __get_user_16(__gu_addr); break; \ + case 4: __get_user_32(__gu_addr); break; \ + case 8: __get_user_64(__gu_addr); break; \ + default: __get_user_unknown(); break; \ + } \ + } \ + (x) = (__typeof__(*(ptr))) __gu_val; \ + __gu_err; \ +}) + +struct __large_struct { unsigned long buf[100]; }; +#define __m(x) (*(struct __large_struct __user *)(x)) + +#define __get_user_64(addr) \ + __asm__("1: ldq %0,%2\n" \ + "2:\n" \ + ".section __ex_table,\"a\"\n" \ + " .long 1b - .\n" \ + " lda %0, 2b-1b(%1)\n" \ + ".previous" \ + : "=r"(__gu_val), "=r"(__gu_err) \ + : "m"(__m(addr)), "1"(__gu_err)) + +#define __get_user_32(addr) \ + __asm__("1: ldl %0,%2\n" \ + "2:\n" \ + ".section __ex_table,\"a\"\n" \ + " .long 1b - .\n" \ + " lda %0, 2b-1b(%1)\n" \ + ".previous" \ + : "=r"(__gu_val), "=r"(__gu_err) \ + : "m"(__m(addr)), "1"(__gu_err)) + +#ifdef __alpha_bwx__ +/* Those lucky bastards with ev56 and later CPUs can do byte/word moves. */ + +#define __get_user_16(addr) \ + __asm__("1: ldwu %0,%2\n" \ + "2:\n" \ + ".section __ex_table,\"a\"\n" \ + " .long 1b - .\n" \ + " lda %0, 2b-1b(%1)\n" \ + ".previous" \ + : "=r"(__gu_val), "=r"(__gu_err) \ + : "m"(__m(addr)), "1"(__gu_err)) + +#define __get_user_8(addr) \ + __asm__("1: ldbu %0,%2\n" \ + "2:\n" \ + ".section __ex_table,\"a\"\n" \ + " .long 1b - .\n" \ + " lda %0, 2b-1b(%1)\n" \ + ".previous" \ + : "=r"(__gu_val), "=r"(__gu_err) \ + : "m"(__m(addr)), "1"(__gu_err)) +#else +/* Unfortunately, we can't get an unaligned access trap for the sub-word + load, so we have to do a general unaligned operation. */ + +#define __get_user_16(addr) \ +{ \ + long __gu_tmp; \ + __asm__("1: ldq_u %0,0(%3)\n" \ + "2: ldq_u %1,1(%3)\n" \ + " extwl %0,%3,%0\n" \ + " extwh %1,%3,%1\n" \ + " or %0,%1,%0\n" \ + "3:\n" \ + ".section __ex_table,\"a\"\n" \ + " .long 1b - .\n" \ + " lda %0, 3b-1b(%2)\n" \ + " .long 2b - .\n" \ + " lda %0, 3b-2b(%2)\n" \ + ".previous" \ + : "=&r"(__gu_val), "=&r"(__gu_tmp), "=r"(__gu_err) \ + : "r"(addr), "2"(__gu_err)); \ +} + +#define __get_user_8(addr) \ + __asm__("1: ldq_u %0,0(%2)\n" \ + " extbl %0,%2,%0\n" \ + "2:\n" \ + ".section __ex_table,\"a\"\n" \ + " .long 1b - .\n" \ + " lda %0, 2b-1b(%1)\n" \ + ".previous" \ + : "=&r"(__gu_val), "=r"(__gu_err) \ + : "r"(addr), "1"(__gu_err)) +#endif + +extern void __put_user_unknown(void); + +#define __put_user_nocheck(x,ptr,size) \ +({ \ + long __pu_err = 0; \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: __put_user_8(x,ptr); break; \ + case 2: __put_user_16(x,ptr); break; \ + case 4: __put_user_32(x,ptr); break; \ + case 8: __put_user_64(x,ptr); break; \ + default: __put_user_unknown(); break; \ + } \ + __pu_err; \ +}) + +#define __put_user_check(x,ptr,size,segment) \ +({ \ + long __pu_err = -EFAULT; \ + __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + if (__access_ok((unsigned long)__pu_addr,size,segment)) { \ + __pu_err = 0; \ + switch (size) { \ + case 1: __put_user_8(x,__pu_addr); break; \ + case 2: __put_user_16(x,__pu_addr); break; \ + case 4: __put_user_32(x,__pu_addr); break; \ + case 8: __put_user_64(x,__pu_addr); break; \ + default: __put_user_unknown(); break; \ + } \ + } \ + __pu_err; \ +}) + +/* + * The "__put_user_xx()" macros tell gcc they read from memory + * instead of writing: this is because they do not write to + * any memory gcc knows about, so there are no aliasing issues + */ +#define __put_user_64(x,addr) \ +__asm__ __volatile__("1: stq %r2,%1\n" \ + "2:\n" \ + ".section __ex_table,\"a\"\n" \ + " .long 1b - .\n" \ + " lda $31,2b-1b(%0)\n" \ + ".previous" \ + : "=r"(__pu_err) \ + : "m" (__m(addr)), "rJ" (x), "0"(__pu_err)) + +#define __put_user_32(x,addr) \ +__asm__ __volatile__("1: stl %r2,%1\n" \ + "2:\n" \ + ".section __ex_table,\"a\"\n" \ + " .long 1b - .\n" \ + " lda $31,2b-1b(%0)\n" \ + ".previous" \ + : "=r"(__pu_err) \ + : "m"(__m(addr)), "rJ"(x), "0"(__pu_err)) + +#ifdef __alpha_bwx__ +/* Those lucky bastards with ev56 and later CPUs can do byte/word moves. */ + +#define __put_user_16(x,addr) \ +__asm__ __volatile__("1: stw %r2,%1\n" \ + "2:\n" \ + ".section __ex_table,\"a\"\n" \ + " .long 1b - .\n" \ + " lda $31,2b-1b(%0)\n" \ + ".previous" \ + : "=r"(__pu_err) \ + : "m"(__m(addr)), "rJ"(x), "0"(__pu_err)) + +#define __put_user_8(x,addr) \ +__asm__ __volatile__("1: stb %r2,%1\n" \ + "2:\n" \ + ".section __ex_table,\"a\"\n" \ + " .long 1b - .\n" \ + " lda $31,2b-1b(%0)\n" \ + ".previous" \ + : "=r"(__pu_err) \ + : "m"(__m(addr)), "rJ"(x), "0"(__pu_err)) +#else +/* Unfortunately, we can't get an unaligned access trap for the sub-word + write, so we have to do a general unaligned operation. */ + +#define __put_user_16(x,addr) \ +{ \ + long __pu_tmp1, __pu_tmp2, __pu_tmp3, __pu_tmp4; \ + __asm__ __volatile__( \ + "1: ldq_u %2,1(%5)\n" \ + "2: ldq_u %1,0(%5)\n" \ + " inswh %6,%5,%4\n" \ + " inswl %6,%5,%3\n" \ + " mskwh %2,%5,%2\n" \ + " mskwl %1,%5,%1\n" \ + " or %2,%4,%2\n" \ + " or %1,%3,%1\n" \ + "3: stq_u %2,1(%5)\n" \ + "4: stq_u %1,0(%5)\n" \ + "5:\n" \ + ".section __ex_table,\"a\"\n" \ + " .long 1b - .\n" \ + " lda $31, 5b-1b(%0)\n" \ + " .long 2b - .\n" \ + " lda $31, 5b-2b(%0)\n" \ + " .long 3b - .\n" \ + " lda $31, 5b-3b(%0)\n" \ + " .long 4b - .\n" \ + " lda $31, 5b-4b(%0)\n" \ + ".previous" \ + : "=r"(__pu_err), "=&r"(__pu_tmp1), \ + "=&r"(__pu_tmp2), "=&r"(__pu_tmp3), \ + "=&r"(__pu_tmp4) \ + : "r"(addr), "r"((unsigned long)(x)), "0"(__pu_err)); \ +} + +#define __put_user_8(x,addr) \ +{ \ + long __pu_tmp1, __pu_tmp2; \ + __asm__ __volatile__( \ + "1: ldq_u %1,0(%4)\n" \ + " insbl %3,%4,%2\n" \ + " mskbl %1,%4,%1\n" \ + " or %1,%2,%1\n" \ + "2: stq_u %1,0(%4)\n" \ + "3:\n" \ + ".section __ex_table,\"a\"\n" \ + " .long 1b - .\n" \ + " lda $31, 3b-1b(%0)\n" \ + " .long 2b - .\n" \ + " lda $31, 3b-2b(%0)\n" \ + ".previous" \ + : "=r"(__pu_err), \ + "=&r"(__pu_tmp1), "=&r"(__pu_tmp2) \ + : "r"((unsigned long)(x)), "r"(addr), "0"(__pu_err)); \ +} +#endif + + +/* + * Complex access routines + */ + +/* This little bit of silliness is to get the GP loaded for a function + that ordinarily wouldn't. Otherwise we could have it done by the macro + directly, which can be optimized the linker. */ +#ifdef MODULE +#define __module_address(sym) "r"(sym), +#define __module_call(ra, arg, sym) "jsr $" #ra ",(%" #arg ")," #sym +#else +#define __module_address(sym) +#define __module_call(ra, arg, sym) "bsr $" #ra "," #sym " !samegp" +#endif + +extern void __copy_user(void); + +extern inline long +__copy_tofrom_user_nocheck(void *to, const void *from, long len) +{ + register void * __cu_to __asm__("$6") = to; + register const void * __cu_from __asm__("$7") = from; + register long __cu_len __asm__("$0") = len; + + __asm__ __volatile__( + __module_call(28, 3, __copy_user) + : "=r" (__cu_len), "=r" (__cu_from), "=r" (__cu_to) + : __module_address(__copy_user) + "0" (__cu_len), "1" (__cu_from), "2" (__cu_to) + : "$1","$2","$3","$4","$5","$28","memory"); + + return __cu_len; +} + +extern inline long +__copy_tofrom_user(void *to, const void *from, long len, const void __user *validate) +{ + if (__access_ok((unsigned long)validate, len, get_fs())) + len = __copy_tofrom_user_nocheck(to, from, len); + return len; +} + +#define __copy_to_user(to,from,n) \ +({ \ + __chk_user_ptr(to); \ + __copy_tofrom_user_nocheck((__force void *)(to),(from),(n)); \ +}) +#define __copy_from_user(to,from,n) \ +({ \ + __chk_user_ptr(from); \ + __copy_tofrom_user_nocheck((to),(__force void *)(from),(n)); \ +}) + +#define __copy_to_user_inatomic __copy_to_user +#define __copy_from_user_inatomic __copy_from_user + + +extern inline long +copy_to_user(void __user *to, const void *from, long n) +{ + return __copy_tofrom_user((__force void *)to, from, n, to); +} + +extern inline long +copy_from_user(void *to, const void __user *from, long n) +{ + return __copy_tofrom_user(to, (__force void *)from, n, from); +} + +extern void __do_clear_user(void); + +extern inline long +__clear_user(void __user *to, long len) +{ + register void __user * __cl_to __asm__("$6") = to; + register long __cl_len __asm__("$0") = len; + __asm__ __volatile__( + __module_call(28, 2, __do_clear_user) + : "=r"(__cl_len), "=r"(__cl_to) + : __module_address(__do_clear_user) + "0"(__cl_len), "1"(__cl_to) + : "$1","$2","$3","$4","$5","$28","memory"); + return __cl_len; +} + +extern inline long +clear_user(void __user *to, long len) +{ + if (__access_ok((unsigned long)to, len, get_fs())) + len = __clear_user(to, len); + return len; +} + +#undef __module_address +#undef __module_call + +/* Returns: -EFAULT if exception before terminator, N if the entire + buffer filled, else strlen. */ + +extern long __strncpy_from_user(char *__to, const char __user *__from, long __to_len); + +extern inline long +strncpy_from_user(char *to, const char __user *from, long n) +{ + long ret = -EFAULT; + if (__access_ok((unsigned long)from, 0, get_fs())) + ret = __strncpy_from_user(to, from, n); + return ret; +} + +/* Returns: 0 if bad, string length+1 (memory size) of string if ok */ +extern long __strlen_user(const char __user *); + +extern inline long strlen_user(const char __user *str) +{ + return access_ok(VERIFY_READ,str,0) ? __strlen_user(str) : 0; +} + +/* Returns: 0 if exception before NUL or reaching the supplied limit (N), + * a value greater than N if the limit would be exceeded, else strlen. */ +extern long __strnlen_user(const char __user *, long); + +extern inline long strnlen_user(const char __user *str, long n) +{ + return access_ok(VERIFY_READ,str,0) ? __strnlen_user(str, n) : 0; +} + +/* + * About the exception table: + * + * - insn is a 32-bit pc-relative offset from the faulting insn. + * - nextinsn is a 16-bit offset off of the faulting instruction + * (not off of the *next* instruction as branches are). + * - errreg is the register in which to place -EFAULT. + * - valreg is the final target register for the load sequence + * and will be zeroed. + * + * Either errreg or valreg may be $31, in which case nothing happens. + * + * The exception fixup information "just so happens" to be arranged + * as in a MEM format instruction. This lets us emit our three + * values like so: + * + * lda valreg, nextinsn(errreg) + * + */ + +struct exception_table_entry +{ + signed int insn; + union exception_fixup { + unsigned unit; + struct { + signed int nextinsn : 16; + unsigned int errreg : 5; + unsigned int valreg : 5; + } bits; + } fixup; +}; + +/* Returns the new pc */ +#define fixup_exception(map_reg, _fixup, pc) \ +({ \ + if ((_fixup)->fixup.bits.valreg != 31) \ + map_reg((_fixup)->fixup.bits.valreg) = 0; \ + if ((_fixup)->fixup.bits.errreg != 31) \ + map_reg((_fixup)->fixup.bits.errreg) = -EFAULT; \ + (pc) + (_fixup)->fixup.bits.nextinsn; \ +}) + +#define ARCH_HAS_SORT_EXTABLE +#define ARCH_HAS_SEARCH_EXTABLE + +#endif /* __ALPHA_UACCESS_H */ diff --git a/arch/alpha/include/asm/ucontext.h b/arch/alpha/include/asm/ucontext.h new file mode 100644 index 00000000..47578ab4 --- /dev/null +++ b/arch/alpha/include/asm/ucontext.h @@ -0,0 +1,13 @@ +#ifndef _ASMAXP_UCONTEXT_H +#define _ASMAXP_UCONTEXT_H + +struct ucontext { + unsigned long uc_flags; + struct ucontext *uc_link; + old_sigset_t uc_osf_sigmask; + stack_t uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +#endif /* !_ASMAXP_UCONTEXT_H */ diff --git a/arch/alpha/include/asm/unaligned.h b/arch/alpha/include/asm/unaligned.h new file mode 100644 index 00000000..3787c60a --- /dev/null +++ b/arch/alpha/include/asm/unaligned.h @@ -0,0 +1,11 @@ +#ifndef _ASM_ALPHA_UNALIGNED_H +#define _ASM_ALPHA_UNALIGNED_H + +#include <linux/unaligned/le_struct.h> +#include <linux/unaligned/be_byteshift.h> +#include <linux/unaligned/generic.h> + +#define get_unaligned __get_unaligned_le +#define put_unaligned __put_unaligned_le + +#endif /* _ASM_ALPHA_UNALIGNED_H */ diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h new file mode 100644 index 00000000..2207fc61 --- /dev/null +++ b/arch/alpha/include/asm/unistd.h @@ -0,0 +1,491 @@ +#ifndef _ALPHA_UNISTD_H +#define _ALPHA_UNISTD_H + +#define __NR_osf_syscall 0 /* not implemented */ +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_osf_old_open 5 /* not implemented */ +#define __NR_close 6 +#define __NR_osf_wait4 7 +#define __NR_osf_old_creat 8 /* not implemented */ +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_osf_execve 11 /* not implemented */ +#define __NR_chdir 12 +#define __NR_fchdir 13 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_chown 16 +#define __NR_brk 17 +#define __NR_osf_getfsstat 18 /* not implemented */ +#define __NR_lseek 19 +#define __NR_getxpid 20 +#define __NR_osf_mount 21 +#define __NR_umount 22 +#define __NR_setuid 23 +#define __NR_getxuid 24 +#define __NR_exec_with_loader 25 /* not implemented */ +#define __NR_ptrace 26 +#define __NR_osf_nrecvmsg 27 /* not implemented */ +#define __NR_osf_nsendmsg 28 /* not implemented */ +#define __NR_osf_nrecvfrom 29 /* not implemented */ +#define __NR_osf_naccept 30 /* not implemented */ +#define __NR_osf_ngetpeername 31 /* not implemented */ +#define __NR_osf_ngetsockname 32 /* not implemented */ +#define __NR_access 33 +#define __NR_osf_chflags 34 /* not implemented */ +#define __NR_osf_fchflags 35 /* not implemented */ +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_osf_old_stat 38 /* not implemented */ +#define __NR_setpgid 39 +#define __NR_osf_old_lstat 40 /* not implemented */ +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_osf_set_program_attributes 43 +#define __NR_osf_profil 44 /* not implemented */ +#define __NR_open 45 +#define __NR_osf_old_sigaction 46 /* not implemented */ +#define __NR_getxgid 47 +#define __NR_osf_sigprocmask 48 +#define __NR_osf_getlogin 49 /* not implemented */ +#define __NR_osf_setlogin 50 /* not implemented */ +#define __NR_acct 51 +#define __NR_sigpending 52 + +#define __NR_ioctl 54 +#define __NR_osf_reboot 55 /* not implemented */ +#define __NR_osf_revoke 56 /* not implemented */ +#define __NR_symlink 57 +#define __NR_readlink 58 +#define __NR_execve 59 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_osf_old_fstat 62 /* not implemented */ +#define __NR_getpgrp 63 +#define __NR_getpagesize 64 +#define __NR_osf_mremap 65 /* not implemented */ +#define __NR_vfork 66 +#define __NR_stat 67 +#define __NR_lstat 68 +#define __NR_osf_sbrk 69 /* not implemented */ +#define __NR_osf_sstk 70 /* not implemented */ +#define __NR_mmap 71 /* OSF/1 mmap is superset of Linux */ +#define __NR_osf_old_vadvise 72 /* not implemented */ +#define __NR_munmap 73 +#define __NR_mprotect 74 +#define __NR_madvise 75 +#define __NR_vhangup 76 +#define __NR_osf_kmodcall 77 /* not implemented */ +#define __NR_osf_mincore 78 /* not implemented */ +#define __NR_getgroups 79 +#define __NR_setgroups 80 +#define __NR_osf_old_getpgrp 81 /* not implemented */ +#define __NR_setpgrp 82 /* BSD alias for setpgid */ +#define __NR_osf_setitimer 83 +#define __NR_osf_old_wait 84 /* not implemented */ +#define __NR_osf_table 85 /* not implemented */ +#define __NR_osf_getitimer 86 +#define __NR_gethostname 87 +#define __NR_sethostname 88 +#define __NR_getdtablesize 89 +#define __NR_dup2 90 +#define __NR_fstat 91 +#define __NR_fcntl 92 +#define __NR_osf_select 93 +#define __NR_poll 94 +#define __NR_fsync 95 +#define __NR_setpriority 96 +#define __NR_socket 97 +#define __NR_connect 98 +#define __NR_accept 99 +#define __NR_getpriority 100 +#define __NR_send 101 +#define __NR_recv 102 +#define __NR_sigreturn 103 +#define __NR_bind 104 +#define __NR_setsockopt 105 +#define __NR_listen 106 +#define __NR_osf_plock 107 /* not implemented */ +#define __NR_osf_old_sigvec 108 /* not implemented */ +#define __NR_osf_old_sigblock 109 /* not implemented */ +#define __NR_osf_old_sigsetmask 110 /* not implemented */ +#define __NR_sigsuspend 111 +#define __NR_osf_sigstack 112 +#define __NR_recvmsg 113 +#define __NR_sendmsg 114 +#define __NR_osf_old_vtrace 115 /* not implemented */ +#define __NR_osf_gettimeofday 116 +#define __NR_osf_getrusage 117 +#define __NR_getsockopt 118 + +#define __NR_readv 120 +#define __NR_writev 121 +#define __NR_osf_settimeofday 122 +#define __NR_fchown 123 +#define __NR_fchmod 124 +#define __NR_recvfrom 125 +#define __NR_setreuid 126 +#define __NR_setregid 127 +#define __NR_rename 128 +#define __NR_truncate 129 +#define __NR_ftruncate 130 +#define __NR_flock 131 +#define __NR_setgid 132 +#define __NR_sendto 133 +#define __NR_shutdown 134 +#define __NR_socketpair 135 +#define __NR_mkdir 136 +#define __NR_rmdir 137 +#define __NR_osf_utimes 138 +#define __NR_osf_old_sigreturn 139 /* not implemented */ +#define __NR_osf_adjtime 140 /* not implemented */ +#define __NR_getpeername 141 +#define __NR_osf_gethostid 142 /* not implemented */ +#define __NR_osf_sethostid 143 /* not implemented */ +#define __NR_getrlimit 144 +#define __NR_setrlimit 145 +#define __NR_osf_old_killpg 146 /* not implemented */ +#define __NR_setsid 147 +#define __NR_quotactl 148 +#define __NR_osf_oldquota 149 /* not implemented */ +#define __NR_getsockname 150 + +#define __NR_osf_pid_block 153 /* not implemented */ +#define __NR_osf_pid_unblock 154 /* not implemented */ + +#define __NR_sigaction 156 +#define __NR_osf_sigwaitprim 157 /* not implemented */ +#define __NR_osf_nfssvc 158 /* not implemented */ +#define __NR_osf_getdirentries 159 +#define __NR_osf_statfs 160 +#define __NR_osf_fstatfs 161 + +#define __NR_osf_asynch_daemon 163 /* not implemented */ +#define __NR_osf_getfh 164 /* not implemented */ +#define __NR_osf_getdomainname 165 +#define __NR_setdomainname 166 + +#define __NR_osf_exportfs 169 /* not implemented */ + +#define __NR_osf_alt_plock 181 /* not implemented */ + +#define __NR_osf_getmnt 184 /* not implemented */ + +#define __NR_osf_alt_sigpending 187 /* not implemented */ +#define __NR_osf_alt_setsid 188 /* not implemented */ + +#define __NR_osf_swapon 199 +#define __NR_msgctl 200 +#define __NR_msgget 201 +#define __NR_msgrcv 202 +#define __NR_msgsnd 203 +#define __NR_semctl 204 +#define __NR_semget 205 +#define __NR_semop 206 +#define __NR_osf_utsname 207 +#define __NR_lchown 208 +#define __NR_osf_shmat 209 +#define __NR_shmctl 210 +#define __NR_shmdt 211 +#define __NR_shmget 212 +#define __NR_osf_mvalid 213 /* not implemented */ +#define __NR_osf_getaddressconf 214 /* not implemented */ +#define __NR_osf_msleep 215 /* not implemented */ +#define __NR_osf_mwakeup 216 /* not implemented */ +#define __NR_msync 217 +#define __NR_osf_signal 218 /* not implemented */ +#define __NR_osf_utc_gettime 219 /* not implemented */ +#define __NR_osf_utc_adjtime 220 /* not implemented */ + +#define __NR_osf_security 222 /* not implemented */ +#define __NR_osf_kloadcall 223 /* not implemented */ + +#define __NR_getpgid 233 +#define __NR_getsid 234 +#define __NR_sigaltstack 235 +#define __NR_osf_waitid 236 /* not implemented */ +#define __NR_osf_priocntlset 237 /* not implemented */ +#define __NR_osf_sigsendset 238 /* not implemented */ +#define __NR_osf_set_speculative 239 /* not implemented */ +#define __NR_osf_msfs_syscall 240 /* not implemented */ +#define __NR_osf_sysinfo 241 +#define __NR_osf_uadmin 242 /* not implemented */ +#define __NR_osf_fuser 243 /* not implemented */ +#define __NR_osf_proplist_syscall 244 +#define __NR_osf_ntp_adjtime 245 /* not implemented */ +#define __NR_osf_ntp_gettime 246 /* not implemented */ +#define __NR_osf_pathconf 247 /* not implemented */ +#define __NR_osf_fpathconf 248 /* not implemented */ + +#define __NR_osf_uswitch 250 /* not implemented */ +#define __NR_osf_usleep_thread 251 +#define __NR_osf_audcntl 252 /* not implemented */ +#define __NR_osf_audgen 253 /* not implemented */ +#define __NR_sysfs 254 +#define __NR_osf_subsys_info 255 /* not implemented */ +#define __NR_osf_getsysinfo 256 +#define __NR_osf_setsysinfo 257 +#define __NR_osf_afs_syscall 258 /* not implemented */ +#define __NR_osf_swapctl 259 /* not implemented */ +#define __NR_osf_memcntl 260 /* not implemented */ +#define __NR_osf_fdatasync 261 /* not implemented */ + +/* + * Ignore legacy syscalls that we don't use. + */ +#define __IGNORE_alarm +#define __IGNORE_creat +#define __IGNORE_getegid +#define __IGNORE_geteuid +#define __IGNORE_getgid +#define __IGNORE_getpid +#define __IGNORE_getppid +#define __IGNORE_getuid +#define __IGNORE_pause +#define __IGNORE_time +#define __IGNORE_utime +#define __IGNORE_umount2 + +/* + * Linux-specific system calls begin at 300 + */ +#define __NR_bdflush 300 +#define __NR_sethae 301 +#define __NR_mount 302 +#define __NR_old_adjtimex 303 +#define __NR_swapoff 304 +#define __NR_getdents 305 +#define __NR_create_module 306 +#define __NR_init_module 307 +#define __NR_delete_module 308 +#define __NR_get_kernel_syms 309 +#define __NR_syslog 310 +#define __NR_reboot 311 +#define __NR_clone 312 +#define __NR_uselib 313 +#define __NR_mlock 314 +#define __NR_munlock 315 +#define __NR_mlockall 316 +#define __NR_munlockall 317 +#define __NR_sysinfo 318 +#define __NR__sysctl 319 +/* 320 was sys_idle. */ +#define __NR_oldumount 321 +#define __NR_swapon 322 +#define __NR_times 323 +#define __NR_personality 324 +#define __NR_setfsuid 325 +#define __NR_setfsgid 326 +#define __NR_ustat 327 +#define __NR_statfs 328 +#define __NR_fstatfs 329 +#define __NR_sched_setparam 330 +#define __NR_sched_getparam 331 +#define __NR_sched_setscheduler 332 +#define __NR_sched_getscheduler 333 +#define __NR_sched_yield 334 +#define __NR_sched_get_priority_max 335 +#define __NR_sched_get_priority_min 336 +#define __NR_sched_rr_get_interval 337 +#define __NR_afs_syscall 338 +#define __NR_uname 339 +#define __NR_nanosleep 340 +#define __NR_mremap 341 +#define __NR_nfsservctl 342 +#define __NR_setresuid 343 +#define __NR_getresuid 344 +#define __NR_pciconfig_read 345 +#define __NR_pciconfig_write 346 +#define __NR_query_module 347 +#define __NR_prctl 348 +#define __NR_pread64 349 +#define __NR_pwrite64 350 +#define __NR_rt_sigreturn 351 +#define __NR_rt_sigaction 352 +#define __NR_rt_sigprocmask 353 +#define __NR_rt_sigpending 354 +#define __NR_rt_sigtimedwait 355 +#define __NR_rt_sigqueueinfo 356 +#define __NR_rt_sigsuspend 357 +#define __NR_select 358 +#define __NR_gettimeofday 359 +#define __NR_settimeofday 360 +#define __NR_getitimer 361 +#define __NR_setitimer 362 +#define __NR_utimes 363 +#define __NR_getrusage 364 +#define __NR_wait4 365 +#define __NR_adjtimex 366 +#define __NR_getcwd 367 +#define __NR_capget 368 +#define __NR_capset 369 +#define __NR_sendfile 370 +#define __NR_setresgid 371 +#define __NR_getresgid 372 +#define __NR_dipc 373 +#define __NR_pivot_root 374 +#define __NR_mincore 375 +#define __NR_pciconfig_iobase 376 +#define __NR_getdents64 377 +#define __NR_gettid 378 +#define __NR_readahead 379 +/* 380 is unused */ +#define __NR_tkill 381 +#define __NR_setxattr 382 +#define __NR_lsetxattr 383 +#define __NR_fsetxattr 384 +#define __NR_getxattr 385 +#define __NR_lgetxattr 386 +#define __NR_fgetxattr 387 +#define __NR_listxattr 388 +#define __NR_llistxattr 389 +#define __NR_flistxattr 390 +#define __NR_removexattr 391 +#define __NR_lremovexattr 392 +#define __NR_fremovexattr 393 +#define __NR_futex 394 +#define __NR_sched_setaffinity 395 +#define __NR_sched_getaffinity 396 +#define __NR_tuxcall 397 +#define __NR_io_setup 398 +#define __NR_io_destroy 399 +#define __NR_io_getevents 400 +#define __NR_io_submit 401 +#define __NR_io_cancel 402 +#define __NR_exit_group 405 +#define __NR_lookup_dcookie 406 +#define __NR_epoll_create 407 +#define __NR_epoll_ctl 408 +#define __NR_epoll_wait 409 +/* Feb 2007: These three sys_epoll defines shouldn't be here but culling + * them would break userspace apps ... we'll kill them off in 2010 :) */ +#define __NR_sys_epoll_create __NR_epoll_create +#define __NR_sys_epoll_ctl __NR_epoll_ctl +#define __NR_sys_epoll_wait __NR_epoll_wait +#define __NR_remap_file_pages 410 +#define __NR_set_tid_address 411 +#define __NR_restart_syscall 412 +#define __NR_fadvise64 413 +#define __NR_timer_create 414 +#define __NR_timer_settime 415 +#define __NR_timer_gettime 416 +#define __NR_timer_getoverrun 417 +#define __NR_timer_delete 418 +#define __NR_clock_settime 419 +#define __NR_clock_gettime 420 +#define __NR_clock_getres 421 +#define __NR_clock_nanosleep 422 +#define __NR_semtimedop 423 +#define __NR_tgkill 424 +#define __NR_stat64 425 +#define __NR_lstat64 426 +#define __NR_fstat64 427 +#define __NR_vserver 428 +#define __NR_mbind 429 +#define __NR_get_mempolicy 430 +#define __NR_set_mempolicy 431 +#define __NR_mq_open 432 +#define __NR_mq_unlink 433 +#define __NR_mq_timedsend 434 +#define __NR_mq_timedreceive 435 +#define __NR_mq_notify 436 +#define __NR_mq_getsetattr 437 +#define __NR_waitid 438 +#define __NR_add_key 439 +#define __NR_request_key 440 +#define __NR_keyctl 441 +#define __NR_ioprio_set 442 +#define __NR_ioprio_get 443 +#define __NR_inotify_init 444 +#define __NR_inotify_add_watch 445 +#define __NR_inotify_rm_watch 446 +#define __NR_fdatasync 447 +#define __NR_kexec_load 448 +#define __NR_migrate_pages 449 +#define __NR_openat 450 +#define __NR_mkdirat 451 +#define __NR_mknodat 452 +#define __NR_fchownat 453 +#define __NR_futimesat 454 +#define __NR_fstatat64 455 +#define __NR_unlinkat 456 +#define __NR_renameat 457 +#define __NR_linkat 458 +#define __NR_symlinkat 459 +#define __NR_readlinkat 460 +#define __NR_fchmodat 461 +#define __NR_faccessat 462 +#define __NR_pselect6 463 +#define __NR_ppoll 464 +#define __NR_unshare 465 +#define __NR_set_robust_list 466 +#define __NR_get_robust_list 467 +#define __NR_splice 468 +#define __NR_sync_file_range 469 +#define __NR_tee 470 +#define __NR_vmsplice 471 +#define __NR_move_pages 472 +#define __NR_getcpu 473 +#define __NR_epoll_pwait 474 +#define __NR_utimensat 475 +#define __NR_signalfd 476 +#define __NR_timerfd 477 +#define __NR_eventfd 478 +#define __NR_recvmmsg 479 +#define __NR_fallocate 480 +#define __NR_timerfd_create 481 +#define __NR_timerfd_settime 482 +#define __NR_timerfd_gettime 483 +#define __NR_signalfd4 484 +#define __NR_eventfd2 485 +#define __NR_epoll_create1 486 +#define __NR_dup3 487 +#define __NR_pipe2 488 +#define __NR_inotify_init1 489 +#define __NR_preadv 490 +#define __NR_pwritev 491 +#define __NR_rt_tgsigqueueinfo 492 +#define __NR_perf_event_open 493 +#define __NR_fanotify_init 494 +#define __NR_fanotify_mark 495 +#define __NR_prlimit64 496 +#define __NR_name_to_handle_at 497 +#define __NR_open_by_handle_at 498 +#define __NR_clock_adjtime 499 +#define __NR_syncfs 500 +#define __NR_setns 501 +#define __NR_accept4 502 +#define __NR_sendmmsg 503 + +#ifdef __KERNEL__ + +#define NR_SYSCALLS 504 + +#define __ARCH_WANT_IPC_PARSE_VERSION +#define __ARCH_WANT_OLD_READDIR +#define __ARCH_WANT_STAT64 +#define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_FADVISE64 +#define __ARCH_WANT_SYS_GETPGRP +#define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLDUMOUNT +#define __ARCH_WANT_SYS_SIGPENDING +#define __ARCH_WANT_SYS_RT_SIGSUSPEND + +/* "Conditional" syscalls. What we want is + + __attribute__((weak,alias("sys_ni_syscall"))) + + but that raises the problem of what type to give the symbol. If we use + a prototype, it'll conflict with the definition given in this file and + others. If we use __typeof, we discover that not all symbols actually + have declarations. If we use no prototype, then we get warnings from + -Wstrict-prototypes. Ho hum. */ + +#define cond_syscall(x) asm(".weak\t" #x "\n" #x " = sys_ni_syscall") + +#endif /* __KERNEL__ */ +#endif /* _ALPHA_UNISTD_H */ diff --git a/arch/alpha/include/asm/user.h b/arch/alpha/include/asm/user.h new file mode 100644 index 00000000..a4eb6a4c --- /dev/null +++ b/arch/alpha/include/asm/user.h @@ -0,0 +1,53 @@ +#ifndef _ALPHA_USER_H +#define _ALPHA_USER_H + +#include <linux/sched.h> +#include <linux/ptrace.h> + +#include <asm/page.h> +#include <asm/reg.h> + +/* + * Core file format: The core file is written in such a way that gdb + * can understand it and provide useful information to the user (under + * linux we use the `trad-core' bfd, NOT the osf-core). The file contents + * are as follows: + * + * upage: 1 page consisting of a user struct that tells gdb + * what is present in the file. Directly after this is a + * copy of the task_struct, which is currently not used by gdb, + * but it may come in handy at some point. All of the registers + * are stored as part of the upage. The upage should always be + * only one page long. + * data: The data segment follows next. We use current->end_text to + * current->brk to pick up all of the user variables, plus any memory + * that may have been sbrk'ed. No attempt is made to determine if a + * page is demand-zero or if a page is totally unused, we just cover + * the entire range. All of the addresses are rounded in such a way + * that an integral number of pages is written. + * stack: We need the stack information in order to get a meaningful + * backtrace. We need to write the data from usp to + * current->start_stack, so we round each of these in order to be able + * to write an integer number of pages. + */ +struct user { + unsigned long regs[EF_SIZE/8+32]; /* integer and fp regs */ + size_t u_tsize; /* text size (pages) */ + size_t u_dsize; /* data size (pages) */ + size_t u_ssize; /* stack size (pages) */ + unsigned long start_code; /* text starting address */ + unsigned long start_data; /* data starting address */ + unsigned long start_stack; /* stack starting address */ + long int signal; /* signal causing core dump */ + unsigned long u_ar0; /* help gdb find registers */ + unsigned long magic; /* identifies a core file */ + char u_comm[32]; /* user command name */ +}; + +#define NBPG PAGE_SIZE +#define UPAGES 1 +#define HOST_TEXT_START_ADDR (u.start_code) +#define HOST_DATA_START_ADDR (u.start_data) +#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) + +#endif /* _ALPHA_USER_H */ diff --git a/arch/alpha/include/asm/vga.h b/arch/alpha/include/asm/vga.h new file mode 100644 index 00000000..c00106ba --- /dev/null +++ b/arch/alpha/include/asm/vga.h @@ -0,0 +1,82 @@ +/* + * Access to VGA videoram + * + * (c) 1998 Martin Mares <mj@ucw.cz> + */ + +#ifndef _LINUX_ASM_VGA_H_ +#define _LINUX_ASM_VGA_H_ + +#include <asm/io.h> + +#define VT_BUF_HAVE_RW +#define VT_BUF_HAVE_MEMSETW +#define VT_BUF_HAVE_MEMCPYW + +static inline void scr_writew(u16 val, volatile u16 *addr) +{ + if (__is_ioaddr(addr)) + __raw_writew(val, (volatile u16 __iomem *) addr); + else + *addr = val; +} + +static inline u16 scr_readw(volatile const u16 *addr) +{ + if (__is_ioaddr(addr)) + return __raw_readw((volatile const u16 __iomem *) addr); + else + return *addr; +} + +static inline void scr_memsetw(u16 *s, u16 c, unsigned int count) +{ + if (__is_ioaddr(s)) + memsetw_io((u16 __iomem *) s, c, count); + else + memsetw(s, c, count); +} + +/* Do not trust that the usage will be correct; analyze the arguments. */ +extern void scr_memcpyw(u16 *d, const u16 *s, unsigned int count); + +/* ??? These are currently only used for downloading character sets. As + such, they don't need memory barriers. Is this all they are intended + to be used for? */ +#define vga_readb(a) readb((u8 __iomem *)(a)) +#define vga_writeb(v,a) writeb(v, (u8 __iomem *)(a)) + +#ifdef CONFIG_VGA_HOSE +#include <linux/ioport.h> +#include <linux/pci.h> + +extern struct pci_controller *pci_vga_hose; + +# define __is_port_vga(a) \ + (((a) >= 0x3b0) && ((a) < 0x3e0) && \ + ((a) != 0x3b3) && ((a) != 0x3d3)) + +# define __is_mem_vga(a) \ + (((a) >= 0xa0000) && ((a) <= 0xc0000)) + +# define FIXUP_IOADDR_VGA(a) do { \ + if (pci_vga_hose && __is_port_vga(a)) \ + (a) += pci_vga_hose->io_space->start; \ + } while(0) + +# define FIXUP_MEMADDR_VGA(a) do { \ + if (pci_vga_hose && __is_mem_vga(a)) \ + (a) += pci_vga_hose->mem_space->start; \ + } while(0) + +#else /* CONFIG_VGA_HOSE */ +# define pci_vga_hose 0 +# define __is_port_vga(a) 0 +# define __is_mem_vga(a) 0 +# define FIXUP_IOADDR_VGA(a) +# define FIXUP_MEMADDR_VGA(a) +#endif /* CONFIG_VGA_HOSE */ + +#define VGA_MAP_MEM(x,s) ((unsigned long) ioremap(x, s)) + +#endif diff --git a/arch/alpha/include/asm/wrperfmon.h b/arch/alpha/include/asm/wrperfmon.h new file mode 100644 index 00000000..319bf678 --- /dev/null +++ b/arch/alpha/include/asm/wrperfmon.h @@ -0,0 +1,93 @@ +/* + * Definitions for use with the Alpha wrperfmon PAL call. + */ + +#ifndef __ALPHA_WRPERFMON_H +#define __ALPHA_WRPERFMON_H + +/* Following commands are implemented on all CPUs */ +#define PERFMON_CMD_DISABLE 0 +#define PERFMON_CMD_ENABLE 1 +#define PERFMON_CMD_DESIRED_EVENTS 2 +#define PERFMON_CMD_LOGGING_OPTIONS 3 +/* Following commands on EV5/EV56/PCA56 only */ +#define PERFMON_CMD_INT_FREQ 4 +#define PERFMON_CMD_ENABLE_CLEAR 7 +/* Following commands are on EV5 and better CPUs */ +#define PERFMON_CMD_READ 5 +#define PERFMON_CMD_WRITE 6 +/* Following command are on EV6 and better CPUs */ +#define PERFMON_CMD_ENABLE_WRITE 7 +/* Following command are on EV67 and better CPUs */ +#define PERFMON_CMD_I_STAT 8 +#define PERFMON_CMD_PMPC 9 + + +/* EV5/EV56/PCA56 Counters */ +#define EV5_PCTR_0 (1UL<<0) +#define EV5_PCTR_1 (1UL<<1) +#define EV5_PCTR_2 (1UL<<2) + +#define EV5_PCTR_0_COUNT_SHIFT 48 +#define EV5_PCTR_1_COUNT_SHIFT 32 +#define EV5_PCTR_2_COUNT_SHIFT 16 + +#define EV5_PCTR_0_COUNT_MASK 0xffffUL +#define EV5_PCTR_1_COUNT_MASK 0xffffUL +#define EV5_PCTR_2_COUNT_MASK 0x3fffUL + +/* EV6 Counters */ +#define EV6_PCTR_0 (1UL<<0) +#define EV6_PCTR_1 (1UL<<1) + +#define EV6_PCTR_0_COUNT_SHIFT 28 +#define EV6_PCTR_1_COUNT_SHIFT 6 + +#define EV6_PCTR_0_COUNT_MASK 0xfffffUL +#define EV6_PCTR_1_COUNT_MASK 0xfffffUL + +/* EV67 (and subsequent) counters */ +#define EV67_PCTR_0 (1UL<<0) +#define EV67_PCTR_1 (1UL<<1) + +#define EV67_PCTR_0_COUNT_SHIFT 28 +#define EV67_PCTR_1_COUNT_SHIFT 6 + +#define EV67_PCTR_0_COUNT_MASK 0xfffffUL +#define EV67_PCTR_1_COUNT_MASK 0xfffffUL + + +/* + * The Alpha Architecure Handbook, vers. 4 (1998) appears to have a misprint + * in Table E-23 regarding the bits that set the event PCTR 1 counts. + * Hopefully what we have here is correct. + */ +#define EV6_PCTR_0_EVENT_MASK 0x10UL +#define EV6_PCTR_1_EVENT_MASK 0x0fUL + +/* EV6 Events */ +#define EV6_PCTR_0_CYCLES (0UL << 4) +#define EV6_PCTR_0_INSTRUCTIONS (1UL << 4) + +#define EV6_PCTR_1_CYCLES 0 +#define EV6_PCTR_1_BRANCHES 1 +#define EV6_PCTR_1_BRANCH_MISPREDICTS 2 +#define EV6_PCTR_1_DTB_SINGLE_MISSES 3 +#define EV6_PCTR_1_DTB_DOUBLE_MISSES 4 +#define EV6_PCTR_1_ITB_MISSES 5 +#define EV6_PCTR_1_UNALIGNED_TRAPS 6 +#define EV6_PCTR_1_REPLY_TRAPS 7 + +/* From the Alpha Architecture Reference Manual, 4th edn., 2002 */ +#define EV67_PCTR_MODE_MASK 0x10UL +#define EV67_PCTR_EVENT_MASK 0x0CUL + +#define EV67_PCTR_MODE_PROFILEME (1UL<<4) +#define EV67_PCTR_MODE_AGGREGATE (0UL<<4) + +#define EV67_PCTR_INSTR_CYCLES (0UL<<2) +#define EV67_PCTR_CYCLES_UNDEF (1UL<<2) +#define EV67_PCTR_INSTR_BCACHEMISS (2UL<<2) +#define EV67_PCTR_CYCLES_MBOX (3UL<<2) + +#endif diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h new file mode 100644 index 00000000..0ca97245 --- /dev/null +++ b/arch/alpha/include/asm/xchg.h @@ -0,0 +1,258 @@ +#ifndef _ALPHA_CMPXCHG_H +#error Do not include xchg.h directly! +#else +/* + * xchg/xchg_local and cmpxchg/cmpxchg_local share the same code + * except that local version do not have the expensive memory barrier. + * So this file is included twice from asm/cmpxchg.h. + */ + +/* + * Atomic exchange. + * Since it can be used to implement critical sections + * it must clobber "memory" (also for interrupts in UP). + */ + +static inline unsigned long +____xchg(_u8, volatile char *m, unsigned long val) +{ + unsigned long ret, tmp, addr64; + + __asm__ __volatile__( + " andnot %4,7,%3\n" + " insbl %1,%4,%1\n" + "1: ldq_l %2,0(%3)\n" + " extbl %2,%4,%0\n" + " mskbl %2,%4,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%3)\n" + " beq %2,2f\n" + __ASM__MB + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) + : "r" ((long)m), "1" (val) : "memory"); + + return ret; +} + +static inline unsigned long +____xchg(_u16, volatile short *m, unsigned long val) +{ + unsigned long ret, tmp, addr64; + + __asm__ __volatile__( + " andnot %4,7,%3\n" + " inswl %1,%4,%1\n" + "1: ldq_l %2,0(%3)\n" + " extwl %2,%4,%0\n" + " mskwl %2,%4,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%3)\n" + " beq %2,2f\n" + __ASM__MB + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) + : "r" ((long)m), "1" (val) : "memory"); + + return ret; +} + +static inline unsigned long +____xchg(_u32, volatile int *m, unsigned long val) +{ + unsigned long dummy; + + __asm__ __volatile__( + "1: ldl_l %0,%4\n" + " bis $31,%3,%1\n" + " stl_c %1,%2\n" + " beq %1,2f\n" + __ASM__MB + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (val), "=&r" (dummy), "=m" (*m) + : "rI" (val), "m" (*m) : "memory"); + + return val; +} + +static inline unsigned long +____xchg(_u64, volatile long *m, unsigned long val) +{ + unsigned long dummy; + + __asm__ __volatile__( + "1: ldq_l %0,%4\n" + " bis $31,%3,%1\n" + " stq_c %1,%2\n" + " beq %1,2f\n" + __ASM__MB + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (val), "=&r" (dummy), "=m" (*m) + : "rI" (val), "m" (*m) : "memory"); + + return val; +} + +/* This function doesn't exist, so you'll get a linker error + if something tries to do an invalid xchg(). */ +extern void __xchg_called_with_bad_pointer(void); + +static __always_inline unsigned long +____xchg(, volatile void *ptr, unsigned long x, int size) +{ + switch (size) { + case 1: + return ____xchg(_u8, ptr, x); + case 2: + return ____xchg(_u16, ptr, x); + case 4: + return ____xchg(_u32, ptr, x); + case 8: + return ____xchg(_u64, ptr, x); + } + __xchg_called_with_bad_pointer(); + return x; +} + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + * + * The memory barrier should be placed in SMP only when we actually + * make the change. If we don't change anything (so if the returned + * prev is equal to old) then we aren't acquiring anything new and + * we don't need any memory barrier as far I can tell. + */ + +static inline unsigned long +____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new) +{ + unsigned long prev, tmp, cmp, addr64; + + __asm__ __volatile__( + " andnot %5,7,%4\n" + " insbl %1,%5,%1\n" + "1: ldq_l %2,0(%4)\n" + " extbl %2,%5,%0\n" + " cmpeq %0,%6,%3\n" + " beq %3,2f\n" + " mskbl %2,%5,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%4)\n" + " beq %2,3f\n" + __ASM__MB + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) + : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + + return prev; +} + +static inline unsigned long +____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new) +{ + unsigned long prev, tmp, cmp, addr64; + + __asm__ __volatile__( + " andnot %5,7,%4\n" + " inswl %1,%5,%1\n" + "1: ldq_l %2,0(%4)\n" + " extwl %2,%5,%0\n" + " cmpeq %0,%6,%3\n" + " beq %3,2f\n" + " mskwl %2,%5,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%4)\n" + " beq %2,3f\n" + __ASM__MB + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) + : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + + return prev; +} + +static inline unsigned long +____cmpxchg(_u32, volatile int *m, int old, int new) +{ + unsigned long prev, cmp; + + __asm__ __volatile__( + "1: ldl_l %0,%5\n" + " cmpeq %0,%3,%1\n" + " beq %1,2f\n" + " mov %4,%1\n" + " stl_c %1,%2\n" + " beq %1,3f\n" + __ASM__MB + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r"(prev), "=&r"(cmp), "=m"(*m) + : "r"((long) old), "r"(new), "m"(*m) : "memory"); + + return prev; +} + +static inline unsigned long +____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new) +{ + unsigned long prev, cmp; + + __asm__ __volatile__( + "1: ldq_l %0,%5\n" + " cmpeq %0,%3,%1\n" + " beq %1,2f\n" + " mov %4,%1\n" + " stq_c %1,%2\n" + " beq %1,3f\n" + __ASM__MB + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r"(prev), "=&r"(cmp), "=m"(*m) + : "r"((long) old), "r"(new), "m"(*m) : "memory"); + + return prev; +} + +/* This function doesn't exist, so you'll get a linker error + if something tries to do an invalid cmpxchg(). */ +extern void __cmpxchg_called_with_bad_pointer(void); + +static __always_inline unsigned long +____cmpxchg(, volatile void *ptr, unsigned long old, unsigned long new, + int size) +{ + switch (size) { + case 1: + return ____cmpxchg(_u8, ptr, old, new); + case 2: + return ____cmpxchg(_u16, ptr, old, new); + case 4: + return ____cmpxchg(_u32, ptr, old, new); + case 8: + return ____cmpxchg(_u64, ptr, old, new); + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#endif diff --git a/arch/alpha/include/asm/xor.h b/arch/alpha/include/asm/xor.h new file mode 100644 index 00000000..5ee1c2bc --- /dev/null +++ b/arch/alpha/include/asm/xor.h @@ -0,0 +1,855 @@ +/* + * include/asm-alpha/xor.h + * + * Optimized RAID-5 checksumming functions for alpha EV5 and EV6 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * You should have received a copy of the GNU General Public License + * (for example /usr/src/linux/COPYING); if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +extern void xor_alpha_2(unsigned long, unsigned long *, unsigned long *); +extern void xor_alpha_3(unsigned long, unsigned long *, unsigned long *, + unsigned long *); +extern void xor_alpha_4(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *); +extern void xor_alpha_5(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *, unsigned long *); + +extern void xor_alpha_prefetch_2(unsigned long, unsigned long *, + unsigned long *); +extern void xor_alpha_prefetch_3(unsigned long, unsigned long *, + unsigned long *, unsigned long *); +extern void xor_alpha_prefetch_4(unsigned long, unsigned long *, + unsigned long *, unsigned long *, + unsigned long *); +extern void xor_alpha_prefetch_5(unsigned long, unsigned long *, + unsigned long *, unsigned long *, + unsigned long *, unsigned long *); + +asm(" \n\ + .text \n\ + .align 3 \n\ + .ent xor_alpha_2 \n\ +xor_alpha_2: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + .align 4 \n\ +2: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,8($17) \n\ + ldq $3,8($18) \n\ + \n\ + ldq $4,16($17) \n\ + ldq $5,16($18) \n\ + ldq $6,24($17) \n\ + ldq $7,24($18) \n\ + \n\ + ldq $19,32($17) \n\ + ldq $20,32($18) \n\ + ldq $21,40($17) \n\ + ldq $22,40($18) \n\ + \n\ + ldq $23,48($17) \n\ + ldq $24,48($18) \n\ + ldq $25,56($17) \n\ + xor $0,$1,$0 # 7 cycles from $1 load \n\ + \n\ + ldq $27,56($18) \n\ + xor $2,$3,$2 \n\ + stq $0,0($17) \n\ + xor $4,$5,$4 \n\ + \n\ + stq $2,8($17) \n\ + xor $6,$7,$6 \n\ + stq $4,16($17) \n\ + xor $19,$20,$19 \n\ + \n\ + stq $6,24($17) \n\ + xor $21,$22,$21 \n\ + stq $19,32($17) \n\ + xor $23,$24,$23 \n\ + \n\ + stq $21,40($17) \n\ + xor $25,$27,$25 \n\ + stq $23,48($17) \n\ + subq $16,1,$16 \n\ + \n\ + stq $25,56($17) \n\ + addq $17,64,$17 \n\ + addq $18,64,$18 \n\ + bgt $16,2b \n\ + \n\ + ret \n\ + .end xor_alpha_2 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_3 \n\ +xor_alpha_3: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + .align 4 \n\ +3: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,0($19) \n\ + ldq $3,8($17) \n\ + \n\ + ldq $4,8($18) \n\ + ldq $6,16($17) \n\ + ldq $7,16($18) \n\ + ldq $21,24($17) \n\ + \n\ + ldq $22,24($18) \n\ + ldq $24,32($17) \n\ + ldq $25,32($18) \n\ + ldq $5,8($19) \n\ + \n\ + ldq $20,16($19) \n\ + ldq $23,24($19) \n\ + ldq $27,32($19) \n\ + nop \n\ + \n\ + xor $0,$1,$1 # 8 cycles from $0 load \n\ + xor $3,$4,$4 # 6 cycles from $4 load \n\ + xor $6,$7,$7 # 6 cycles from $7 load \n\ + xor $21,$22,$22 # 5 cycles from $22 load \n\ + \n\ + xor $1,$2,$2 # 9 cycles from $2 load \n\ + xor $24,$25,$25 # 5 cycles from $25 load \n\ + stq $2,0($17) \n\ + xor $4,$5,$5 # 6 cycles from $5 load \n\ + \n\ + stq $5,8($17) \n\ + xor $7,$20,$20 # 7 cycles from $20 load \n\ + stq $20,16($17) \n\ + xor $22,$23,$23 # 7 cycles from $23 load \n\ + \n\ + stq $23,24($17) \n\ + xor $25,$27,$27 # 7 cycles from $27 load \n\ + stq $27,32($17) \n\ + nop \n\ + \n\ + ldq $0,40($17) \n\ + ldq $1,40($18) \n\ + ldq $3,48($17) \n\ + ldq $4,48($18) \n\ + \n\ + ldq $6,56($17) \n\ + ldq $7,56($18) \n\ + ldq $2,40($19) \n\ + ldq $5,48($19) \n\ + \n\ + ldq $20,56($19) \n\ + xor $0,$1,$1 # 4 cycles from $1 load \n\ + xor $3,$4,$4 # 5 cycles from $4 load \n\ + xor $6,$7,$7 # 5 cycles from $7 load \n\ + \n\ + xor $1,$2,$2 # 4 cycles from $2 load \n\ + xor $4,$5,$5 # 5 cycles from $5 load \n\ + stq $2,40($17) \n\ + xor $7,$20,$20 # 4 cycles from $20 load \n\ + \n\ + stq $5,48($17) \n\ + subq $16,1,$16 \n\ + stq $20,56($17) \n\ + addq $19,64,$19 \n\ + \n\ + addq $18,64,$18 \n\ + addq $17,64,$17 \n\ + bgt $16,3b \n\ + ret \n\ + .end xor_alpha_3 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_4 \n\ +xor_alpha_4: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + .align 4 \n\ +4: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,0($19) \n\ + ldq $3,0($20) \n\ + \n\ + ldq $4,8($17) \n\ + ldq $5,8($18) \n\ + ldq $6,8($19) \n\ + ldq $7,8($20) \n\ + \n\ + ldq $21,16($17) \n\ + ldq $22,16($18) \n\ + ldq $23,16($19) \n\ + ldq $24,16($20) \n\ + \n\ + ldq $25,24($17) \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + ldq $27,24($18) \n\ + xor $2,$3,$3 # 6 cycles from $3 load \n\ + \n\ + ldq $0,24($19) \n\ + xor $1,$3,$3 \n\ + ldq $1,24($20) \n\ + xor $4,$5,$5 # 7 cycles from $5 load \n\ + \n\ + stq $3,0($17) \n\ + xor $6,$7,$7 \n\ + xor $21,$22,$22 # 7 cycles from $22 load \n\ + xor $5,$7,$7 \n\ + \n\ + stq $7,8($17) \n\ + xor $23,$24,$24 # 7 cycles from $24 load \n\ + ldq $2,32($17) \n\ + xor $22,$24,$24 \n\ + \n\ + ldq $3,32($18) \n\ + ldq $4,32($19) \n\ + ldq $5,32($20) \n\ + xor $25,$27,$27 # 8 cycles from $27 load \n\ + \n\ + ldq $6,40($17) \n\ + ldq $7,40($18) \n\ + ldq $21,40($19) \n\ + ldq $22,40($20) \n\ + \n\ + stq $24,16($17) \n\ + xor $0,$1,$1 # 9 cycles from $1 load \n\ + xor $2,$3,$3 # 5 cycles from $3 load \n\ + xor $27,$1,$1 \n\ + \n\ + stq $1,24($17) \n\ + xor $4,$5,$5 # 5 cycles from $5 load \n\ + ldq $23,48($17) \n\ + ldq $24,48($18) \n\ + \n\ + ldq $25,48($19) \n\ + xor $3,$5,$5 \n\ + ldq $27,48($20) \n\ + ldq $0,56($17) \n\ + \n\ + ldq $1,56($18) \n\ + ldq $2,56($19) \n\ + xor $6,$7,$7 # 8 cycles from $6 load \n\ + ldq $3,56($20) \n\ + \n\ + stq $5,32($17) \n\ + xor $21,$22,$22 # 8 cycles from $22 load \n\ + xor $7,$22,$22 \n\ + xor $23,$24,$24 # 5 cycles from $24 load \n\ + \n\ + stq $22,40($17) \n\ + xor $25,$27,$27 # 5 cycles from $27 load \n\ + xor $24,$27,$27 \n\ + xor $0,$1,$1 # 5 cycles from $1 load \n\ + \n\ + stq $27,48($17) \n\ + xor $2,$3,$3 # 4 cycles from $3 load \n\ + xor $1,$3,$3 \n\ + subq $16,1,$16 \n\ + \n\ + stq $3,56($17) \n\ + addq $20,64,$20 \n\ + addq $19,64,$19 \n\ + addq $18,64,$18 \n\ + \n\ + addq $17,64,$17 \n\ + bgt $16,4b \n\ + ret \n\ + .end xor_alpha_4 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_5 \n\ +xor_alpha_5: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + .align 4 \n\ +5: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,0($19) \n\ + ldq $3,0($20) \n\ + \n\ + ldq $4,0($21) \n\ + ldq $5,8($17) \n\ + ldq $6,8($18) \n\ + ldq $7,8($19) \n\ + \n\ + ldq $22,8($20) \n\ + ldq $23,8($21) \n\ + ldq $24,16($17) \n\ + ldq $25,16($18) \n\ + \n\ + ldq $27,16($19) \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + ldq $28,16($20) \n\ + xor $2,$3,$3 # 6 cycles from $3 load \n\ + \n\ + ldq $0,16($21) \n\ + xor $1,$3,$3 \n\ + ldq $1,24($17) \n\ + xor $3,$4,$4 # 7 cycles from $4 load \n\ + \n\ + stq $4,0($17) \n\ + xor $5,$6,$6 # 7 cycles from $6 load \n\ + xor $7,$22,$22 # 7 cycles from $22 load \n\ + xor $6,$23,$23 # 7 cycles from $23 load \n\ + \n\ + ldq $2,24($18) \n\ + xor $22,$23,$23 \n\ + ldq $3,24($19) \n\ + xor $24,$25,$25 # 8 cycles from $25 load \n\ + \n\ + stq $23,8($17) \n\ + xor $25,$27,$27 # 8 cycles from $27 load \n\ + ldq $4,24($20) \n\ + xor $28,$0,$0 # 7 cycles from $0 load \n\ + \n\ + ldq $5,24($21) \n\ + xor $27,$0,$0 \n\ + ldq $6,32($17) \n\ + ldq $7,32($18) \n\ + \n\ + stq $0,16($17) \n\ + xor $1,$2,$2 # 6 cycles from $2 load \n\ + ldq $22,32($19) \n\ + xor $3,$4,$4 # 4 cycles from $4 load \n\ + \n\ + ldq $23,32($20) \n\ + xor $2,$4,$4 \n\ + ldq $24,32($21) \n\ + ldq $25,40($17) \n\ + \n\ + ldq $27,40($18) \n\ + ldq $28,40($19) \n\ + ldq $0,40($20) \n\ + xor $4,$5,$5 # 7 cycles from $5 load \n\ + \n\ + stq $5,24($17) \n\ + xor $6,$7,$7 # 7 cycles from $7 load \n\ + ldq $1,40($21) \n\ + ldq $2,48($17) \n\ + \n\ + ldq $3,48($18) \n\ + xor $7,$22,$22 # 7 cycles from $22 load \n\ + ldq $4,48($19) \n\ + xor $23,$24,$24 # 6 cycles from $24 load \n\ + \n\ + ldq $5,48($20) \n\ + xor $22,$24,$24 \n\ + ldq $6,48($21) \n\ + xor $25,$27,$27 # 7 cycles from $27 load \n\ + \n\ + stq $24,32($17) \n\ + xor $27,$28,$28 # 8 cycles from $28 load \n\ + ldq $7,56($17) \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + \n\ + ldq $22,56($18) \n\ + ldq $23,56($19) \n\ + ldq $24,56($20) \n\ + ldq $25,56($21) \n\ + \n\ + xor $28,$1,$1 \n\ + xor $2,$3,$3 # 9 cycles from $3 load \n\ + xor $3,$4,$4 # 9 cycles from $4 load \n\ + xor $5,$6,$6 # 8 cycles from $6 load \n\ + \n\ + stq $1,40($17) \n\ + xor $4,$6,$6 \n\ + xor $7,$22,$22 # 7 cycles from $22 load \n\ + xor $23,$24,$24 # 6 cycles from $24 load \n\ + \n\ + stq $6,48($17) \n\ + xor $22,$24,$24 \n\ + subq $16,1,$16 \n\ + xor $24,$25,$25 # 8 cycles from $25 load \n\ + \n\ + stq $25,56($17) \n\ + addq $21,64,$21 \n\ + addq $20,64,$20 \n\ + addq $19,64,$19 \n\ + \n\ + addq $18,64,$18 \n\ + addq $17,64,$17 \n\ + bgt $16,5b \n\ + ret \n\ + .end xor_alpha_5 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_prefetch_2 \n\ +xor_alpha_prefetch_2: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + \n\ + ldq $31, 0($17) \n\ + ldq $31, 0($18) \n\ + \n\ + ldq $31, 64($17) \n\ + ldq $31, 64($18) \n\ + \n\ + ldq $31, 128($17) \n\ + ldq $31, 128($18) \n\ + \n\ + ldq $31, 192($17) \n\ + ldq $31, 192($18) \n\ + .align 4 \n\ +2: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,8($17) \n\ + ldq $3,8($18) \n\ + \n\ + ldq $4,16($17) \n\ + ldq $5,16($18) \n\ + ldq $6,24($17) \n\ + ldq $7,24($18) \n\ + \n\ + ldq $19,32($17) \n\ + ldq $20,32($18) \n\ + ldq $21,40($17) \n\ + ldq $22,40($18) \n\ + \n\ + ldq $23,48($17) \n\ + ldq $24,48($18) \n\ + ldq $25,56($17) \n\ + ldq $27,56($18) \n\ + \n\ + ldq $31,256($17) \n\ + xor $0,$1,$0 # 8 cycles from $1 load \n\ + ldq $31,256($18) \n\ + xor $2,$3,$2 \n\ + \n\ + stq $0,0($17) \n\ + xor $4,$5,$4 \n\ + stq $2,8($17) \n\ + xor $6,$7,$6 \n\ + \n\ + stq $4,16($17) \n\ + xor $19,$20,$19 \n\ + stq $6,24($17) \n\ + xor $21,$22,$21 \n\ + \n\ + stq $19,32($17) \n\ + xor $23,$24,$23 \n\ + stq $21,40($17) \n\ + xor $25,$27,$25 \n\ + \n\ + stq $23,48($17) \n\ + subq $16,1,$16 \n\ + stq $25,56($17) \n\ + addq $17,64,$17 \n\ + \n\ + addq $18,64,$18 \n\ + bgt $16,2b \n\ + ret \n\ + .end xor_alpha_prefetch_2 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_prefetch_3 \n\ +xor_alpha_prefetch_3: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + \n\ + ldq $31, 0($17) \n\ + ldq $31, 0($18) \n\ + ldq $31, 0($19) \n\ + \n\ + ldq $31, 64($17) \n\ + ldq $31, 64($18) \n\ + ldq $31, 64($19) \n\ + \n\ + ldq $31, 128($17) \n\ + ldq $31, 128($18) \n\ + ldq $31, 128($19) \n\ + \n\ + ldq $31, 192($17) \n\ + ldq $31, 192($18) \n\ + ldq $31, 192($19) \n\ + .align 4 \n\ +3: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,0($19) \n\ + ldq $3,8($17) \n\ + \n\ + ldq $4,8($18) \n\ + ldq $6,16($17) \n\ + ldq $7,16($18) \n\ + ldq $21,24($17) \n\ + \n\ + ldq $22,24($18) \n\ + ldq $24,32($17) \n\ + ldq $25,32($18) \n\ + ldq $5,8($19) \n\ + \n\ + ldq $20,16($19) \n\ + ldq $23,24($19) \n\ + ldq $27,32($19) \n\ + nop \n\ + \n\ + xor $0,$1,$1 # 8 cycles from $0 load \n\ + xor $3,$4,$4 # 7 cycles from $4 load \n\ + xor $6,$7,$7 # 6 cycles from $7 load \n\ + xor $21,$22,$22 # 5 cycles from $22 load \n\ + \n\ + xor $1,$2,$2 # 9 cycles from $2 load \n\ + xor $24,$25,$25 # 5 cycles from $25 load \n\ + stq $2,0($17) \n\ + xor $4,$5,$5 # 6 cycles from $5 load \n\ + \n\ + stq $5,8($17) \n\ + xor $7,$20,$20 # 7 cycles from $20 load \n\ + stq $20,16($17) \n\ + xor $22,$23,$23 # 7 cycles from $23 load \n\ + \n\ + stq $23,24($17) \n\ + xor $25,$27,$27 # 7 cycles from $27 load \n\ + stq $27,32($17) \n\ + nop \n\ + \n\ + ldq $0,40($17) \n\ + ldq $1,40($18) \n\ + ldq $3,48($17) \n\ + ldq $4,48($18) \n\ + \n\ + ldq $6,56($17) \n\ + ldq $7,56($18) \n\ + ldq $2,40($19) \n\ + ldq $5,48($19) \n\ + \n\ + ldq $20,56($19) \n\ + ldq $31,256($17) \n\ + ldq $31,256($18) \n\ + ldq $31,256($19) \n\ + \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + xor $3,$4,$4 # 5 cycles from $4 load \n\ + xor $6,$7,$7 # 5 cycles from $7 load \n\ + xor $1,$2,$2 # 4 cycles from $2 load \n\ + \n\ + xor $4,$5,$5 # 5 cycles from $5 load \n\ + xor $7,$20,$20 # 4 cycles from $20 load \n\ + stq $2,40($17) \n\ + subq $16,1,$16 \n\ + \n\ + stq $5,48($17) \n\ + addq $19,64,$19 \n\ + stq $20,56($17) \n\ + addq $18,64,$18 \n\ + \n\ + addq $17,64,$17 \n\ + bgt $16,3b \n\ + ret \n\ + .end xor_alpha_prefetch_3 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_prefetch_4 \n\ +xor_alpha_prefetch_4: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + \n\ + ldq $31, 0($17) \n\ + ldq $31, 0($18) \n\ + ldq $31, 0($19) \n\ + ldq $31, 0($20) \n\ + \n\ + ldq $31, 64($17) \n\ + ldq $31, 64($18) \n\ + ldq $31, 64($19) \n\ + ldq $31, 64($20) \n\ + \n\ + ldq $31, 128($17) \n\ + ldq $31, 128($18) \n\ + ldq $31, 128($19) \n\ + ldq $31, 128($20) \n\ + \n\ + ldq $31, 192($17) \n\ + ldq $31, 192($18) \n\ + ldq $31, 192($19) \n\ + ldq $31, 192($20) \n\ + .align 4 \n\ +4: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,0($19) \n\ + ldq $3,0($20) \n\ + \n\ + ldq $4,8($17) \n\ + ldq $5,8($18) \n\ + ldq $6,8($19) \n\ + ldq $7,8($20) \n\ + \n\ + ldq $21,16($17) \n\ + ldq $22,16($18) \n\ + ldq $23,16($19) \n\ + ldq $24,16($20) \n\ + \n\ + ldq $25,24($17) \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + ldq $27,24($18) \n\ + xor $2,$3,$3 # 6 cycles from $3 load \n\ + \n\ + ldq $0,24($19) \n\ + xor $1,$3,$3 \n\ + ldq $1,24($20) \n\ + xor $4,$5,$5 # 7 cycles from $5 load \n\ + \n\ + stq $3,0($17) \n\ + xor $6,$7,$7 \n\ + xor $21,$22,$22 # 7 cycles from $22 load \n\ + xor $5,$7,$7 \n\ + \n\ + stq $7,8($17) \n\ + xor $23,$24,$24 # 7 cycles from $24 load \n\ + ldq $2,32($17) \n\ + xor $22,$24,$24 \n\ + \n\ + ldq $3,32($18) \n\ + ldq $4,32($19) \n\ + ldq $5,32($20) \n\ + xor $25,$27,$27 # 8 cycles from $27 load \n\ + \n\ + ldq $6,40($17) \n\ + ldq $7,40($18) \n\ + ldq $21,40($19) \n\ + ldq $22,40($20) \n\ + \n\ + stq $24,16($17) \n\ + xor $0,$1,$1 # 9 cycles from $1 load \n\ + xor $2,$3,$3 # 5 cycles from $3 load \n\ + xor $27,$1,$1 \n\ + \n\ + stq $1,24($17) \n\ + xor $4,$5,$5 # 5 cycles from $5 load \n\ + ldq $23,48($17) \n\ + xor $3,$5,$5 \n\ + \n\ + ldq $24,48($18) \n\ + ldq $25,48($19) \n\ + ldq $27,48($20) \n\ + ldq $0,56($17) \n\ + \n\ + ldq $1,56($18) \n\ + ldq $2,56($19) \n\ + ldq $3,56($20) \n\ + xor $6,$7,$7 # 8 cycles from $6 load \n\ + \n\ + ldq $31,256($17) \n\ + xor $21,$22,$22 # 8 cycles from $22 load \n\ + ldq $31,256($18) \n\ + xor $7,$22,$22 \n\ + \n\ + ldq $31,256($19) \n\ + xor $23,$24,$24 # 6 cycles from $24 load \n\ + ldq $31,256($20) \n\ + xor $25,$27,$27 # 6 cycles from $27 load \n\ + \n\ + stq $5,32($17) \n\ + xor $24,$27,$27 \n\ + xor $0,$1,$1 # 7 cycles from $1 load \n\ + xor $2,$3,$3 # 6 cycles from $3 load \n\ + \n\ + stq $22,40($17) \n\ + xor $1,$3,$3 \n\ + stq $27,48($17) \n\ + subq $16,1,$16 \n\ + \n\ + stq $3,56($17) \n\ + addq $20,64,$20 \n\ + addq $19,64,$19 \n\ + addq $18,64,$18 \n\ + \n\ + addq $17,64,$17 \n\ + bgt $16,4b \n\ + ret \n\ + .end xor_alpha_prefetch_4 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_prefetch_5 \n\ +xor_alpha_prefetch_5: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + \n\ + ldq $31, 0($17) \n\ + ldq $31, 0($18) \n\ + ldq $31, 0($19) \n\ + ldq $31, 0($20) \n\ + ldq $31, 0($21) \n\ + \n\ + ldq $31, 64($17) \n\ + ldq $31, 64($18) \n\ + ldq $31, 64($19) \n\ + ldq $31, 64($20) \n\ + ldq $31, 64($21) \n\ + \n\ + ldq $31, 128($17) \n\ + ldq $31, 128($18) \n\ + ldq $31, 128($19) \n\ + ldq $31, 128($20) \n\ + ldq $31, 128($21) \n\ + \n\ + ldq $31, 192($17) \n\ + ldq $31, 192($18) \n\ + ldq $31, 192($19) \n\ + ldq $31, 192($20) \n\ + ldq $31, 192($21) \n\ + .align 4 \n\ +5: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,0($19) \n\ + ldq $3,0($20) \n\ + \n\ + ldq $4,0($21) \n\ + ldq $5,8($17) \n\ + ldq $6,8($18) \n\ + ldq $7,8($19) \n\ + \n\ + ldq $22,8($20) \n\ + ldq $23,8($21) \n\ + ldq $24,16($17) \n\ + ldq $25,16($18) \n\ + \n\ + ldq $27,16($19) \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + ldq $28,16($20) \n\ + xor $2,$3,$3 # 6 cycles from $3 load \n\ + \n\ + ldq $0,16($21) \n\ + xor $1,$3,$3 \n\ + ldq $1,24($17) \n\ + xor $3,$4,$4 # 7 cycles from $4 load \n\ + \n\ + stq $4,0($17) \n\ + xor $5,$6,$6 # 7 cycles from $6 load \n\ + xor $7,$22,$22 # 7 cycles from $22 load \n\ + xor $6,$23,$23 # 7 cycles from $23 load \n\ + \n\ + ldq $2,24($18) \n\ + xor $22,$23,$23 \n\ + ldq $3,24($19) \n\ + xor $24,$25,$25 # 8 cycles from $25 load \n\ + \n\ + stq $23,8($17) \n\ + xor $25,$27,$27 # 8 cycles from $27 load \n\ + ldq $4,24($20) \n\ + xor $28,$0,$0 # 7 cycles from $0 load \n\ + \n\ + ldq $5,24($21) \n\ + xor $27,$0,$0 \n\ + ldq $6,32($17) \n\ + ldq $7,32($18) \n\ + \n\ + stq $0,16($17) \n\ + xor $1,$2,$2 # 6 cycles from $2 load \n\ + ldq $22,32($19) \n\ + xor $3,$4,$4 # 4 cycles from $4 load \n\ + \n\ + ldq $23,32($20) \n\ + xor $2,$4,$4 \n\ + ldq $24,32($21) \n\ + ldq $25,40($17) \n\ + \n\ + ldq $27,40($18) \n\ + ldq $28,40($19) \n\ + ldq $0,40($20) \n\ + xor $4,$5,$5 # 7 cycles from $5 load \n\ + \n\ + stq $5,24($17) \n\ + xor $6,$7,$7 # 7 cycles from $7 load \n\ + ldq $1,40($21) \n\ + ldq $2,48($17) \n\ + \n\ + ldq $3,48($18) \n\ + xor $7,$22,$22 # 7 cycles from $22 load \n\ + ldq $4,48($19) \n\ + xor $23,$24,$24 # 6 cycles from $24 load \n\ + \n\ + ldq $5,48($20) \n\ + xor $22,$24,$24 \n\ + ldq $6,48($21) \n\ + xor $25,$27,$27 # 7 cycles from $27 load \n\ + \n\ + stq $24,32($17) \n\ + xor $27,$28,$28 # 8 cycles from $28 load \n\ + ldq $7,56($17) \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + \n\ + ldq $22,56($18) \n\ + ldq $23,56($19) \n\ + ldq $24,56($20) \n\ + ldq $25,56($21) \n\ + \n\ + ldq $31,256($17) \n\ + xor $28,$1,$1 \n\ + ldq $31,256($18) \n\ + xor $2,$3,$3 # 9 cycles from $3 load \n\ + \n\ + ldq $31,256($19) \n\ + xor $3,$4,$4 # 9 cycles from $4 load \n\ + ldq $31,256($20) \n\ + xor $5,$6,$6 # 8 cycles from $6 load \n\ + \n\ + stq $1,40($17) \n\ + xor $4,$6,$6 \n\ + xor $7,$22,$22 # 7 cycles from $22 load \n\ + xor $23,$24,$24 # 6 cycles from $24 load \n\ + \n\ + stq $6,48($17) \n\ + xor $22,$24,$24 \n\ + ldq $31,256($21) \n\ + xor $24,$25,$25 # 8 cycles from $25 load \n\ + \n\ + stq $25,56($17) \n\ + subq $16,1,$16 \n\ + addq $21,64,$21 \n\ + addq $20,64,$20 \n\ + \n\ + addq $19,64,$19 \n\ + addq $18,64,$18 \n\ + addq $17,64,$17 \n\ + bgt $16,5b \n\ + \n\ + ret \n\ + .end xor_alpha_prefetch_5 \n\ +"); + +static struct xor_block_template xor_block_alpha = { + .name = "alpha", + .do_2 = xor_alpha_2, + .do_3 = xor_alpha_3, + .do_4 = xor_alpha_4, + .do_5 = xor_alpha_5, +}; + +static struct xor_block_template xor_block_alpha_prefetch = { + .name = "alpha prefetch", + .do_2 = xor_alpha_prefetch_2, + .do_3 = xor_alpha_prefetch_3, + .do_4 = xor_alpha_prefetch_4, + .do_5 = xor_alpha_prefetch_5, +}; + +/* For grins, also test the generic routines. */ +#include <asm-generic/xor.h> + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_32regs); \ + xor_speed(&xor_block_alpha); \ + xor_speed(&xor_block_alpha_prefetch); \ + } while (0) + +/* Force the use of alpha_prefetch if EV6, as it is significantly + faster in the cold cache case. */ +#define XOR_SELECT_TEMPLATE(FASTEST) \ + (implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : FASTEST) diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile new file mode 100644 index 00000000..7a6d908b --- /dev/null +++ b/arch/alpha/kernel/Makefile @@ -0,0 +1,109 @@ +# +# Makefile for the linux kernel. +# + +extra-y := head.o vmlinux.lds +asflags-y := $(KBUILD_CFLAGS) +ccflags-y := -Wno-sign-compare + +obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \ + irq_alpha.o signal.o setup.o ptrace.o time.o \ + alpha_ksyms.o systbls.o err_common.o io.o + +obj-$(CONFIG_VGA_HOSE) += console.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_PCI) += pci.o pci_iommu.o pci-sysfs.o +obj-$(CONFIG_SRM_ENV) += srm_env.o +obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o + +ifdef CONFIG_ALPHA_GENERIC + +obj-y += core_apecs.o core_cia.o core_irongate.o core_lca.o \ + core_mcpcia.o core_polaris.o core_t2.o \ + core_tsunami.o + +obj-y += sys_alcor.o sys_cabriolet.o sys_dp264.o sys_eb64p.o sys_eiger.o \ + sys_jensen.o sys_miata.o sys_mikasa.o sys_nautilus.o \ + sys_noritake.o sys_rawhide.o sys_ruffian.o sys_rx164.o \ + sys_sable.o sys_sio.o sys_sx164.o sys_takara.o + +ifndef CONFIG_ALPHA_LEGACY_START_ADDRESS +obj-y += core_marvel.o core_titan.o core_wildfire.o +obj-y += sys_marvel.o sys_titan.o sys_wildfire.o +obj-y += err_ev7.o err_titan.o err_marvel.o +endif + +obj-y += irq_pyxis.o irq_i8259.o irq_srm.o +obj-y += err_ev6.o +obj-y += es1888.o smc37c669.o smc37c93x.o pc873xx.o gct.o +obj-y += srmcons.o + +else + +# Misc support +obj-$(CONFIG_ALPHA_SRM) += srmcons.o + +ifdef CONFIG_BINFMT_AOUT +obj-y += binfmt_loader.o +endif + +# Core logic support +obj-$(CONFIG_ALPHA_APECS) += core_apecs.o +obj-$(CONFIG_ALPHA_CIA) += core_cia.o +obj-$(CONFIG_ALPHA_IRONGATE) += core_irongate.o +obj-$(CONFIG_ALPHA_LCA) += core_lca.o +obj-$(CONFIG_ALPHA_MARVEL) += core_marvel.o gct.o +obj-$(CONFIG_ALPHA_MCPCIA) += core_mcpcia.o +obj-$(CONFIG_ALPHA_POLARIS) += core_polaris.o +obj-$(CONFIG_ALPHA_T2) += core_t2.o +obj-$(CONFIG_ALPHA_TSUNAMI) += core_tsunami.o +obj-$(CONFIG_ALPHA_TITAN) += core_titan.o +obj-$(CONFIG_ALPHA_WILDFIRE) += core_wildfire.o + +# Board support +obj-$(CONFIG_ALPHA_ALCOR) += sys_alcor.o irq_i8259.o irq_srm.o +obj-$(CONFIG_ALPHA_CABRIOLET) += sys_cabriolet.o irq_i8259.o irq_srm.o \ + pc873xx.o +obj-$(CONFIG_ALPHA_EB164) += sys_cabriolet.o irq_i8259.o irq_srm.o \ + pc873xx.o +obj-$(CONFIG_ALPHA_EB66P) += sys_cabriolet.o irq_i8259.o irq_srm.o \ + pc873xx.o +obj-$(CONFIG_ALPHA_LX164) += sys_cabriolet.o irq_i8259.o irq_srm.o \ + smc37c93x.o +obj-$(CONFIG_ALPHA_PC164) += sys_cabriolet.o irq_i8259.o irq_srm.o \ + smc37c93x.o +obj-$(CONFIG_ALPHA_DP264) += sys_dp264.o irq_i8259.o es1888.o smc37c669.o +obj-$(CONFIG_ALPHA_SHARK) += sys_dp264.o irq_i8259.o es1888.o smc37c669.o +obj-$(CONFIG_ALPHA_TITAN) += sys_titan.o irq_i8259.o smc37c669.o +obj-$(CONFIG_ALPHA_EB64P) += sys_eb64p.o irq_i8259.o +obj-$(CONFIG_ALPHA_EB66) += sys_eb64p.o irq_i8259.o +obj-$(CONFIG_ALPHA_EIGER) += sys_eiger.o irq_i8259.o +obj-$(CONFIG_ALPHA_JENSEN) += sys_jensen.o pci-noop.o irq_i8259.o +obj-$(CONFIG_ALPHA_MARVEL) += sys_marvel.o +obj-$(CONFIG_ALPHA_MIATA) += sys_miata.o irq_pyxis.o irq_i8259.o \ + es1888.o smc37c669.o +obj-$(CONFIG_ALPHA_MIKASA) += sys_mikasa.o irq_i8259.o irq_srm.o +obj-$(CONFIG_ALPHA_NAUTILUS) += sys_nautilus.o irq_i8259.o irq_srm.o +obj-$(CONFIG_ALPHA_NORITAKE) += sys_noritake.o irq_i8259.o +obj-$(CONFIG_ALPHA_RAWHIDE) += sys_rawhide.o irq_i8259.o +obj-$(CONFIG_ALPHA_RUFFIAN) += sys_ruffian.o irq_pyxis.o irq_i8259.o +obj-$(CONFIG_ALPHA_RX164) += sys_rx164.o irq_i8259.o +obj-$(CONFIG_ALPHA_SABLE) += sys_sable.o +obj-$(CONFIG_ALPHA_LYNX) += sys_sable.o +obj-$(CONFIG_ALPHA_BOOK1) += sys_sio.o irq_i8259.o irq_srm.o pc873xx.o +obj-$(CONFIG_ALPHA_AVANTI) += sys_sio.o irq_i8259.o irq_srm.o pc873xx.o +obj-$(CONFIG_ALPHA_NONAME) += sys_sio.o irq_i8259.o irq_srm.o pc873xx.o +obj-$(CONFIG_ALPHA_P2K) += sys_sio.o irq_i8259.o irq_srm.o pc873xx.o +obj-$(CONFIG_ALPHA_XL) += sys_sio.o irq_i8259.o irq_srm.o pc873xx.o +obj-$(CONFIG_ALPHA_SX164) += sys_sx164.o irq_pyxis.o irq_i8259.o \ + irq_srm.o smc37c669.o +obj-$(CONFIG_ALPHA_TAKARA) += sys_takara.o irq_i8259.o pc873xx.o +obj-$(CONFIG_ALPHA_WILDFIRE) += sys_wildfire.o irq_i8259.o + +# Error support +obj-$(CONFIG_ALPHA_MARVEL) += err_ev7.o err_marvel.o +obj-$(CONFIG_ALPHA_NAUTILUS) += err_ev6.o +obj-$(CONFIG_ALPHA_TITAN) += err_ev6.o err_titan.o + +endif # GENERIC diff --git a/arch/alpha/kernel/alpha_ksyms.c b/arch/alpha/kernel/alpha_ksyms.c new file mode 100644 index 00000000..d96e742d --- /dev/null +++ b/arch/alpha/kernel/alpha_ksyms.c @@ -0,0 +1,107 @@ +/* + * linux/arch/alpha/kernel/alpha_ksyms.c + * + * Export the alpha-specific functions that are needed for loadable + * modules. + */ + +#include <linux/module.h> +#include <asm/console.h> +#include <asm/uaccess.h> +#include <asm/checksum.h> +#include <asm/fpu.h> +#include <asm/machvec.h> + +#include <linux/syscalls.h> + +/* these are C runtime functions with special calling conventions: */ +extern void __divl (void); +extern void __reml (void); +extern void __divq (void); +extern void __remq (void); +extern void __divlu (void); +extern void __remlu (void); +extern void __divqu (void); +extern void __remqu (void); + +EXPORT_SYMBOL(alpha_mv); +EXPORT_SYMBOL(callback_getenv); +EXPORT_SYMBOL(callback_setenv); +EXPORT_SYMBOL(callback_save_env); + +/* platform dependent support */ +EXPORT_SYMBOL(strcat); +EXPORT_SYMBOL(strcpy); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strncpy); +EXPORT_SYMBOL(strncat); +EXPORT_SYMBOL(strchr); +EXPORT_SYMBOL(strrchr); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(__memcpy); +EXPORT_SYMBOL(__memset); +EXPORT_SYMBOL(__memsetw); +EXPORT_SYMBOL(__constant_c_memset); +EXPORT_SYMBOL(copy_page); +EXPORT_SYMBOL(clear_page); + +EXPORT_SYMBOL(alpha_read_fp_reg); +EXPORT_SYMBOL(alpha_read_fp_reg_s); +EXPORT_SYMBOL(alpha_write_fp_reg); +EXPORT_SYMBOL(alpha_write_fp_reg_s); + +/* entry.S */ +EXPORT_SYMBOL(kernel_thread); +EXPORT_SYMBOL(kernel_execve); + +/* Networking helper routines. */ +EXPORT_SYMBOL(csum_tcpudp_magic); +EXPORT_SYMBOL(ip_compute_csum); +EXPORT_SYMBOL(ip_fast_csum); +EXPORT_SYMBOL(csum_partial_copy_nocheck); +EXPORT_SYMBOL(csum_partial_copy_from_user); +EXPORT_SYMBOL(csum_ipv6_magic); + +#ifdef CONFIG_MATHEMU_MODULE +extern long (*alpha_fp_emul_imprecise)(struct pt_regs *, unsigned long); +extern long (*alpha_fp_emul) (unsigned long pc); +EXPORT_SYMBOL(alpha_fp_emul_imprecise); +EXPORT_SYMBOL(alpha_fp_emul); +#endif + +/* + * The following are specially called from the uaccess assembly stubs. + */ +EXPORT_SYMBOL(__copy_user); +EXPORT_SYMBOL(__do_clear_user); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(__strnlen_user); + +/* + * SMP-specific symbols. + */ + +#ifdef CONFIG_SMP +EXPORT_SYMBOL(_atomic_dec_and_lock); +#endif /* CONFIG_SMP */ + +/* + * The following are special because they're not called + * explicitly (the C compiler or assembler generates them in + * response to division operations). Fortunately, their + * interface isn't gonna change any time soon now, so it's OK + * to leave it out of version control. + */ +# undef memcpy +# undef memset +EXPORT_SYMBOL(__divl); +EXPORT_SYMBOL(__divlu); +EXPORT_SYMBOL(__divq); +EXPORT_SYMBOL(__divqu); +EXPORT_SYMBOL(__reml); +EXPORT_SYMBOL(__remlu); +EXPORT_SYMBOL(__remq); +EXPORT_SYMBOL(__remqu); +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memchr); diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c new file mode 100644 index 00000000..6ff8886e --- /dev/null +++ b/arch/alpha/kernel/asm-offsets.c @@ -0,0 +1,43 @@ +/* + * Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed to extract + * and format the required data. + */ + +#include <linux/types.h> +#include <linux/stddef.h> +#include <linux/sched.h> +#include <linux/ptrace.h> +#include <linux/kbuild.h> +#include <asm/io.h> + +void foo(void) +{ + DEFINE(TI_TASK, offsetof(struct thread_info, task)); + DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); + DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); + BLANK(); + + DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked)); + DEFINE(TASK_CRED, offsetof(struct task_struct, cred)); + DEFINE(TASK_REAL_PARENT, offsetof(struct task_struct, real_parent)); + DEFINE(TASK_GROUP_LEADER, offsetof(struct task_struct, group_leader)); + DEFINE(TASK_TGID, offsetof(struct task_struct, tgid)); + BLANK(); + + DEFINE(CRED_UID, offsetof(struct cred, uid)); + DEFINE(CRED_EUID, offsetof(struct cred, euid)); + DEFINE(CRED_GID, offsetof(struct cred, gid)); + DEFINE(CRED_EGID, offsetof(struct cred, egid)); + BLANK(); + + DEFINE(SIZEOF_PT_REGS, sizeof(struct pt_regs)); + DEFINE(PT_PTRACED, PT_PTRACED); + DEFINE(CLONE_VM, CLONE_VM); + DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); + DEFINE(SIGCHLD, SIGCHLD); + BLANK(); + + DEFINE(HAE_CACHE, offsetof(struct alpha_machine_vector, hae_cache)); + DEFINE(HAE_REG, offsetof(struct alpha_machine_vector, hae_register)); +} diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c new file mode 100644 index 00000000..d1f474d1 --- /dev/null +++ b/arch/alpha/kernel/binfmt_loader.c @@ -0,0 +1,52 @@ +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/mm_types.h> +#include <linux/binfmts.h> +#include <linux/a.out.h> + +static int load_binary(struct linux_binprm *bprm, struct pt_regs *regs) +{ + struct exec *eh = (struct exec *)bprm->buf; + unsigned long loader; + struct file *file; + int retval; + + if (eh->fh.f_magic != 0x183 || (eh->fh.f_flags & 0x3000) != 0x3000) + return -ENOEXEC; + + if (bprm->loader) + return -ENOEXEC; + + allow_write_access(bprm->file); + fput(bprm->file); + bprm->file = NULL; + + loader = bprm->vma->vm_end - sizeof(void *); + + file = open_exec("/sbin/loader"); + retval = PTR_ERR(file); + if (IS_ERR(file)) + return retval; + + /* Remember if the application is TASO. */ + bprm->taso = eh->ah.entry < 0x100000000UL; + + bprm->file = file; + bprm->loader = loader; + retval = prepare_binprm(bprm); + if (retval < 0) + return retval; + return search_binary_handler(bprm,regs); +} + +static struct linux_binfmt loader_format = { + .load_binary = load_binary, +}; + +static int __init init_loader_binfmt(void) +{ + insert_binfmt(&loader_format); + return 0; +} +arch_initcall(init_loader_binfmt); diff --git a/arch/alpha/kernel/console.c b/arch/alpha/kernel/console.c new file mode 100644 index 00000000..da711e37 --- /dev/null +++ b/arch/alpha/kernel/console.c @@ -0,0 +1,91 @@ +/* + * linux/arch/alpha/kernel/console.c + * + * Architecture-specific specific support for VGA device on + * non-0 I/O hose + */ + +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/tty.h> +#include <linux/console.h> +#include <linux/vt.h> +#include <asm/vga.h> +#include <asm/machvec.h> + +#include "pci_impl.h" + +#ifdef CONFIG_VGA_HOSE + +struct pci_controller *pci_vga_hose; +static struct resource alpha_vga = { + .name = "alpha-vga+", + .start = 0x3C0, + .end = 0x3DF +}; + +static struct pci_controller * __init +default_vga_hose_select(struct pci_controller *h1, struct pci_controller *h2) +{ + if (h2->index < h1->index) + return h2; + + return h1; +} + +void __init +locate_and_init_vga(void *(*sel_func)(void *, void *)) +{ + struct pci_controller *hose = NULL; + struct pci_dev *dev = NULL; + + /* Default the select function */ + if (!sel_func) sel_func = (void *)default_vga_hose_select; + + /* Find the console VGA device */ + for(dev=NULL; (dev=pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, dev));) { + if (!hose) + hose = dev->sysdata; + else + hose = sel_func(hose, dev->sysdata); + } + + /* Did we already initialize the correct one? Is there one? */ + if (!hose || (conswitchp == &vga_con && pci_vga_hose == hose)) + return; + + /* Create a new VGA ioport resource WRT the hose it is on. */ + alpha_vga.start += hose->io_space->start; + alpha_vga.end += hose->io_space->start; + request_resource(hose->io_space, &alpha_vga); + + /* Set the VGA hose and init the new console. */ + pci_vga_hose = hose; + take_over_console(&vga_con, 0, MAX_NR_CONSOLES-1, 1); +} + +void __init +find_console_vga_hose(void) +{ + u64 *pu64 = (u64 *)((u64)hwrpb + hwrpb->ctbt_offset); + + if (pu64[7] == 3) { /* TERM_TYPE == graphics */ + struct pci_controller *hose; + int h = (pu64[30] >> 24) & 0xff; /* console hose # */ + + /* + * Our hose numbering DOES match the console's, so find + * the right one... + */ + for (hose = hose_head; hose; hose = hose->next) { + if (hose->index == h) break; + } + + if (hose) { + printk("Console graphics on hose %d\n", h); + pci_vga_hose = hose; + } + } +} + +#endif diff --git a/arch/alpha/kernel/core_apecs.c b/arch/alpha/kernel/core_apecs.c new file mode 100644 index 00000000..708c831e --- /dev/null +++ b/arch/alpha/kernel/core_apecs.c @@ -0,0 +1,418 @@ +/* + * linux/arch/alpha/kernel/core_apecs.c + * + * Rewritten for Apecs from the lca.c from: + * + * Written by David Mosberger (davidm@cs.arizona.edu) with some code + * taken from Dave Rusling's (david.rusling@reo.mts.dec.com) 32-bit + * bios code. + * + * Code common to all APECS core logic chips. + */ + +#define __EXTERN_INLINE inline +#include <asm/io.h> +#include <asm/core_apecs.h> +#undef __EXTERN_INLINE + +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/init.h> + +#include <asm/ptrace.h> +#include <asm/smp.h> +#include <asm/mce.h> + +#include "proto.h" +#include "pci_impl.h" + +/* + * NOTE: Herein lie back-to-back mb instructions. They are magic. + * One plausible explanation is that the i/o controller does not properly + * handle the system transaction. Another involves timing. Ho hum. + */ + +/* + * BIOS32-style PCI interface: + */ + +#define DEBUG_CONFIG 0 + +#if DEBUG_CONFIG +# define DBGC(args) printk args +#else +# define DBGC(args) +#endif + +#define vuip volatile unsigned int * + +/* + * Given a bus, device, and function number, compute resulting + * configuration space address and setup the APECS_HAXR2 register + * accordingly. It is therefore not safe to have concurrent + * invocations to configuration space access routines, but there + * really shouldn't be any need for this. + * + * Type 0: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | | | | | | | | | | | | | | | | | | | | | | |F|F|F|R|R|R|R|R|R|0|0| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:11 Device select bit. + * 10:8 Function number + * 7:2 Register number + * + * Type 1: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | | | | | | | | | |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|1| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:24 reserved + * 23:16 bus number (8 bits = 128 possible buses) + * 15:11 Device number (5 bits) + * 10:8 function number + * 7:2 register number + * + * Notes: + * The function number selects which function of a multi-function device + * (e.g., SCSI and Ethernet). + * + * The register selects a DWORD (32 bit) register offset. Hence it + * doesn't get shifted by 2 bits as we want to "drop" the bottom two + * bits. + */ + +static int +mk_conf_addr(struct pci_bus *pbus, unsigned int device_fn, int where, + unsigned long *pci_addr, unsigned char *type1) +{ + unsigned long addr; + u8 bus = pbus->number; + + DBGC(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x," + " pci_addr=0x%p, type1=0x%p)\n", + bus, device_fn, where, pci_addr, type1)); + + if (bus == 0) { + int device = device_fn >> 3; + + /* type 0 configuration cycle: */ + + if (device > 20) { + DBGC(("mk_conf_addr: device (%d) > 20, returning -1\n", + device)); + return -1; + } + + *type1 = 0; + addr = (device_fn << 8) | (where); + } else { + /* type 1 configuration cycle: */ + *type1 = 1; + addr = (bus << 16) | (device_fn << 8) | (where); + } + *pci_addr = addr; + DBGC(("mk_conf_addr: returning pci_addr 0x%lx\n", addr)); + return 0; +} + +static unsigned int +conf_read(unsigned long addr, unsigned char type1) +{ + unsigned long flags; + unsigned int stat0, value; + unsigned int haxr2 = 0; + + local_irq_save(flags); /* avoid getting hit by machine check */ + + DBGC(("conf_read(addr=0x%lx, type1=%d)\n", addr, type1)); + + /* Reset status register to avoid losing errors. */ + stat0 = *(vuip)APECS_IOC_DCSR; + *(vuip)APECS_IOC_DCSR = stat0; + mb(); + DBGC(("conf_read: APECS DCSR was 0x%x\n", stat0)); + + /* If Type1 access, must set HAE #2. */ + if (type1) { + haxr2 = *(vuip)APECS_IOC_HAXR2; + mb(); + *(vuip)APECS_IOC_HAXR2 = haxr2 | 1; + DBGC(("conf_read: TYPE1 access\n")); + } + + draina(); + mcheck_expected(0) = 1; + mcheck_taken(0) = 0; + mb(); + + /* Access configuration space. */ + + /* Some SRMs step on these registers during a machine check. */ + asm volatile("ldl %0,%1; mb; mb" : "=r"(value) : "m"(*(vuip)addr) + : "$9", "$10", "$11", "$12", "$13", "$14", "memory"); + + if (mcheck_taken(0)) { + mcheck_taken(0) = 0; + value = 0xffffffffU; + mb(); + } + mcheck_expected(0) = 0; + mb(); + +#if 1 + /* + * david.rusling@reo.mts.dec.com. This code is needed for the + * EB64+ as it does not generate a machine check (why I don't + * know). When we build kernels for one particular platform + * then we can make this conditional on the type. + */ + draina(); + + /* Now look for any errors. */ + stat0 = *(vuip)APECS_IOC_DCSR; + DBGC(("conf_read: APECS DCSR after read 0x%x\n", stat0)); + + /* Is any error bit set? */ + if (stat0 & 0xffe0U) { + /* If not NDEV, print status. */ + if (!(stat0 & 0x0800)) { + printk("apecs.c:conf_read: got stat0=%x\n", stat0); + } + + /* Reset error status. */ + *(vuip)APECS_IOC_DCSR = stat0; + mb(); + wrmces(0x7); /* reset machine check */ + value = 0xffffffff; + } +#endif + + /* If Type1 access, must reset HAE #2 so normal IO space ops work. */ + if (type1) { + *(vuip)APECS_IOC_HAXR2 = haxr2 & ~1; + mb(); + } + local_irq_restore(flags); + + return value; +} + +static void +conf_write(unsigned long addr, unsigned int value, unsigned char type1) +{ + unsigned long flags; + unsigned int stat0; + unsigned int haxr2 = 0; + + local_irq_save(flags); /* avoid getting hit by machine check */ + + /* Reset status register to avoid losing errors. */ + stat0 = *(vuip)APECS_IOC_DCSR; + *(vuip)APECS_IOC_DCSR = stat0; + mb(); + + /* If Type1 access, must set HAE #2. */ + if (type1) { + haxr2 = *(vuip)APECS_IOC_HAXR2; + mb(); + *(vuip)APECS_IOC_HAXR2 = haxr2 | 1; + } + + draina(); + mcheck_expected(0) = 1; + mb(); + + /* Access configuration space. */ + *(vuip)addr = value; + mb(); + mb(); /* magic */ + mcheck_expected(0) = 0; + mb(); + +#if 1 + /* + * david.rusling@reo.mts.dec.com. This code is needed for the + * EB64+ as it does not generate a machine check (why I don't + * know). When we build kernels for one particular platform + * then we can make this conditional on the type. + */ + draina(); + + /* Now look for any errors. */ + stat0 = *(vuip)APECS_IOC_DCSR; + + /* Is any error bit set? */ + if (stat0 & 0xffe0U) { + /* If not NDEV, print status. */ + if (!(stat0 & 0x0800)) { + printk("apecs.c:conf_write: got stat0=%x\n", stat0); + } + + /* Reset error status. */ + *(vuip)APECS_IOC_DCSR = stat0; + mb(); + wrmces(0x7); /* reset machine check */ + } +#endif + + /* If Type1 access, must reset HAE #2 so normal IO space ops work. */ + if (type1) { + *(vuip)APECS_IOC_HAXR2 = haxr2 & ~1; + mb(); + } + local_irq_restore(flags); +} + +static int +apecs_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + unsigned long addr, pci_addr; + unsigned char type1; + long mask; + int shift; + + if (mk_conf_addr(bus, devfn, where, &pci_addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + mask = (size - 1) * 8; + shift = (where & 3) * 8; + addr = (pci_addr << 5) + mask + APECS_CONF; + *value = conf_read(addr, type1) >> (shift); + return PCIBIOS_SUCCESSFUL; +} + +static int +apecs_write_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + unsigned long addr, pci_addr; + unsigned char type1; + long mask; + + if (mk_conf_addr(bus, devfn, where, &pci_addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + mask = (size - 1) * 8; + addr = (pci_addr << 5) + mask + APECS_CONF; + conf_write(addr, value << ((where & 3) * 8), type1); + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops apecs_pci_ops = +{ + .read = apecs_read_config, + .write = apecs_write_config, +}; + +void +apecs_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) +{ + wmb(); + *(vip)APECS_IOC_TBIA = 0; + mb(); +} + +void __init +apecs_init_arch(void) +{ + struct pci_controller *hose; + + /* + * Create our single hose. + */ + + pci_isa_hose = hose = alloc_pci_controller(); + hose->io_space = &ioport_resource; + hose->mem_space = &iomem_resource; + hose->index = 0; + + hose->sparse_mem_base = APECS_SPARSE_MEM - IDENT_ADDR; + hose->dense_mem_base = APECS_DENSE_MEM - IDENT_ADDR; + hose->sparse_io_base = APECS_IO - IDENT_ADDR; + hose->dense_io_base = 0; + + /* + * Set up the PCI to main memory translation windows. + * + * Window 1 is direct access 1GB at 1GB + * Window 2 is scatter-gather 8MB at 8MB (for isa) + */ + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); + hose->sg_pci = NULL; + __direct_map_base = 0x40000000; + __direct_map_size = 0x40000000; + + *(vuip)APECS_IOC_PB1R = __direct_map_base | 0x00080000; + *(vuip)APECS_IOC_PM1R = (__direct_map_size - 1) & 0xfff00000U; + *(vuip)APECS_IOC_TB1R = 0; + + *(vuip)APECS_IOC_PB2R = hose->sg_isa->dma_base | 0x000c0000; + *(vuip)APECS_IOC_PM2R = (hose->sg_isa->size - 1) & 0xfff00000; + *(vuip)APECS_IOC_TB2R = virt_to_phys(hose->sg_isa->ptes) >> 1; + + apecs_pci_tbi(hose, 0, -1); + + /* + * Finally, clear the HAXR2 register, which gets used + * for PCI Config Space accesses. That is the way + * we want to use it, and we do not want to depend on + * what ARC or SRM might have left behind... + */ + *(vuip)APECS_IOC_HAXR2 = 0; + mb(); +} + +void +apecs_pci_clr_err(void) +{ + unsigned int jd; + + jd = *(vuip)APECS_IOC_DCSR; + if (jd & 0xffe0L) { + *(vuip)APECS_IOC_SEAR; + *(vuip)APECS_IOC_DCSR = jd | 0xffe1L; + mb(); + *(vuip)APECS_IOC_DCSR; + } + *(vuip)APECS_IOC_TBIA = (unsigned int)APECS_IOC_TBIA; + mb(); + *(vuip)APECS_IOC_TBIA; +} + +void +apecs_machine_check(unsigned long vector, unsigned long la_ptr) +{ + struct el_common *mchk_header; + struct el_apecs_procdata *mchk_procdata; + struct el_apecs_sysdata_mcheck *mchk_sysdata; + + mchk_header = (struct el_common *)la_ptr; + + mchk_procdata = (struct el_apecs_procdata *) + (la_ptr + mchk_header->proc_offset + - sizeof(mchk_procdata->paltemp)); + + mchk_sysdata = (struct el_apecs_sysdata_mcheck *) + (la_ptr + mchk_header->sys_offset); + + + /* Clear the error before any reporting. */ + mb(); + mb(); /* magic */ + draina(); + apecs_pci_clr_err(); + wrmces(0x7); /* reset machine check pending flag */ + mb(); + + process_mcheck_info(vector, la_ptr, "APECS", + (mcheck_expected(0) + && (mchk_sysdata->epic_dcsr & 0x0c00UL))); +} diff --git a/arch/alpha/kernel/core_cia.c b/arch/alpha/kernel/core_cia.c new file mode 100644 index 00000000..c44339e1 --- /dev/null +++ b/arch/alpha/kernel/core_cia.c @@ -0,0 +1,1212 @@ +/* + * linux/arch/alpha/kernel/core_cia.c + * + * Written by David A Rusling (david.rusling@reo.mts.dec.com). + * December 1995. + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1997, 1998 Jay Estabrook + * Copyright (C) 1998, 1999, 2000 Richard Henderson + * + * Code common to all CIA core logic chips. + */ + +#define __EXTERN_INLINE inline +#include <asm/io.h> +#include <asm/core_cia.h> +#undef __EXTERN_INLINE + +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/bootmem.h> + +#include <asm/ptrace.h> +#include <asm/mce.h> + +#include "proto.h" +#include "pci_impl.h" + + +/* + * NOTE: Herein lie back-to-back mb instructions. They are magic. + * One plausible explanation is that the i/o controller does not properly + * handle the system transaction. Another involves timing. Ho hum. + */ + +#define DEBUG_CONFIG 0 +#if DEBUG_CONFIG +# define DBGC(args) printk args +#else +# define DBGC(args) +#endif + +#define vip volatile int * + +/* + * Given a bus, device, and function number, compute resulting + * configuration space address. It is therefore not safe to have + * concurrent invocations to configuration space access routines, but + * there really shouldn't be any need for this. + * + * Type 0: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | |D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|0| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:11 Device select bit. + * 10:8 Function number + * 7:2 Register number + * + * Type 1: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | | | | | | | | | |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|1| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:24 reserved + * 23:16 bus number (8 bits = 128 possible buses) + * 15:11 Device number (5 bits) + * 10:8 function number + * 7:2 register number + * + * Notes: + * The function number selects which function of a multi-function device + * (e.g., SCSI and Ethernet). + * + * The register selects a DWORD (32 bit) register offset. Hence it + * doesn't get shifted by 2 bits as we want to "drop" the bottom two + * bits. + */ + +static int +mk_conf_addr(struct pci_bus *bus_dev, unsigned int device_fn, int where, + unsigned long *pci_addr, unsigned char *type1) +{ + u8 bus = bus_dev->number; + + *type1 = (bus != 0); + *pci_addr = (bus << 16) | (device_fn << 8) | where; + + DBGC(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x," + " returning address 0x%p\n" + bus, device_fn, where, *pci_addr)); + + return 0; +} + +static unsigned int +conf_read(unsigned long addr, unsigned char type1) +{ + unsigned long flags; + int stat0, value; + int cia_cfg = 0; + + DBGC(("conf_read(addr=0x%lx, type1=%d) ", addr, type1)); + local_irq_save(flags); + + /* Reset status register to avoid losing errors. */ + stat0 = *(vip)CIA_IOC_CIA_ERR; + *(vip)CIA_IOC_CIA_ERR = stat0; + mb(); + *(vip)CIA_IOC_CIA_ERR; /* re-read to force write */ + + /* If Type1 access, must set CIA CFG. */ + if (type1) { + cia_cfg = *(vip)CIA_IOC_CFG; + *(vip)CIA_IOC_CFG = (cia_cfg & ~3) | 1; + mb(); + *(vip)CIA_IOC_CFG; + } + + mb(); + draina(); + mcheck_expected(0) = 1; + mcheck_taken(0) = 0; + mb(); + + /* Access configuration space. */ + value = *(vip)addr; + mb(); + mb(); /* magic */ + if (mcheck_taken(0)) { + mcheck_taken(0) = 0; + value = 0xffffffff; + mb(); + } + mcheck_expected(0) = 0; + mb(); + + /* If Type1 access, must reset IOC CFG so normal IO space ops work. */ + if (type1) { + *(vip)CIA_IOC_CFG = cia_cfg; + mb(); + *(vip)CIA_IOC_CFG; + } + + local_irq_restore(flags); + DBGC(("done\n")); + + return value; +} + +static void +conf_write(unsigned long addr, unsigned int value, unsigned char type1) +{ + unsigned long flags; + int stat0, cia_cfg = 0; + + DBGC(("conf_write(addr=0x%lx, type1=%d) ", addr, type1)); + local_irq_save(flags); + + /* Reset status register to avoid losing errors. */ + stat0 = *(vip)CIA_IOC_CIA_ERR; + *(vip)CIA_IOC_CIA_ERR = stat0; + mb(); + *(vip)CIA_IOC_CIA_ERR; /* re-read to force write */ + + /* If Type1 access, must set CIA CFG. */ + if (type1) { + cia_cfg = *(vip)CIA_IOC_CFG; + *(vip)CIA_IOC_CFG = (cia_cfg & ~3) | 1; + mb(); + *(vip)CIA_IOC_CFG; + } + + mb(); + draina(); + mcheck_expected(0) = 1; + mcheck_taken(0) = 0; + mb(); + + /* Access configuration space. */ + *(vip)addr = value; + mb(); + *(vip)addr; /* read back to force the write */ + + mcheck_expected(0) = 0; + mb(); + + /* If Type1 access, must reset IOC CFG so normal IO space ops work. */ + if (type1) { + *(vip)CIA_IOC_CFG = cia_cfg; + mb(); + *(vip)CIA_IOC_CFG; + } + + local_irq_restore(flags); + DBGC(("done\n")); +} + +static int +cia_read_config(struct pci_bus *bus, unsigned int devfn, int where, int size, + u32 *value) +{ + unsigned long addr, pci_addr; + long mask; + unsigned char type1; + int shift; + + if (mk_conf_addr(bus, devfn, where, &pci_addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + mask = (size - 1) * 8; + shift = (where & 3) * 8; + addr = (pci_addr << 5) + mask + CIA_CONF; + *value = conf_read(addr, type1) >> (shift); + return PCIBIOS_SUCCESSFUL; +} + +static int +cia_write_config(struct pci_bus *bus, unsigned int devfn, int where, int size, + u32 value) +{ + unsigned long addr, pci_addr; + long mask; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &pci_addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + mask = (size - 1) * 8; + addr = (pci_addr << 5) + mask + CIA_CONF; + conf_write(addr, value << ((where & 3) * 8), type1); + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops cia_pci_ops = +{ + .read = cia_read_config, + .write = cia_write_config, +}; + +/* + * CIA Pass 1 and PYXIS Pass 1 and 2 have a broken scatter-gather tlb. + * It cannot be invalidated. Rather than hard code the pass numbers, + * actually try the tbia to see if it works. + */ + +void +cia_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) +{ + wmb(); + *(vip)CIA_IOC_PCI_TBIA = 3; /* Flush all locked and unlocked. */ + mb(); + *(vip)CIA_IOC_PCI_TBIA; +} + +/* + * On PYXIS, even if the tbia works, we cannot use it. It effectively locks + * the chip (as well as direct write to the tag registers) if there is a + * SG DMA operation in progress. This is true at least for PYXIS rev. 1, + * so always use the method below. + */ +/* + * This is the method NT and NetBSD use. + * + * Allocate mappings, and put the chip into DMA loopback mode to read a + * garbage page. This works by causing TLB misses, causing old entries to + * be purged to make room for the new entries coming in for the garbage page. + */ + +#define CIA_BROKEN_TBIA_BASE 0x30000000 +#define CIA_BROKEN_TBIA_SIZE 1024 + +/* Always called with interrupts disabled */ +void +cia_pci_tbi_try2(struct pci_controller *hose, + dma_addr_t start, dma_addr_t end) +{ + void __iomem *bus_addr; + int ctrl; + + /* Put the chip into PCI loopback mode. */ + mb(); + ctrl = *(vip)CIA_IOC_CIA_CTRL; + *(vip)CIA_IOC_CIA_CTRL = ctrl | CIA_CTRL_PCI_LOOP_EN; + mb(); + *(vip)CIA_IOC_CIA_CTRL; + mb(); + + /* Read from PCI dense memory space at TBI_ADDR, skipping 32k on + each read. This forces SG TLB misses. NetBSD claims that the + TLB entries are not quite LRU, meaning that we need to read more + times than there are actual tags. The 2117x docs claim strict + round-robin. Oh well, we've come this far... */ + /* Even better - as seen on the PYXIS rev 1 the TLB tags 0-3 can + be filled by the TLB misses *only once* after being invalidated + (by tbia or direct write). Next misses won't update them even + though the lock bits are cleared. Tags 4-7 are "quite LRU" though, + so use them and read at window 3 base exactly 4 times. Reading + more sometimes makes the chip crazy. -ink */ + + bus_addr = cia_ioremap(CIA_BROKEN_TBIA_BASE, 32768 * 4); + + cia_readl(bus_addr + 0x00000); + cia_readl(bus_addr + 0x08000); + cia_readl(bus_addr + 0x10000); + cia_readl(bus_addr + 0x18000); + + cia_iounmap(bus_addr); + + /* Restore normal PCI operation. */ + mb(); + *(vip)CIA_IOC_CIA_CTRL = ctrl; + mb(); + *(vip)CIA_IOC_CIA_CTRL; + mb(); +} + +static inline void +cia_prepare_tbia_workaround(int window) +{ + unsigned long *ppte, pte; + long i; + + /* Use minimal 1K map. */ + ppte = __alloc_bootmem(CIA_BROKEN_TBIA_SIZE, 32768, 0); + pte = (virt_to_phys(ppte) >> (PAGE_SHIFT - 1)) | 1; + + for (i = 0; i < CIA_BROKEN_TBIA_SIZE / sizeof(unsigned long); ++i) + ppte[i] = pte; + + *(vip)CIA_IOC_PCI_Wn_BASE(window) = CIA_BROKEN_TBIA_BASE | 3; + *(vip)CIA_IOC_PCI_Wn_MASK(window) + = (CIA_BROKEN_TBIA_SIZE*1024 - 1) & 0xfff00000; + *(vip)CIA_IOC_PCI_Tn_BASE(window) = virt_to_phys(ppte) >> 2; +} + +static void __init +verify_tb_operation(void) +{ + static int page[PAGE_SIZE/4] + __attribute__((aligned(PAGE_SIZE))) + __initdata = { 0 }; + + struct pci_iommu_arena *arena = pci_isa_hose->sg_isa; + int ctrl, addr0, tag0, pte0, data0; + int temp, use_tbia_try2 = 0; + void __iomem *bus_addr; + + /* pyxis -- tbia is broken */ + if (pci_isa_hose->dense_io_base) + use_tbia_try2 = 1; + + /* Put the chip into PCI loopback mode. */ + mb(); + ctrl = *(vip)CIA_IOC_CIA_CTRL; + *(vip)CIA_IOC_CIA_CTRL = ctrl | CIA_CTRL_PCI_LOOP_EN; + mb(); + *(vip)CIA_IOC_CIA_CTRL; + mb(); + + /* Write a valid entry directly into the TLB registers. */ + + addr0 = arena->dma_base; + tag0 = addr0 | 1; + pte0 = (virt_to_phys(page) >> (PAGE_SHIFT - 1)) | 1; + + *(vip)CIA_IOC_TB_TAGn(0) = tag0; + *(vip)CIA_IOC_TB_TAGn(1) = 0; + *(vip)CIA_IOC_TB_TAGn(2) = 0; + *(vip)CIA_IOC_TB_TAGn(3) = 0; + *(vip)CIA_IOC_TB_TAGn(4) = 0; + *(vip)CIA_IOC_TB_TAGn(5) = 0; + *(vip)CIA_IOC_TB_TAGn(6) = 0; + *(vip)CIA_IOC_TB_TAGn(7) = 0; + *(vip)CIA_IOC_TBn_PAGEm(0,0) = pte0; + *(vip)CIA_IOC_TBn_PAGEm(0,1) = 0; + *(vip)CIA_IOC_TBn_PAGEm(0,2) = 0; + *(vip)CIA_IOC_TBn_PAGEm(0,3) = 0; + mb(); + + /* Get a usable bus address */ + bus_addr = cia_ioremap(addr0, 8*PAGE_SIZE); + + /* First, verify we can read back what we've written. If + this fails, we can't be sure of any of the other testing + we're going to do, so bail. */ + /* ??? Actually, we could do the work with machine checks. + By passing this register update test, we pretty much + guarantee that cia_pci_tbi_try1 works. If this test + fails, cia_pci_tbi_try2 might still work. */ + + temp = *(vip)CIA_IOC_TB_TAGn(0); + if (temp != tag0) { + printk("pci: failed tb register update test " + "(tag0 %#x != %#x)\n", temp, tag0); + goto failed; + } + temp = *(vip)CIA_IOC_TB_TAGn(1); + if (temp != 0) { + printk("pci: failed tb register update test " + "(tag1 %#x != 0)\n", temp); + goto failed; + } + temp = *(vip)CIA_IOC_TBn_PAGEm(0,0); + if (temp != pte0) { + printk("pci: failed tb register update test " + "(pte0 %#x != %#x)\n", temp, pte0); + goto failed; + } + printk("pci: passed tb register update test\n"); + + /* Second, verify we can actually do I/O through this entry. */ + + data0 = 0xdeadbeef; + page[0] = data0; + mcheck_expected(0) = 1; + mcheck_taken(0) = 0; + mb(); + temp = cia_readl(bus_addr); + mb(); + mcheck_expected(0) = 0; + mb(); + if (mcheck_taken(0)) { + printk("pci: failed sg loopback i/o read test (mcheck)\n"); + goto failed; + } + if (temp != data0) { + printk("pci: failed sg loopback i/o read test " + "(%#x != %#x)\n", temp, data0); + goto failed; + } + printk("pci: passed sg loopback i/o read test\n"); + + /* Third, try to invalidate the TLB. */ + + if (! use_tbia_try2) { + cia_pci_tbi(arena->hose, 0, -1); + temp = *(vip)CIA_IOC_TB_TAGn(0); + if (temp & 1) { + use_tbia_try2 = 1; + printk("pci: failed tbia test; workaround available\n"); + } else { + printk("pci: passed tbia test\n"); + } + } + + /* Fourth, verify the TLB snoops the EV5's caches when + doing a tlb fill. */ + + data0 = 0x5adda15e; + page[0] = data0; + arena->ptes[4] = pte0; + mcheck_expected(0) = 1; + mcheck_taken(0) = 0; + mb(); + temp = cia_readl(bus_addr + 4*PAGE_SIZE); + mb(); + mcheck_expected(0) = 0; + mb(); + if (mcheck_taken(0)) { + printk("pci: failed pte write cache snoop test (mcheck)\n"); + goto failed; + } + if (temp != data0) { + printk("pci: failed pte write cache snoop test " + "(%#x != %#x)\n", temp, data0); + goto failed; + } + printk("pci: passed pte write cache snoop test\n"); + + /* Fifth, verify that a previously invalid PTE entry gets + filled from the page table. */ + + data0 = 0xabcdef12; + page[0] = data0; + arena->ptes[5] = pte0; + mcheck_expected(0) = 1; + mcheck_taken(0) = 0; + mb(); + temp = cia_readl(bus_addr + 5*PAGE_SIZE); + mb(); + mcheck_expected(0) = 0; + mb(); + if (mcheck_taken(0)) { + printk("pci: failed valid tag invalid pte reload test " + "(mcheck; workaround available)\n"); + /* Work around this bug by aligning new allocations + on 4 page boundaries. */ + arena->align_entry = 4; + } else if (temp != data0) { + printk("pci: failed valid tag invalid pte reload test " + "(%#x != %#x)\n", temp, data0); + goto failed; + } else { + printk("pci: passed valid tag invalid pte reload test\n"); + } + + /* Sixth, verify machine checks are working. Test invalid + pte under the same valid tag as we used above. */ + + mcheck_expected(0) = 1; + mcheck_taken(0) = 0; + mb(); + temp = cia_readl(bus_addr + 6*PAGE_SIZE); + mb(); + mcheck_expected(0) = 0; + mb(); + printk("pci: %s pci machine check test\n", + mcheck_taken(0) ? "passed" : "failed"); + + /* Clean up after the tests. */ + arena->ptes[4] = 0; + arena->ptes[5] = 0; + + if (use_tbia_try2) { + alpha_mv.mv_pci_tbi = cia_pci_tbi_try2; + + /* Tags 0-3 must be disabled if we use this workaraund. */ + wmb(); + *(vip)CIA_IOC_TB_TAGn(0) = 2; + *(vip)CIA_IOC_TB_TAGn(1) = 2; + *(vip)CIA_IOC_TB_TAGn(2) = 2; + *(vip)CIA_IOC_TB_TAGn(3) = 2; + + printk("pci: tbia workaround enabled\n"); + } + alpha_mv.mv_pci_tbi(arena->hose, 0, -1); + +exit: + /* unmap the bus addr */ + cia_iounmap(bus_addr); + + /* Restore normal PCI operation. */ + mb(); + *(vip)CIA_IOC_CIA_CTRL = ctrl; + mb(); + *(vip)CIA_IOC_CIA_CTRL; + mb(); + return; + +failed: + printk("pci: disabling sg translation window\n"); + *(vip)CIA_IOC_PCI_W0_BASE = 0; + *(vip)CIA_IOC_PCI_W1_BASE = 0; + pci_isa_hose->sg_isa = NULL; + alpha_mv.mv_pci_tbi = NULL; + goto exit; +} + +#if defined(ALPHA_RESTORE_SRM_SETUP) +/* Save CIA configuration data as the console had it set up. */ +struct +{ + unsigned int hae_mem; + unsigned int hae_io; + unsigned int pci_dac_offset; + unsigned int err_mask; + unsigned int cia_ctrl; + unsigned int cia_cnfg; + struct { + unsigned int w_base; + unsigned int w_mask; + unsigned int t_base; + } window[4]; +} saved_config __attribute((common)); + +void +cia_save_srm_settings(int is_pyxis) +{ + int i; + + /* Save some important registers. */ + saved_config.err_mask = *(vip)CIA_IOC_ERR_MASK; + saved_config.cia_ctrl = *(vip)CIA_IOC_CIA_CTRL; + saved_config.hae_mem = *(vip)CIA_IOC_HAE_MEM; + saved_config.hae_io = *(vip)CIA_IOC_HAE_IO; + saved_config.pci_dac_offset = *(vip)CIA_IOC_PCI_W_DAC; + + if (is_pyxis) + saved_config.cia_cnfg = *(vip)CIA_IOC_CIA_CNFG; + else + saved_config.cia_cnfg = 0; + + /* Save DMA windows configuration. */ + for (i = 0; i < 4; i++) { + saved_config.window[i].w_base = *(vip)CIA_IOC_PCI_Wn_BASE(i); + saved_config.window[i].w_mask = *(vip)CIA_IOC_PCI_Wn_MASK(i); + saved_config.window[i].t_base = *(vip)CIA_IOC_PCI_Tn_BASE(i); + } + mb(); +} + +void +cia_restore_srm_settings(void) +{ + int i; + + for (i = 0; i < 4; i++) { + *(vip)CIA_IOC_PCI_Wn_BASE(i) = saved_config.window[i].w_base; + *(vip)CIA_IOC_PCI_Wn_MASK(i) = saved_config.window[i].w_mask; + *(vip)CIA_IOC_PCI_Tn_BASE(i) = saved_config.window[i].t_base; + } + + *(vip)CIA_IOC_HAE_MEM = saved_config.hae_mem; + *(vip)CIA_IOC_HAE_IO = saved_config.hae_io; + *(vip)CIA_IOC_PCI_W_DAC = saved_config.pci_dac_offset; + *(vip)CIA_IOC_ERR_MASK = saved_config.err_mask; + *(vip)CIA_IOC_CIA_CTRL = saved_config.cia_ctrl; + + if (saved_config.cia_cnfg) /* Must be pyxis. */ + *(vip)CIA_IOC_CIA_CNFG = saved_config.cia_cnfg; + + mb(); +} +#else /* ALPHA_RESTORE_SRM_SETUP */ +#define cia_save_srm_settings(p) do {} while (0) +#define cia_restore_srm_settings() do {} while (0) +#endif /* ALPHA_RESTORE_SRM_SETUP */ + + +static void __init +do_init_arch(int is_pyxis) +{ + struct pci_controller *hose; + int temp, cia_rev, tbia_window; + + cia_rev = *(vip)CIA_IOC_CIA_REV & CIA_REV_MASK; + printk("pci: cia revision %d%s\n", + cia_rev, is_pyxis ? " (pyxis)" : ""); + + if (alpha_using_srm) + cia_save_srm_settings(is_pyxis); + + /* Set up error reporting. */ + temp = *(vip)CIA_IOC_ERR_MASK; + temp &= ~(CIA_ERR_CPU_PE | CIA_ERR_MEM_NEM | CIA_ERR_PA_PTE_INV + | CIA_ERR_RCVD_MAS_ABT | CIA_ERR_RCVD_TAR_ABT); + *(vip)CIA_IOC_ERR_MASK = temp; + + /* Clear all currently pending errors. */ + temp = *(vip)CIA_IOC_CIA_ERR; + *(vip)CIA_IOC_CIA_ERR = temp; + + /* Turn on mchecks. */ + temp = *(vip)CIA_IOC_CIA_CTRL; + temp |= CIA_CTRL_FILL_ERR_EN | CIA_CTRL_MCHK_ERR_EN; + *(vip)CIA_IOC_CIA_CTRL = temp; + + /* Clear the CFG register, which gets used for PCI config space + accesses. That is the way we want to use it, and we do not + want to depend on what ARC or SRM might have left behind. */ + *(vip)CIA_IOC_CFG = 0; + + /* Zero the HAEs. */ + *(vip)CIA_IOC_HAE_MEM = 0; + *(vip)CIA_IOC_HAE_IO = 0; + + /* For PYXIS, we always use BWX bus and i/o accesses. To that end, + make sure they're enabled on the controller. At the same time, + enable the monster window. */ + if (is_pyxis) { + temp = *(vip)CIA_IOC_CIA_CNFG; + temp |= CIA_CNFG_IOA_BWEN | CIA_CNFG_PCI_MWEN; + *(vip)CIA_IOC_CIA_CNFG = temp; + } + + /* Synchronize with all previous changes. */ + mb(); + *(vip)CIA_IOC_CIA_REV; + + /* + * Create our single hose. + */ + + pci_isa_hose = hose = alloc_pci_controller(); + hose->io_space = &ioport_resource; + hose->mem_space = &iomem_resource; + hose->index = 0; + + if (! is_pyxis) { + struct resource *hae_mem = alloc_resource(); + hose->mem_space = hae_mem; + + hae_mem->start = 0; + hae_mem->end = CIA_MEM_R1_MASK; + hae_mem->name = pci_hae0_name; + hae_mem->flags = IORESOURCE_MEM; + + if (request_resource(&iomem_resource, hae_mem) < 0) + printk(KERN_ERR "Failed to request HAE_MEM\n"); + + hose->sparse_mem_base = CIA_SPARSE_MEM - IDENT_ADDR; + hose->dense_mem_base = CIA_DENSE_MEM - IDENT_ADDR; + hose->sparse_io_base = CIA_IO - IDENT_ADDR; + hose->dense_io_base = 0; + } else { + hose->sparse_mem_base = 0; + hose->dense_mem_base = CIA_BW_MEM - IDENT_ADDR; + hose->sparse_io_base = 0; + hose->dense_io_base = CIA_BW_IO - IDENT_ADDR; + } + + /* + * Set up the PCI to main memory translation windows. + * + * Window 0 is S/G 8MB at 8MB (for isa) + * Window 1 is S/G 1MB at 768MB (for tbia) (unused for CIA rev 1) + * Window 2 is direct access 2GB at 2GB + * Window 3 is DAC access 4GB at 8GB (or S/G for tbia if CIA rev 1) + * + * ??? NetBSD hints that page tables must be aligned to 32K, + * possibly due to a hardware bug. This is over-aligned + * from the 8K alignment one would expect for an 8MB window. + * No description of what revisions affected. + */ + + hose->sg_pci = NULL; + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 32768); + + __direct_map_base = 0x80000000; + __direct_map_size = 0x80000000; + + *(vip)CIA_IOC_PCI_W0_BASE = hose->sg_isa->dma_base | 3; + *(vip)CIA_IOC_PCI_W0_MASK = (hose->sg_isa->size - 1) & 0xfff00000; + *(vip)CIA_IOC_PCI_T0_BASE = virt_to_phys(hose->sg_isa->ptes) >> 2; + + *(vip)CIA_IOC_PCI_W2_BASE = __direct_map_base | 1; + *(vip)CIA_IOC_PCI_W2_MASK = (__direct_map_size - 1) & 0xfff00000; + *(vip)CIA_IOC_PCI_T2_BASE = 0 >> 2; + + /* On PYXIS we have the monster window, selected by bit 40, so + there is no need for window3 to be enabled. + + On CIA, we don't have true arbitrary addressing -- bits <39:32> + are compared against W_DAC. We can, however, directly map 4GB, + which is better than before. However, due to assumptions made + elsewhere, we should not claim that we support DAC unless that + 4GB covers all of physical memory. + + On CIA rev 1, apparently W1 and W2 can't be used for SG. + At least, there are reports that it doesn't work for Alcor. + In that case, we have no choice but to use W3 for the TBIA + workaround, which means we can't use DAC at all. */ + + tbia_window = 1; + if (is_pyxis) { + *(vip)CIA_IOC_PCI_W3_BASE = 0; + } else if (cia_rev == 1) { + *(vip)CIA_IOC_PCI_W1_BASE = 0; + tbia_window = 3; + } else if (max_low_pfn > (0x100000000UL >> PAGE_SHIFT)) { + *(vip)CIA_IOC_PCI_W3_BASE = 0; + } else { + *(vip)CIA_IOC_PCI_W3_BASE = 0x00000000 | 1 | 8; + *(vip)CIA_IOC_PCI_W3_MASK = 0xfff00000; + *(vip)CIA_IOC_PCI_T3_BASE = 0 >> 2; + + alpha_mv.pci_dac_offset = 0x200000000UL; + *(vip)CIA_IOC_PCI_W_DAC = alpha_mv.pci_dac_offset >> 32; + } + + /* Prepare workaround for apparently broken tbia. */ + cia_prepare_tbia_workaround(tbia_window); +} + +void __init +cia_init_arch(void) +{ + do_init_arch(0); +} + +void __init +pyxis_init_arch(void) +{ + /* On pyxis machines we can precisely calculate the + CPU clock frequency using pyxis real time counter. + It's especially useful for SX164 with broken RTC. + + Both CPU and chipset are driven by the single 16.666M + or 16.667M crystal oscillator. PYXIS_RT_COUNT clock is + 66.66 MHz. -ink */ + + unsigned int cc0, cc1; + unsigned long pyxis_cc; + + __asm__ __volatile__ ("rpcc %0" : "=r"(cc0)); + pyxis_cc = *(vulp)PYXIS_RT_COUNT; + do { } while(*(vulp)PYXIS_RT_COUNT - pyxis_cc < 4096); + __asm__ __volatile__ ("rpcc %0" : "=r"(cc1)); + cc1 -= cc0; + hwrpb->cycle_freq = ((cc1 >> 11) * 100000000UL) / 3; + hwrpb_update_checksum(hwrpb); + + do_init_arch(1); +} + +void +cia_kill_arch(int mode) +{ + if (alpha_using_srm) + cia_restore_srm_settings(); +} + +void __init +cia_init_pci(void) +{ + /* Must delay this from init_arch, as we need machine checks. */ + verify_tb_operation(); + common_init_pci(); +} + +static inline void +cia_pci_clr_err(void) +{ + int jd; + + jd = *(vip)CIA_IOC_CIA_ERR; + *(vip)CIA_IOC_CIA_ERR = jd; + mb(); + *(vip)CIA_IOC_CIA_ERR; /* re-read to force write. */ +} + +#ifdef CONFIG_VERBOSE_MCHECK +static void +cia_decode_pci_error(struct el_CIA_sysdata_mcheck *cia, const char *msg) +{ + static const char * const pci_cmd_desc[16] = { + "Interrupt Acknowledge", "Special Cycle", "I/O Read", + "I/O Write", "Reserved 0x4", "Reserved 0x5", "Memory Read", + "Memory Write", "Reserved 0x8", "Reserved 0x9", + "Configuration Read", "Configuration Write", + "Memory Read Multiple", "Dual Address Cycle", + "Memory Read Line", "Memory Write and Invalidate" + }; + + if (cia->cia_err & (CIA_ERR_COR_ERR + | CIA_ERR_UN_COR_ERR + | CIA_ERR_MEM_NEM + | CIA_ERR_PA_PTE_INV)) { + static const char * const window_desc[6] = { + "No window active", "Window 0 hit", "Window 1 hit", + "Window 2 hit", "Window 3 hit", "Monster window hit" + }; + + const char *window; + const char *cmd; + unsigned long addr, tmp; + int lock, dac; + + cmd = pci_cmd_desc[cia->pci_err0 & 0x7]; + lock = (cia->pci_err0 >> 4) & 1; + dac = (cia->pci_err0 >> 5) & 1; + + tmp = (cia->pci_err0 >> 8) & 0x1F; + tmp = ffs(tmp); + window = window_desc[tmp]; + + addr = cia->pci_err1; + if (dac) { + tmp = *(vip)CIA_IOC_PCI_W_DAC & 0xFFUL; + addr |= tmp << 32; + } + + printk(KERN_CRIT "CIA machine check: %s\n", msg); + printk(KERN_CRIT " DMA command: %s\n", cmd); + printk(KERN_CRIT " PCI address: %#010lx\n", addr); + printk(KERN_CRIT " %s, Lock: %d, DAC: %d\n", + window, lock, dac); + } else if (cia->cia_err & (CIA_ERR_PERR + | CIA_ERR_PCI_ADDR_PE + | CIA_ERR_RCVD_MAS_ABT + | CIA_ERR_RCVD_TAR_ABT + | CIA_ERR_IOA_TIMEOUT)) { + static const char * const master_st_desc[16] = { + "Idle", "Drive bus", "Address step cycle", + "Address cycle", "Data cycle", "Last read data cycle", + "Last write data cycle", "Read stop cycle", + "Write stop cycle", "Read turnaround cycle", + "Write turnaround cycle", "Reserved 0xB", + "Reserved 0xC", "Reserved 0xD", "Reserved 0xE", + "Unknown state" + }; + static const char * const target_st_desc[16] = { + "Idle", "Busy", "Read data cycle", "Write data cycle", + "Read stop cycle", "Write stop cycle", + "Read turnaround cycle", "Write turnaround cycle", + "Read wait cycle", "Write wait cycle", + "Reserved 0xA", "Reserved 0xB", "Reserved 0xC", + "Reserved 0xD", "Reserved 0xE", "Unknown state" + }; + + const char *cmd; + const char *master, *target; + unsigned long addr, tmp; + int dac; + + master = master_st_desc[(cia->pci_err0 >> 16) & 0xF]; + target = target_st_desc[(cia->pci_err0 >> 20) & 0xF]; + cmd = pci_cmd_desc[(cia->pci_err0 >> 24) & 0xF]; + dac = (cia->pci_err0 >> 28) & 1; + + addr = cia->pci_err2; + if (dac) { + tmp = *(volatile int *)CIA_IOC_PCI_W_DAC & 0xFFUL; + addr |= tmp << 32; + } + + printk(KERN_CRIT "CIA machine check: %s\n", msg); + printk(KERN_CRIT " PCI command: %s\n", cmd); + printk(KERN_CRIT " Master state: %s, Target state: %s\n", + master, target); + printk(KERN_CRIT " PCI address: %#010lx, DAC: %d\n", + addr, dac); + } else { + printk(KERN_CRIT "CIA machine check: %s\n", msg); + printk(KERN_CRIT " Unknown PCI error\n"); + printk(KERN_CRIT " PCI_ERR0 = %#08lx", cia->pci_err0); + printk(KERN_CRIT " PCI_ERR1 = %#08lx", cia->pci_err1); + printk(KERN_CRIT " PCI_ERR2 = %#08lx", cia->pci_err2); + } +} + +static void +cia_decode_mem_error(struct el_CIA_sysdata_mcheck *cia, const char *msg) +{ + unsigned long mem_port_addr; + unsigned long mem_port_mask; + const char *mem_port_cmd; + const char *seq_state; + const char *set_select; + unsigned long tmp; + + /* If this is a DMA command, also decode the PCI bits. */ + if ((cia->mem_err1 >> 20) & 1) + cia_decode_pci_error(cia, msg); + else + printk(KERN_CRIT "CIA machine check: %s\n", msg); + + mem_port_addr = cia->mem_err0 & 0xfffffff0; + mem_port_addr |= (cia->mem_err1 & 0x83UL) << 32; + + mem_port_mask = (cia->mem_err1 >> 12) & 0xF; + + tmp = (cia->mem_err1 >> 8) & 0xF; + tmp |= ((cia->mem_err1 >> 20) & 1) << 4; + if ((tmp & 0x1E) == 0x06) + mem_port_cmd = "WRITE BLOCK or WRITE BLOCK LOCK"; + else if ((tmp & 0x1C) == 0x08) + mem_port_cmd = "READ MISS or READ MISS MODIFY"; + else if (tmp == 0x1C) + mem_port_cmd = "BC VICTIM"; + else if ((tmp & 0x1E) == 0x0E) + mem_port_cmd = "READ MISS MODIFY"; + else if ((tmp & 0x1C) == 0x18) + mem_port_cmd = "DMA READ or DMA READ MODIFY"; + else if ((tmp & 0x1E) == 0x12) + mem_port_cmd = "DMA WRITE"; + else + mem_port_cmd = "Unknown"; + + tmp = (cia->mem_err1 >> 16) & 0xF; + switch (tmp) { + case 0x0: + seq_state = "Idle"; + break; + case 0x1: + seq_state = "DMA READ or DMA WRITE"; + break; + case 0x2: case 0x3: + seq_state = "READ MISS (or READ MISS MODIFY) with victim"; + break; + case 0x4: case 0x5: case 0x6: + seq_state = "READ MISS (or READ MISS MODIFY) with no victim"; + break; + case 0x8: case 0x9: case 0xB: + seq_state = "Refresh"; + break; + case 0xC: + seq_state = "Idle, waiting for DMA pending read"; + break; + case 0xE: case 0xF: + seq_state = "Idle, ras precharge"; + break; + default: + seq_state = "Unknown"; + break; + } + + tmp = (cia->mem_err1 >> 24) & 0x1F; + switch (tmp) { + case 0x00: set_select = "Set 0 selected"; break; + case 0x01: set_select = "Set 1 selected"; break; + case 0x02: set_select = "Set 2 selected"; break; + case 0x03: set_select = "Set 3 selected"; break; + case 0x04: set_select = "Set 4 selected"; break; + case 0x05: set_select = "Set 5 selected"; break; + case 0x06: set_select = "Set 6 selected"; break; + case 0x07: set_select = "Set 7 selected"; break; + case 0x08: set_select = "Set 8 selected"; break; + case 0x09: set_select = "Set 9 selected"; break; + case 0x0A: set_select = "Set A selected"; break; + case 0x0B: set_select = "Set B selected"; break; + case 0x0C: set_select = "Set C selected"; break; + case 0x0D: set_select = "Set D selected"; break; + case 0x0E: set_select = "Set E selected"; break; + case 0x0F: set_select = "Set F selected"; break; + case 0x10: set_select = "No set selected"; break; + case 0x1F: set_select = "Refresh cycle"; break; + default: set_select = "Unknown"; break; + } + + printk(KERN_CRIT " Memory port command: %s\n", mem_port_cmd); + printk(KERN_CRIT " Memory port address: %#010lx, mask: %#lx\n", + mem_port_addr, mem_port_mask); + printk(KERN_CRIT " Memory sequencer state: %s\n", seq_state); + printk(KERN_CRIT " Memory set: %s\n", set_select); +} + +static void +cia_decode_ecc_error(struct el_CIA_sysdata_mcheck *cia, const char *msg) +{ + long syn; + long i; + const char *fmt; + + cia_decode_mem_error(cia, msg); + + syn = cia->cia_syn & 0xff; + if (syn == (syn & -syn)) { + fmt = KERN_CRIT " ECC syndrome %#x -- check bit %d\n"; + i = ffs(syn) - 1; + } else { + static unsigned char const data_bit[64] = { + 0xCE, 0xCB, 0xD3, 0xD5, + 0xD6, 0xD9, 0xDA, 0xDC, + 0x23, 0x25, 0x26, 0x29, + 0x2A, 0x2C, 0x31, 0x34, + 0x0E, 0x0B, 0x13, 0x15, + 0x16, 0x19, 0x1A, 0x1C, + 0xE3, 0xE5, 0xE6, 0xE9, + 0xEA, 0xEC, 0xF1, 0xF4, + 0x4F, 0x4A, 0x52, 0x54, + 0x57, 0x58, 0x5B, 0x5D, + 0xA2, 0xA4, 0xA7, 0xA8, + 0xAB, 0xAD, 0xB0, 0xB5, + 0x8F, 0x8A, 0x92, 0x94, + 0x97, 0x98, 0x9B, 0x9D, + 0x62, 0x64, 0x67, 0x68, + 0x6B, 0x6D, 0x70, 0x75 + }; + + for (i = 0; i < 64; ++i) + if (data_bit[i] == syn) + break; + + if (i < 64) + fmt = KERN_CRIT " ECC syndrome %#x -- data bit %d\n"; + else + fmt = KERN_CRIT " ECC syndrome %#x -- unknown bit\n"; + } + + printk (fmt, syn, i); +} + +static void +cia_decode_parity_error(struct el_CIA_sysdata_mcheck *cia) +{ + static const char * const cmd_desc[16] = { + "NOP", "LOCK", "FETCH", "FETCH_M", "MEMORY BARRIER", + "SET DIRTY", "WRITE BLOCK", "WRITE BLOCK LOCK", + "READ MISS0", "READ MISS1", "READ MISS MOD0", + "READ MISS MOD1", "BCACHE VICTIM", "Spare", + "READ MISS MOD STC0", "READ MISS MOD STC1" + }; + + unsigned long addr; + unsigned long mask; + const char *cmd; + int par; + + addr = cia->cpu_err0 & 0xfffffff0; + addr |= (cia->cpu_err1 & 0x83UL) << 32; + cmd = cmd_desc[(cia->cpu_err1 >> 8) & 0xF]; + mask = (cia->cpu_err1 >> 12) & 0xF; + par = (cia->cpu_err1 >> 21) & 1; + + printk(KERN_CRIT "CIA machine check: System bus parity error\n"); + printk(KERN_CRIT " Command: %s, Parity bit: %d\n", cmd, par); + printk(KERN_CRIT " Address: %#010lx, Mask: %#lx\n", addr, mask); +} +#endif /* CONFIG_VERBOSE_MCHECK */ + + +static int +cia_decode_mchk(unsigned long la_ptr) +{ + struct el_common *com; + struct el_CIA_sysdata_mcheck *cia; + + com = (void *)la_ptr; + cia = (void *)(la_ptr + com->sys_offset); + + if ((cia->cia_err & CIA_ERR_VALID) == 0) + return 0; + +#ifdef CONFIG_VERBOSE_MCHECK + if (!alpha_verbose_mcheck) + return 1; + + switch (ffs(cia->cia_err & 0xfff) - 1) { + case 0: /* CIA_ERR_COR_ERR */ + cia_decode_ecc_error(cia, "Corrected ECC error"); + break; + case 1: /* CIA_ERR_UN_COR_ERR */ + cia_decode_ecc_error(cia, "Uncorrected ECC error"); + break; + case 2: /* CIA_ERR_CPU_PE */ + cia_decode_parity_error(cia); + break; + case 3: /* CIA_ERR_MEM_NEM */ + cia_decode_mem_error(cia, "Access to nonexistent memory"); + break; + case 4: /* CIA_ERR_PCI_SERR */ + cia_decode_pci_error(cia, "PCI bus system error"); + break; + case 5: /* CIA_ERR_PERR */ + cia_decode_pci_error(cia, "PCI data parity error"); + break; + case 6: /* CIA_ERR_PCI_ADDR_PE */ + cia_decode_pci_error(cia, "PCI address parity error"); + break; + case 7: /* CIA_ERR_RCVD_MAS_ABT */ + cia_decode_pci_error(cia, "PCI master abort"); + break; + case 8: /* CIA_ERR_RCVD_TAR_ABT */ + cia_decode_pci_error(cia, "PCI target abort"); + break; + case 9: /* CIA_ERR_PA_PTE_INV */ + cia_decode_pci_error(cia, "PCI invalid PTE"); + break; + case 10: /* CIA_ERR_FROM_WRT_ERR */ + cia_decode_mem_error(cia, "Write to flash ROM attempted"); + break; + case 11: /* CIA_ERR_IOA_TIMEOUT */ + cia_decode_pci_error(cia, "I/O timeout"); + break; + } + + if (cia->cia_err & CIA_ERR_LOST_CORR_ERR) + printk(KERN_CRIT "CIA lost machine check: " + "Correctable ECC error\n"); + if (cia->cia_err & CIA_ERR_LOST_UN_CORR_ERR) + printk(KERN_CRIT "CIA lost machine check: " + "Uncorrectable ECC error\n"); + if (cia->cia_err & CIA_ERR_LOST_CPU_PE) + printk(KERN_CRIT "CIA lost machine check: " + "System bus parity error\n"); + if (cia->cia_err & CIA_ERR_LOST_MEM_NEM) + printk(KERN_CRIT "CIA lost machine check: " + "Access to nonexistent memory\n"); + if (cia->cia_err & CIA_ERR_LOST_PERR) + printk(KERN_CRIT "CIA lost machine check: " + "PCI data parity error\n"); + if (cia->cia_err & CIA_ERR_LOST_PCI_ADDR_PE) + printk(KERN_CRIT "CIA lost machine check: " + "PCI address parity error\n"); + if (cia->cia_err & CIA_ERR_LOST_RCVD_MAS_ABT) + printk(KERN_CRIT "CIA lost machine check: " + "PCI master abort\n"); + if (cia->cia_err & CIA_ERR_LOST_RCVD_TAR_ABT) + printk(KERN_CRIT "CIA lost machine check: " + "PCI target abort\n"); + if (cia->cia_err & CIA_ERR_LOST_PA_PTE_INV) + printk(KERN_CRIT "CIA lost machine check: " + "PCI invalid PTE\n"); + if (cia->cia_err & CIA_ERR_LOST_FROM_WRT_ERR) + printk(KERN_CRIT "CIA lost machine check: " + "Write to flash ROM attempted\n"); + if (cia->cia_err & CIA_ERR_LOST_IOA_TIMEOUT) + printk(KERN_CRIT "CIA lost machine check: " + "I/O timeout\n"); +#endif /* CONFIG_VERBOSE_MCHECK */ + + return 1; +} + +void +cia_machine_check(unsigned long vector, unsigned long la_ptr) +{ + int expected; + + /* Clear the error before any reporting. */ + mb(); + mb(); /* magic */ + draina(); + cia_pci_clr_err(); + wrmces(rdmces()); /* reset machine check pending flag. */ + mb(); + + expected = mcheck_expected(0); + if (!expected && vector == 0x660) + expected = cia_decode_mchk(la_ptr); + process_mcheck_info(vector, la_ptr, "CIA", expected); +} diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c new file mode 100644 index 00000000..00096df0 --- /dev/null +++ b/arch/alpha/kernel/core_irongate.c @@ -0,0 +1,420 @@ +/* + * linux/arch/alpha/kernel/core_irongate.c + * + * Based on code written by David A. Rusling (david.rusling@reo.mts.dec.com). + * + * Copyright (C) 1999 Alpha Processor, Inc., + * (David Daniel, Stig Telfer, Soohoon Lee) + * + * Code common to all IRONGATE core logic chips. + */ + +#define __EXTERN_INLINE inline +#include <asm/io.h> +#include <asm/core_irongate.h> +#undef __EXTERN_INLINE + +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/initrd.h> +#include <linux/bootmem.h> + +#include <asm/ptrace.h> +#include <asm/pci.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "pci_impl.h" + +/* + * BIOS32-style PCI interface: + */ + +#define DEBUG_CONFIG 0 + +#if DEBUG_CONFIG +# define DBG_CFG(args) printk args +#else +# define DBG_CFG(args) +#endif + +igcsr32 *IronECC; + +/* + * Given a bus, device, and function number, compute resulting + * configuration space address accordingly. It is therefore not safe + * to have concurrent invocations to configuration space access + * routines, but there really shouldn't be any need for this. + * + * addr[31:24] reserved + * addr[23:16] bus number (8 bits = 128 possible buses) + * addr[15:11] Device number (5 bits) + * addr[10: 8] function number + * addr[ 7: 2] register number + * + * For IRONGATE: + * if (bus = addr[23:16]) == 0 + * then + * type 0 config cycle: + * addr_on_pci[31:11] = id selection for device = addr[15:11] + * addr_on_pci[10: 2] = addr[10: 2] ??? + * addr_on_pci[ 1: 0] = 00 + * else + * type 1 config cycle (pass on with no decoding): + * addr_on_pci[31:24] = 0 + * addr_on_pci[23: 2] = addr[23: 2] + * addr_on_pci[ 1: 0] = 01 + * fi + * + * Notes: + * The function number selects which function of a multi-function device + * (e.g., SCSI and Ethernet). + * + * The register selects a DWORD (32 bit) register offset. Hence it + * doesn't get shifted by 2 bits as we want to "drop" the bottom two + * bits. + */ + +static int +mk_conf_addr(struct pci_bus *pbus, unsigned int device_fn, int where, + unsigned long *pci_addr, unsigned char *type1) +{ + unsigned long addr; + u8 bus = pbus->number; + + DBG_CFG(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x, " + "pci_addr=0x%p, type1=0x%p)\n", + bus, device_fn, where, pci_addr, type1)); + + *type1 = (bus != 0); + + addr = (bus << 16) | (device_fn << 8) | where; + addr |= IRONGATE_CONF; + + *pci_addr = addr; + DBG_CFG(("mk_conf_addr: returning pci_addr 0x%lx\n", addr)); + return 0; +} + +static int +irongate_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + *value = __kernel_ldbu(*(vucp)addr); + break; + case 2: + *value = __kernel_ldwu(*(vusp)addr); + break; + case 4: + *value = *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int +irongate_write_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + __kernel_stb(value, *(vucp)addr); + mb(); + __kernel_ldbu(*(vucp)addr); + break; + case 2: + __kernel_stw(value, *(vusp)addr); + mb(); + __kernel_ldwu(*(vusp)addr); + break; + case 4: + *(vuip)addr = value; + mb(); + *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops irongate_pci_ops = +{ + .read = irongate_read_config, + .write = irongate_write_config, +}; + +int +irongate_pci_clr_err(void) +{ + unsigned int nmi_ctl=0; + unsigned int IRONGATE_jd; + +again: + IRONGATE_jd = IRONGATE0->stat_cmd; + printk("Iron stat_cmd %x\n", IRONGATE_jd); + IRONGATE0->stat_cmd = IRONGATE_jd; /* write again clears error bits */ + mb(); + IRONGATE_jd = IRONGATE0->stat_cmd; /* re-read to force write */ + + IRONGATE_jd = *IronECC; + printk("Iron ECC %x\n", IRONGATE_jd); + *IronECC = IRONGATE_jd; /* write again clears error bits */ + mb(); + IRONGATE_jd = *IronECC; /* re-read to force write */ + + /* Clear ALI NMI */ + nmi_ctl = inb(0x61); + nmi_ctl |= 0x0c; + outb(nmi_ctl, 0x61); + nmi_ctl &= ~0x0c; + outb(nmi_ctl, 0x61); + + IRONGATE_jd = *IronECC; + if (IRONGATE_jd & 0x300) goto again; + + return 0; +} + +#define IRONGATE_3GB 0xc0000000UL + +/* On Albacore (aka UP1500) with 4Gb of RAM we have to reserve some + memory for PCI. At this point we just reserve memory above 3Gb. Most + of this memory will be freed after PCI setup is done. */ +static void __init +albacore_init_arch(void) +{ + unsigned long memtop = max_low_pfn << PAGE_SHIFT; + unsigned long pci_mem = (memtop + 0x1000000UL) & ~0xffffffUL; + struct percpu_struct *cpu; + int pal_rev, pal_var; + + cpu = (struct percpu_struct*)((char*)hwrpb + hwrpb->processor_offset); + pal_rev = cpu->pal_revision & 0xffff; + pal_var = (cpu->pal_revision >> 16) & 0xff; + + /* Consoles earlier than A5.6-18 (OSF PALcode v1.62-2) set up + the CPU incorrectly (leave speculative stores enabled), + which causes memory corruption under certain conditions. + Issue a warning for such consoles. */ + if (alpha_using_srm && + (pal_rev < 0x13e || (pal_rev == 0x13e && pal_var < 2))) + printk(KERN_WARNING "WARNING! Upgrade to SRM A5.6-19 " + "or later\n"); + + if (pci_mem > IRONGATE_3GB) + pci_mem = IRONGATE_3GB; + IRONGATE0->pci_mem = pci_mem; + alpha_mv.min_mem_address = pci_mem; + if (memtop > pci_mem) { +#ifdef CONFIG_BLK_DEV_INITRD + extern unsigned long initrd_start, initrd_end; + extern void *move_initrd(unsigned long); + + /* Move the initrd out of the way. */ + if (initrd_end && __pa(initrd_end) > pci_mem) { + unsigned long size; + + size = initrd_end - initrd_start; + free_bootmem_node(NODE_DATA(0), __pa(initrd_start), + PAGE_ALIGN(size)); + if (!move_initrd(pci_mem)) + printk("irongate_init_arch: initrd too big " + "(%ldK)\ndisabling initrd\n", + size / 1024); + } +#endif + reserve_bootmem_node(NODE_DATA(0), pci_mem, memtop - + pci_mem, BOOTMEM_DEFAULT); + printk("irongate_init_arch: temporarily reserving " + "region %08lx-%08lx for PCI\n", pci_mem, memtop - 1); + } +} + +static void __init +irongate_setup_agp(void) +{ + /* Disable the GART window. AGPGART doesn't work due to yet + unresolved memory coherency issues... */ + IRONGATE0->agpva = IRONGATE0->agpva & ~0xf; + alpha_agpgart_size = 0; +} + +void __init +irongate_init_arch(void) +{ + struct pci_controller *hose; + int amd761 = (IRONGATE0->dev_vendor >> 16) > 0x7006; /* Albacore? */ + + IronECC = amd761 ? &IRONGATE0->bacsr54_eccms761 : &IRONGATE0->dramms; + + irongate_pci_clr_err(); + + if (amd761) + albacore_init_arch(); + + irongate_setup_agp(); + + /* + * Create our single hose. + */ + + pci_isa_hose = hose = alloc_pci_controller(); + hose->io_space = &ioport_resource; + hose->mem_space = &iomem_resource; + hose->index = 0; + + /* This is for userland consumption. For some reason, the 40-bit + PIO bias that we use in the kernel through KSEG didn't work for + the page table based user mappings. So make sure we get the + 43-bit PIO bias. */ + hose->sparse_mem_base = 0; + hose->sparse_io_base = 0; + hose->dense_mem_base + = (IRONGATE_MEM & 0xffffffffffUL) | 0x80000000000UL; + hose->dense_io_base + = (IRONGATE_IO & 0xffffffffffUL) | 0x80000000000UL; + + hose->sg_isa = hose->sg_pci = NULL; + __direct_map_base = 0; + __direct_map_size = 0xffffffff; +} + +/* + * IO map and AGP support + */ +#include <linux/vmalloc.h> +#include <linux/agp_backend.h> +#include <linux/agpgart.h> +#include <linux/export.h> +#include <asm/pgalloc.h> + +#define GET_PAGE_DIR_OFF(addr) (addr >> 22) +#define GET_PAGE_DIR_IDX(addr) (GET_PAGE_DIR_OFF(addr)) + +#define GET_GATT_OFF(addr) ((addr & 0x003ff000) >> 12) +#define GET_GATT(addr) (gatt_pages[GET_PAGE_DIR_IDX(addr)]) + +void __iomem * +irongate_ioremap(unsigned long addr, unsigned long size) +{ + struct vm_struct *area; + unsigned long vaddr; + unsigned long baddr, last; + u32 *mmio_regs, *gatt_pages, *cur_gatt, pte; + unsigned long gart_bus_addr; + + if (!alpha_agpgart_size) + return (void __iomem *)(addr + IRONGATE_MEM); + + gart_bus_addr = (unsigned long)IRONGATE0->bar0 & + PCI_BASE_ADDRESS_MEM_MASK; + + /* + * Check for within the AGP aperture... + */ + do { + /* + * Check the AGP area + */ + if (addr >= gart_bus_addr && addr + size - 1 < + gart_bus_addr + alpha_agpgart_size) + break; + + /* + * Not found - assume legacy ioremap + */ + return (void __iomem *)(addr + IRONGATE_MEM); + } while(0); + + mmio_regs = (u32 *)(((unsigned long)IRONGATE0->bar1 & + PCI_BASE_ADDRESS_MEM_MASK) + IRONGATE_MEM); + + gatt_pages = (u32 *)(phys_to_virt(mmio_regs[1])); /* FIXME */ + + /* + * Adjust the limits (mappings must be page aligned) + */ + if (addr & ~PAGE_MASK) { + printk("AGP ioremap failed... addr not page aligned (0x%lx)\n", + addr); + return (void __iomem *)(addr + IRONGATE_MEM); + } + last = addr + size - 1; + size = PAGE_ALIGN(last) - addr; + +#if 0 + printk("irongate_ioremap(0x%lx, 0x%lx)\n", addr, size); + printk("irongate_ioremap: gart_bus_addr 0x%lx\n", gart_bus_addr); + printk("irongate_ioremap: gart_aper_size 0x%lx\n", gart_aper_size); + printk("irongate_ioremap: mmio_regs %p\n", mmio_regs); + printk("irongate_ioremap: gatt_pages %p\n", gatt_pages); + + for(baddr = addr; baddr <= last; baddr += PAGE_SIZE) + { + cur_gatt = phys_to_virt(GET_GATT(baddr) & ~1); + pte = cur_gatt[GET_GATT_OFF(baddr)] & ~1; + printk("irongate_ioremap: cur_gatt %p pte 0x%x\n", + cur_gatt, pte); + } +#endif + + /* + * Map it + */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) return NULL; + + for(baddr = addr, vaddr = (unsigned long)area->addr; + baddr <= last; + baddr += PAGE_SIZE, vaddr += PAGE_SIZE) + { + cur_gatt = phys_to_virt(GET_GATT(baddr) & ~1); + pte = cur_gatt[GET_GATT_OFF(baddr)] & ~1; + + if (__alpha_remap_area_pages(vaddr, + pte, PAGE_SIZE, 0)) { + printk("AGP ioremap: FAILED to map...\n"); + vfree(area->addr); + return NULL; + } + } + + flush_tlb_all(); + + vaddr = (unsigned long)area->addr + (addr & ~PAGE_MASK); +#if 0 + printk("irongate_ioremap(0x%lx, 0x%lx) returning 0x%lx\n", + addr, size, vaddr); +#endif + return (void __iomem *)vaddr; +} +EXPORT_SYMBOL(irongate_ioremap); + +void +irongate_iounmap(volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (((long)addr >> 41) == -2) + return; /* kseg map, nothing to do */ + if (addr) + return vfree((void *)(PAGE_MASK & addr)); +} +EXPORT_SYMBOL(irongate_iounmap); diff --git a/arch/alpha/kernel/core_lca.c b/arch/alpha/kernel/core_lca.c new file mode 100644 index 00000000..cb2801cf --- /dev/null +++ b/arch/alpha/kernel/core_lca.c @@ -0,0 +1,515 @@ +/* + * linux/arch/alpha/kernel/core_lca.c + * + * Written by David Mosberger (davidm@cs.arizona.edu) with some code + * taken from Dave Rusling's (david.rusling@reo.mts.dec.com) 32-bit + * bios code. + * + * Code common to all LCA core logic chips. + */ + +#define __EXTERN_INLINE inline +#include <asm/io.h> +#include <asm/core_lca.h> +#undef __EXTERN_INLINE + +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/tty.h> + +#include <asm/ptrace.h> +#include <asm/irq_regs.h> +#include <asm/smp.h> + +#include "proto.h" +#include "pci_impl.h" + + +/* + * BIOS32-style PCI interface: + */ + +/* + * Machine check reasons. Defined according to PALcode sources + * (osf.h and platform.h). + */ +#define MCHK_K_TPERR 0x0080 +#define MCHK_K_TCPERR 0x0082 +#define MCHK_K_HERR 0x0084 +#define MCHK_K_ECC_C 0x0086 +#define MCHK_K_ECC_NC 0x0088 +#define MCHK_K_UNKNOWN 0x008A +#define MCHK_K_CACKSOFT 0x008C +#define MCHK_K_BUGCHECK 0x008E +#define MCHK_K_OS_BUGCHECK 0x0090 +#define MCHK_K_DCPERR 0x0092 +#define MCHK_K_ICPERR 0x0094 + + +/* + * Platform-specific machine-check reasons: + */ +#define MCHK_K_SIO_SERR 0x204 /* all platforms so far */ +#define MCHK_K_SIO_IOCHK 0x206 /* all platforms so far */ +#define MCHK_K_DCSR 0x208 /* all but Noname */ + + +/* + * Given a bus, device, and function number, compute resulting + * configuration space address and setup the LCA_IOC_CONF register + * accordingly. It is therefore not safe to have concurrent + * invocations to configuration space access routines, but there + * really shouldn't be any need for this. + * + * Type 0: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | | | | | | | | | | | | | | | | | | | | | | |F|F|F|R|R|R|R|R|R|0|0| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:11 Device select bit. + * 10:8 Function number + * 7:2 Register number + * + * Type 1: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | | | | | | | | | |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|1| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:24 reserved + * 23:16 bus number (8 bits = 128 possible buses) + * 15:11 Device number (5 bits) + * 10:8 function number + * 7:2 register number + * + * Notes: + * The function number selects which function of a multi-function device + * (e.g., SCSI and Ethernet). + * + * The register selects a DWORD (32 bit) register offset. Hence it + * doesn't get shifted by 2 bits as we want to "drop" the bottom two + * bits. + */ + +static int +mk_conf_addr(struct pci_bus *pbus, unsigned int device_fn, int where, + unsigned long *pci_addr) +{ + unsigned long addr; + u8 bus = pbus->number; + + if (bus == 0) { + int device = device_fn >> 3; + int func = device_fn & 0x7; + + /* Type 0 configuration cycle. */ + + if (device > 12) { + return -1; + } + + *(vulp)LCA_IOC_CONF = 0; + addr = (1 << (11 + device)) | (func << 8) | where; + } else { + /* Type 1 configuration cycle. */ + *(vulp)LCA_IOC_CONF = 1; + addr = (bus << 16) | (device_fn << 8) | where; + } + *pci_addr = addr; + return 0; +} + +static unsigned int +conf_read(unsigned long addr) +{ + unsigned long flags, code, stat0; + unsigned int value; + + local_irq_save(flags); + + /* Reset status register to avoid losing errors. */ + stat0 = *(vulp)LCA_IOC_STAT0; + *(vulp)LCA_IOC_STAT0 = stat0; + mb(); + + /* Access configuration space. */ + value = *(vuip)addr; + draina(); + + stat0 = *(vulp)LCA_IOC_STAT0; + if (stat0 & LCA_IOC_STAT0_ERR) { + code = ((stat0 >> LCA_IOC_STAT0_CODE_SHIFT) + & LCA_IOC_STAT0_CODE_MASK); + if (code != 1) { + printk("lca.c:conf_read: got stat0=%lx\n", stat0); + } + + /* Reset error status. */ + *(vulp)LCA_IOC_STAT0 = stat0; + mb(); + + /* Reset machine check. */ + wrmces(0x7); + + value = 0xffffffff; + } + local_irq_restore(flags); + return value; +} + +static void +conf_write(unsigned long addr, unsigned int value) +{ + unsigned long flags, code, stat0; + + local_irq_save(flags); /* avoid getting hit by machine check */ + + /* Reset status register to avoid losing errors. */ + stat0 = *(vulp)LCA_IOC_STAT0; + *(vulp)LCA_IOC_STAT0 = stat0; + mb(); + + /* Access configuration space. */ + *(vuip)addr = value; + draina(); + + stat0 = *(vulp)LCA_IOC_STAT0; + if (stat0 & LCA_IOC_STAT0_ERR) { + code = ((stat0 >> LCA_IOC_STAT0_CODE_SHIFT) + & LCA_IOC_STAT0_CODE_MASK); + if (code != 1) { + printk("lca.c:conf_write: got stat0=%lx\n", stat0); + } + + /* Reset error status. */ + *(vulp)LCA_IOC_STAT0 = stat0; + mb(); + + /* Reset machine check. */ + wrmces(0x7); + } + local_irq_restore(flags); +} + +static int +lca_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + unsigned long addr, pci_addr; + long mask; + int shift; + + if (mk_conf_addr(bus, devfn, where, &pci_addr)) + return PCIBIOS_DEVICE_NOT_FOUND; + + shift = (where & 3) * 8; + mask = (size - 1) * 8; + addr = (pci_addr << 5) + mask + LCA_CONF; + *value = conf_read(addr) >> (shift); + return PCIBIOS_SUCCESSFUL; +} + +static int +lca_write_config(struct pci_bus *bus, unsigned int devfn, int where, int size, + u32 value) +{ + unsigned long addr, pci_addr; + long mask; + + if (mk_conf_addr(bus, devfn, where, &pci_addr)) + return PCIBIOS_DEVICE_NOT_FOUND; + + mask = (size - 1) * 8; + addr = (pci_addr << 5) + mask + LCA_CONF; + conf_write(addr, value << ((where & 3) * 8)); + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops lca_pci_ops = +{ + .read = lca_read_config, + .write = lca_write_config, +}; + +void +lca_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) +{ + wmb(); + *(vulp)LCA_IOC_TBIA = 0; + mb(); +} + +void __init +lca_init_arch(void) +{ + struct pci_controller *hose; + + /* + * Create our single hose. + */ + + pci_isa_hose = hose = alloc_pci_controller(); + hose->io_space = &ioport_resource; + hose->mem_space = &iomem_resource; + hose->index = 0; + + hose->sparse_mem_base = LCA_SPARSE_MEM - IDENT_ADDR; + hose->dense_mem_base = LCA_DENSE_MEM - IDENT_ADDR; + hose->sparse_io_base = LCA_IO - IDENT_ADDR; + hose->dense_io_base = 0; + + /* + * Set up the PCI to main memory translation windows. + * + * Mimic the SRM settings for the direct-map window. + * Window 0 is scatter-gather 8MB at 8MB (for isa). + * Window 1 is direct access 1GB at 1GB. + * + * Note that we do not try to save any of the DMA window CSRs + * before setting them, since we cannot read those CSRs on LCA. + */ + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); + hose->sg_pci = NULL; + __direct_map_base = 0x40000000; + __direct_map_size = 0x40000000; + + *(vulp)LCA_IOC_W_BASE0 = hose->sg_isa->dma_base | (3UL << 32); + *(vulp)LCA_IOC_W_MASK0 = (hose->sg_isa->size - 1) & 0xfff00000; + *(vulp)LCA_IOC_T_BASE0 = virt_to_phys(hose->sg_isa->ptes); + + *(vulp)LCA_IOC_W_BASE1 = __direct_map_base | (2UL << 32); + *(vulp)LCA_IOC_W_MASK1 = (__direct_map_size - 1) & 0xfff00000; + *(vulp)LCA_IOC_T_BASE1 = 0; + + *(vulp)LCA_IOC_TB_ENA = 0x80; + + lca_pci_tbi(hose, 0, -1); + + /* + * Disable PCI parity for now. The NCR53c810 chip has + * troubles meeting the PCI spec which results in + * data parity errors. + */ + *(vulp)LCA_IOC_PAR_DIS = 1UL<<5; + + /* + * Finally, set up for restoring the correct HAE if using SRM. + * Again, since we cannot read many of the CSRs on the LCA, + * one of which happens to be the HAE, we save the value that + * the SRM will expect... + */ + if (alpha_using_srm) + srm_hae = 0x80000000UL; +} + +/* + * Constants used during machine-check handling. I suppose these + * could be moved into lca.h but I don't see much reason why anybody + * else would want to use them. + */ + +#define ESR_EAV (1UL<< 0) /* error address valid */ +#define ESR_CEE (1UL<< 1) /* correctable error */ +#define ESR_UEE (1UL<< 2) /* uncorrectable error */ +#define ESR_WRE (1UL<< 3) /* write-error */ +#define ESR_SOR (1UL<< 4) /* error source */ +#define ESR_CTE (1UL<< 7) /* cache-tag error */ +#define ESR_MSE (1UL<< 9) /* multiple soft errors */ +#define ESR_MHE (1UL<<10) /* multiple hard errors */ +#define ESR_NXM (1UL<<12) /* non-existent memory */ + +#define IOC_ERR ( 1<<4) /* ioc logs an error */ +#define IOC_CMD_SHIFT 0 +#define IOC_CMD (0xf<<IOC_CMD_SHIFT) +#define IOC_CODE_SHIFT 8 +#define IOC_CODE (0xf<<IOC_CODE_SHIFT) +#define IOC_LOST ( 1<<5) +#define IOC_P_NBR ((__u32) ~((1<<13) - 1)) + +static void +mem_error(unsigned long esr, unsigned long ear) +{ + printk(" %s %s error to %s occurred at address %x\n", + ((esr & ESR_CEE) ? "Correctable" : + (esr & ESR_UEE) ? "Uncorrectable" : "A"), + (esr & ESR_WRE) ? "write" : "read", + (esr & ESR_SOR) ? "memory" : "b-cache", + (unsigned) (ear & 0x1ffffff8)); + if (esr & ESR_CTE) { + printk(" A b-cache tag parity error was detected.\n"); + } + if (esr & ESR_MSE) { + printk(" Several other correctable errors occurred.\n"); + } + if (esr & ESR_MHE) { + printk(" Several other uncorrectable errors occurred.\n"); + } + if (esr & ESR_NXM) { + printk(" Attempted to access non-existent memory.\n"); + } +} + +static void +ioc_error(__u32 stat0, __u32 stat1) +{ + static const char * const pci_cmd[] = { + "Interrupt Acknowledge", "Special", "I/O Read", "I/O Write", + "Rsvd 1", "Rsvd 2", "Memory Read", "Memory Write", "Rsvd3", + "Rsvd4", "Configuration Read", "Configuration Write", + "Memory Read Multiple", "Dual Address", "Memory Read Line", + "Memory Write and Invalidate" + }; + static const char * const err_name[] = { + "exceeded retry limit", "no device", "bad data parity", + "target abort", "bad address parity", "page table read error", + "invalid page", "data error" + }; + unsigned code = (stat0 & IOC_CODE) >> IOC_CODE_SHIFT; + unsigned cmd = (stat0 & IOC_CMD) >> IOC_CMD_SHIFT; + + printk(" %s initiated PCI %s cycle to address %x" + " failed due to %s.\n", + code > 3 ? "PCI" : "CPU", pci_cmd[cmd], stat1, err_name[code]); + + if (code == 5 || code == 6) { + printk(" (Error occurred at PCI memory address %x.)\n", + (stat0 & ~IOC_P_NBR)); + } + if (stat0 & IOC_LOST) { + printk(" Other PCI errors occurred simultaneously.\n"); + } +} + +void +lca_machine_check(unsigned long vector, unsigned long la_ptr) +{ + const char * reason; + union el_lca el; + + el.c = (struct el_common *) la_ptr; + + wrmces(rdmces()); /* reset machine check pending flag */ + + printk(KERN_CRIT "LCA machine check: vector=%#lx pc=%#lx code=%#x\n", + vector, get_irq_regs()->pc, (unsigned int) el.c->code); + + /* + * The first quadword after the common header always seems to + * be the machine check reason---don't know why this isn't + * part of the common header instead. In the case of a long + * logout frame, the upper 32 bits is the machine check + * revision level, which we ignore for now. + */ + switch ((unsigned int) el.c->code) { + case MCHK_K_TPERR: reason = "tag parity error"; break; + case MCHK_K_TCPERR: reason = "tag control parity error"; break; + case MCHK_K_HERR: reason = "access to non-existent memory"; break; + case MCHK_K_ECC_C: reason = "correctable ECC error"; break; + case MCHK_K_ECC_NC: reason = "non-correctable ECC error"; break; + case MCHK_K_CACKSOFT: reason = "MCHK_K_CACKSOFT"; break; + case MCHK_K_BUGCHECK: reason = "illegal exception in PAL mode"; break; + case MCHK_K_OS_BUGCHECK: reason = "callsys in kernel mode"; break; + case MCHK_K_DCPERR: reason = "d-cache parity error"; break; + case MCHK_K_ICPERR: reason = "i-cache parity error"; break; + case MCHK_K_SIO_SERR: reason = "SIO SERR occurred on PCI bus"; break; + case MCHK_K_SIO_IOCHK: reason = "SIO IOCHK occurred on ISA bus"; break; + case MCHK_K_DCSR: reason = "MCHK_K_DCSR"; break; + case MCHK_K_UNKNOWN: + default: reason = "unknown"; break; + } + + switch (el.c->size) { + case sizeof(struct el_lca_mcheck_short): + printk(KERN_CRIT + " Reason: %s (short frame%s, dc_stat=%#lx):\n", + reason, el.c->retry ? ", retryable" : "", + el.s->dc_stat); + if (el.s->esr & ESR_EAV) { + mem_error(el.s->esr, el.s->ear); + } + if (el.s->ioc_stat0 & IOC_ERR) { + ioc_error(el.s->ioc_stat0, el.s->ioc_stat1); + } + break; + + case sizeof(struct el_lca_mcheck_long): + printk(KERN_CRIT " Reason: %s (long frame%s):\n", + reason, el.c->retry ? ", retryable" : ""); + printk(KERN_CRIT + " reason: %#lx exc_addr: %#lx dc_stat: %#lx\n", + el.l->pt[0], el.l->exc_addr, el.l->dc_stat); + printk(KERN_CRIT " car: %#lx\n", el.l->car); + if (el.l->esr & ESR_EAV) { + mem_error(el.l->esr, el.l->ear); + } + if (el.l->ioc_stat0 & IOC_ERR) { + ioc_error(el.l->ioc_stat0, el.l->ioc_stat1); + } + break; + + default: + printk(KERN_CRIT " Unknown errorlog size %d\n", el.c->size); + } + + /* Dump the logout area to give all info. */ +#ifdef CONFIG_VERBOSE_MCHECK + if (alpha_verbose_mcheck > 1) { + unsigned long * ptr = (unsigned long *) la_ptr; + long i; + for (i = 0; i < el.c->size / sizeof(long); i += 2) { + printk(KERN_CRIT " +%8lx %016lx %016lx\n", + i*sizeof(long), ptr[i], ptr[i+1]); + } + } +#endif /* CONFIG_VERBOSE_MCHECK */ +} + +/* + * The following routines are needed to support the SPEED changing + * necessary to successfully manage the thermal problem on the AlphaBook1. + */ + +void +lca_clock_print(void) +{ + long pmr_reg; + + pmr_reg = LCA_READ_PMR; + + printk("Status of clock control:\n"); + printk("\tPrimary clock divisor\t0x%lx\n", LCA_GET_PRIMARY(pmr_reg)); + printk("\tOverride clock divisor\t0x%lx\n", LCA_GET_OVERRIDE(pmr_reg)); + printk("\tInterrupt override is %s\n", + (pmr_reg & LCA_PMR_INTO) ? "on" : "off"); + printk("\tDMA override is %s\n", + (pmr_reg & LCA_PMR_DMAO) ? "on" : "off"); + +} + +int +lca_get_clock(void) +{ + long pmr_reg; + + pmr_reg = LCA_READ_PMR; + return(LCA_GET_PRIMARY(pmr_reg)); + +} + +void +lca_clock_fiddle(int divisor) +{ + long pmr_reg; + + pmr_reg = LCA_READ_PMR; + LCA_SET_PRIMARY_CLOCK(pmr_reg, divisor); + /* lca_norm_clock = divisor; */ + LCA_WRITE_PMR(pmr_reg); + mb(); +} diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c new file mode 100644 index 00000000..53dd2f1a --- /dev/null +++ b/arch/alpha/kernel/core_marvel.c @@ -0,0 +1,1139 @@ +/* + * linux/arch/alpha/kernel/core_marvel.c + * + * Code common to all Marvel based systems. + */ + +#define __EXTERN_INLINE inline +#include <asm/io.h> +#include <asm/core_marvel.h> +#undef __EXTERN_INLINE + +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/vmalloc.h> +#include <linux/mc146818rtc.h> +#include <linux/rtc.h> +#include <linux/module.h> +#include <linux/bootmem.h> + +#include <asm/ptrace.h> +#include <asm/smp.h> +#include <asm/gct.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> +#include <asm/rtc.h> +#include <asm/vga.h> + +#include "proto.h" +#include "pci_impl.h" + + +/* + * Debug helpers + */ +#define DEBUG_CONFIG 0 + +#if DEBUG_CONFIG +# define DBG_CFG(args) printk args +#else +# define DBG_CFG(args) +#endif + + +/* + * Private data + */ +static struct io7 *io7_head = NULL; + + +/* + * Helper functions + */ +static unsigned long __attribute__ ((unused)) +read_ev7_csr(int pe, unsigned long offset) +{ + ev7_csr *ev7csr = EV7_CSR_KERN(pe, offset); + unsigned long q; + + mb(); + q = ev7csr->csr; + mb(); + + return q; +} + +static void __attribute__ ((unused)) +write_ev7_csr(int pe, unsigned long offset, unsigned long q) +{ + ev7_csr *ev7csr = EV7_CSR_KERN(pe, offset); + + mb(); + ev7csr->csr = q; + mb(); +} + +static char * __init +mk_resource_name(int pe, int port, char *str) +{ + char tmp[80]; + char *name; + + sprintf(tmp, "PCI %s PE %d PORT %d", str, pe, port); + name = alloc_bootmem(strlen(tmp) + 1); + strcpy(name, tmp); + + return name; +} + +inline struct io7 * +marvel_next_io7(struct io7 *prev) +{ + return (prev ? prev->next : io7_head); +} + +struct io7 * +marvel_find_io7(int pe) +{ + struct io7 *io7; + + for (io7 = io7_head; io7 && io7->pe != pe; io7 = io7->next) + continue; + + return io7; +} + +static struct io7 * __init +alloc_io7(unsigned int pe) +{ + struct io7 *io7; + struct io7 *insp; + int h; + + if (marvel_find_io7(pe)) { + printk(KERN_WARNING "IO7 at PE %d already allocated!\n", pe); + return NULL; + } + + io7 = alloc_bootmem(sizeof(*io7)); + io7->pe = pe; + spin_lock_init(&io7->irq_lock); + + for (h = 0; h < 4; h++) { + io7->ports[h].io7 = io7; + io7->ports[h].port = h; + io7->ports[h].enabled = 0; /* default to disabled */ + } + + /* + * Insert in pe sorted order. + */ + if (NULL == io7_head) /* empty list */ + io7_head = io7; + else if (io7_head->pe > io7->pe) { /* insert at head */ + io7->next = io7_head; + io7_head = io7; + } else { /* insert at position */ + for (insp = io7_head; insp; insp = insp->next) { + if (insp->pe == io7->pe) { + printk(KERN_ERR "Too many IO7s at PE %d\n", + io7->pe); + return NULL; + } + + if (NULL == insp->next || + insp->next->pe > io7->pe) { /* insert here */ + io7->next = insp->next; + insp->next = io7; + break; + } + } + + if (NULL == insp) { /* couldn't insert ?!? */ + printk(KERN_WARNING "Failed to insert IO7 at PE %d " + " - adding at head of list\n", io7->pe); + io7->next = io7_head; + io7_head = io7; + } + } + + return io7; +} + +void +io7_clear_errors(struct io7 *io7) +{ + io7_port7_csrs *p7csrs; + io7_ioport_csrs *csrs; + int port; + + + /* + * First the IO ports. + */ + for (port = 0; port < 4; port++) { + csrs = IO7_CSRS_KERN(io7->pe, port); + + csrs->POx_ERR_SUM.csr = -1UL; + csrs->POx_TLB_ERR.csr = -1UL; + csrs->POx_SPL_COMPLT.csr = -1UL; + csrs->POx_TRANS_SUM.csr = -1UL; + } + + /* + * Then the common ones. + */ + p7csrs = IO7_PORT7_CSRS_KERN(io7->pe); + + p7csrs->PO7_ERROR_SUM.csr = -1UL; + p7csrs->PO7_UNCRR_SYM.csr = -1UL; + p7csrs->PO7_CRRCT_SYM.csr = -1UL; +} + + +/* + * IO7 PCI, PCI/X, AGP configuration. + */ +static void __init +io7_init_hose(struct io7 *io7, int port) +{ + static int hose_index = 0; + + struct pci_controller *hose = alloc_pci_controller(); + struct io7_port *io7_port = &io7->ports[port]; + io7_ioport_csrs *csrs = IO7_CSRS_KERN(io7->pe, port); + int i; + + hose->index = hose_index++; /* arbitrary */ + + /* + * We don't have an isa or legacy hose, but glibc expects to be + * able to use the bus == 0 / dev == 0 form of the iobase syscall + * to determine information about the i/o system. Since XFree86 + * relies on glibc's determination to tell whether or not to use + * sparse access, we need to point the pci_isa_hose at a real hose + * so at least that determination is correct. + */ + if (hose->index == 0) + pci_isa_hose = hose; + + io7_port->csrs = csrs; + io7_port->hose = hose; + hose->sysdata = io7_port; + + hose->io_space = alloc_resource(); + hose->mem_space = alloc_resource(); + + /* + * Base addresses for userland consumption. Since these are going + * to be mapped, they are pure physical addresses. + */ + hose->sparse_mem_base = hose->sparse_io_base = 0; + hose->dense_mem_base = IO7_MEM_PHYS(io7->pe, port); + hose->dense_io_base = IO7_IO_PHYS(io7->pe, port); + + /* + * Base addresses and resource ranges for kernel consumption. + */ + hose->config_space_base = (unsigned long)IO7_CONF_KERN(io7->pe, port); + + hose->io_space->start = (unsigned long)IO7_IO_KERN(io7->pe, port); + hose->io_space->end = hose->io_space->start + IO7_IO_SPACE - 1; + hose->io_space->name = mk_resource_name(io7->pe, port, "IO"); + hose->io_space->flags = IORESOURCE_IO; + + hose->mem_space->start = (unsigned long)IO7_MEM_KERN(io7->pe, port); + hose->mem_space->end = hose->mem_space->start + IO7_MEM_SPACE - 1; + hose->mem_space->name = mk_resource_name(io7->pe, port, "MEM"); + hose->mem_space->flags = IORESOURCE_MEM; + + if (request_resource(&ioport_resource, hose->io_space) < 0) + printk(KERN_ERR "Failed to request IO on hose %d\n", + hose->index); + if (request_resource(&iomem_resource, hose->mem_space) < 0) + printk(KERN_ERR "Failed to request MEM on hose %d\n", + hose->index); + + /* + * Save the existing DMA window settings for later restoration. + */ + for (i = 0; i < 4; i++) { + io7_port->saved_wbase[i] = csrs->POx_WBASE[i].csr; + io7_port->saved_wmask[i] = csrs->POx_WMASK[i].csr; + io7_port->saved_tbase[i] = csrs->POx_TBASE[i].csr; + } + + /* + * Set up the PCI to main memory translation windows. + * + * Window 0 is scatter-gather 8MB at 8MB + * Window 1 is direct access 1GB at 2GB + * Window 2 is scatter-gather (up-to) 1GB at 3GB + * Window 3 is disabled + */ + + /* + * TBIA before modifying windows. + */ + marvel_pci_tbi(hose, 0, -1); + + /* + * Set up window 0 for scatter-gather 8MB at 8MB. + */ + hose->sg_isa = iommu_arena_new_node(marvel_cpuid_to_nid(io7->pe), + hose, 0x00800000, 0x00800000, 0); + hose->sg_isa->align_entry = 8; /* cache line boundary */ + csrs->POx_WBASE[0].csr = + hose->sg_isa->dma_base | wbase_m_ena | wbase_m_sg; + csrs->POx_WMASK[0].csr = (hose->sg_isa->size - 1) & wbase_m_addr; + csrs->POx_TBASE[0].csr = virt_to_phys(hose->sg_isa->ptes); + + /* + * Set up window 1 for direct-mapped 1GB at 2GB. + */ + csrs->POx_WBASE[1].csr = __direct_map_base | wbase_m_ena; + csrs->POx_WMASK[1].csr = (__direct_map_size - 1) & wbase_m_addr; + csrs->POx_TBASE[1].csr = 0; + + /* + * Set up window 2 for scatter-gather (up-to) 1GB at 3GB. + */ + hose->sg_pci = iommu_arena_new_node(marvel_cpuid_to_nid(io7->pe), + hose, 0xc0000000, 0x40000000, 0); + hose->sg_pci->align_entry = 8; /* cache line boundary */ + csrs->POx_WBASE[2].csr = + hose->sg_pci->dma_base | wbase_m_ena | wbase_m_sg; + csrs->POx_WMASK[2].csr = (hose->sg_pci->size - 1) & wbase_m_addr; + csrs->POx_TBASE[2].csr = virt_to_phys(hose->sg_pci->ptes); + + /* + * Disable window 3. + */ + csrs->POx_WBASE[3].csr = 0; + + /* + * Make sure that the AGP Monster Window is disabled. + */ + csrs->POx_CTRL.csr &= ~(1UL << 61); + +#if 1 + printk("FIXME: disabling master aborts\n"); + csrs->POx_MSK_HEI.csr &= ~(3UL << 14); +#endif + /* + * TBIA after modifying windows. + */ + marvel_pci_tbi(hose, 0, -1); +} + +static void __init +marvel_init_io7(struct io7 *io7) +{ + int i; + + printk("Initializing IO7 at PID %d\n", io7->pe); + + /* + * Get the Port 7 CSR pointer. + */ + io7->csrs = IO7_PORT7_CSRS_KERN(io7->pe); + + /* + * Init this IO7's hoses. + */ + for (i = 0; i < IO7_NUM_PORTS; i++) { + io7_ioport_csrs *csrs = IO7_CSRS_KERN(io7->pe, i); + if (csrs->POx_CACHE_CTL.csr == 8) { + io7->ports[i].enabled = 1; + io7_init_hose(io7, i); + } + } +} + +void +marvel_io7_present(gct6_node *node) +{ + int pe; + + if (node->type != GCT_TYPE_HOSE || + node->subtype != GCT_SUBTYPE_IO_PORT_MODULE) + return; + + pe = (node->id >> 8) & 0xff; + printk("Found an IO7 at PID %d\n", pe); + + alloc_io7(pe); +} + +static void __init +marvel_find_console_vga_hose(void) +{ + u64 *pu64 = (u64 *)((u64)hwrpb + hwrpb->ctbt_offset); + + if (pu64[7] == 3) { /* TERM_TYPE == graphics */ + struct pci_controller *hose = NULL; + int h = (pu64[30] >> 24) & 0xff; /* TERM_OUT_LOC, hose # */ + struct io7 *io7; + int pid, port; + + /* FIXME - encoding is going to have to change for Marvel + * since hose will be able to overflow a byte... + * need to fix this decode when the console + * changes its encoding + */ + printk("console graphics is on hose %d (console)\n", h); + + /* + * The console's hose numbering is: + * + * hose<n:2>: PID + * hose<1:0>: PORT + * + * We need to find the hose at that pid and port + */ + pid = h >> 2; + port = h & 3; + if ((io7 = marvel_find_io7(pid))) + hose = io7->ports[port].hose; + + if (hose) { + printk("Console graphics on hose %d\n", hose->index); + pci_vga_hose = hose; + } + } +} + +gct6_search_struct gct_wanted_node_list[] = { + { GCT_TYPE_HOSE, GCT_SUBTYPE_IO_PORT_MODULE, marvel_io7_present }, + { 0, 0, NULL } +}; + +/* + * In case the GCT is not complete, let the user specify PIDs with IO7s + * at boot time. Syntax is 'io7=a,b,c,...,n' where a-n are the PIDs (decimal) + * where IO7s are connected + */ +static int __init +marvel_specify_io7(char *str) +{ + unsigned long pid; + struct io7 *io7; + char *pchar; + + do { + pid = simple_strtoul(str, &pchar, 0); + if (pchar != str) { + printk("User-specified IO7 at PID %lu\n", pid); + io7 = alloc_io7(pid); + if (io7) marvel_init_io7(io7); + } + + if (pchar == str) pchar++; + str = pchar; + } while(*str); + + return 1; +} +__setup("io7=", marvel_specify_io7); + +void __init +marvel_init_arch(void) +{ + struct io7 *io7; + + /* With multiple PCI busses, we play with I/O as physical addrs. */ + ioport_resource.end = ~0UL; + + /* PCI DMA Direct Mapping is 1GB at 2GB. */ + __direct_map_base = 0x80000000; + __direct_map_size = 0x40000000; + + /* Parse the config tree. */ + gct6_find_nodes(GCT_NODE_PTR(0), gct_wanted_node_list); + + /* Init the io7s. */ + for (io7 = NULL; NULL != (io7 = marvel_next_io7(io7)); ) + marvel_init_io7(io7); + + /* Check for graphic console location (if any). */ + marvel_find_console_vga_hose(); +} + +void +marvel_kill_arch(int mode) +{ +} + + +/* + * PCI Configuration Space access functions + * + * Configuration space addresses have the following format: + * + * |2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * |3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|R|R| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * n:24 reserved for hose base + * 23:16 bus number (8 bits = 128 possible buses) + * 15:11 Device number (5 bits) + * 10:8 function number + * 7:2 register number + * + * Notes: + * IO7 determines whether to use a type 0 or type 1 config cycle + * based on the bus number. Therefore the bus number must be set + * to 0 for the root bus on any hose. + * + * The function number selects which function of a multi-function device + * (e.g., SCSI and Ethernet). + * + */ + +static inline unsigned long +build_conf_addr(struct pci_controller *hose, u8 bus, + unsigned int devfn, int where) +{ + return (hose->config_space_base | (bus << 16) | (devfn << 8) | where); +} + +static unsigned long +mk_conf_addr(struct pci_bus *pbus, unsigned int devfn, int where) +{ + struct pci_controller *hose = pbus->sysdata; + struct io7_port *io7_port; + unsigned long addr = 0; + u8 bus = pbus->number; + + if (!hose) + return addr; + + /* Check for enabled. */ + io7_port = hose->sysdata; + if (!io7_port->enabled) + return addr; + + if (!pbus->parent) { /* No parent means peer PCI bus. */ + /* Don't support idsel > 20 on primary bus. */ + if (devfn >= PCI_DEVFN(21, 0)) + return addr; + bus = 0; + } + + addr = build_conf_addr(hose, bus, devfn, where); + + DBG_CFG(("mk_conf_addr: returning pci_addr 0x%lx\n", addr)); + return addr; +} + +static int +marvel_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + unsigned long addr; + + if (0 == (addr = mk_conf_addr(bus, devfn, where))) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch(size) { + case 1: + *value = __kernel_ldbu(*(vucp)addr); + break; + case 2: + *value = __kernel_ldwu(*(vusp)addr); + break; + case 4: + *value = *(vuip)addr; + break; + default: + return PCIBIOS_FUNC_NOT_SUPPORTED; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int +marvel_write_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + unsigned long addr; + + if (0 == (addr = mk_conf_addr(bus, devfn, where))) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + __kernel_stb(value, *(vucp)addr); + mb(); + __kernel_ldbu(*(vucp)addr); + break; + case 2: + __kernel_stw(value, *(vusp)addr); + mb(); + __kernel_ldwu(*(vusp)addr); + break; + case 4: + *(vuip)addr = value; + mb(); + *(vuip)addr; + break; + default: + return PCIBIOS_FUNC_NOT_SUPPORTED; + } + + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops marvel_pci_ops = +{ + .read = marvel_read_config, + .write = marvel_write_config, +}; + + +/* + * Other PCI helper functions. + */ +void +marvel_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) +{ + io7_ioport_csrs *csrs = ((struct io7_port *)hose->sysdata)->csrs; + + wmb(); + csrs->POx_SG_TBIA.csr = 0; + mb(); + csrs->POx_SG_TBIA.csr; +} + + + +/* + * RTC Support + */ +struct marvel_rtc_access_info { + unsigned long function; + unsigned long index; + unsigned long data; +}; + +static void +__marvel_access_rtc(void *info) +{ + struct marvel_rtc_access_info *rtc_access = info; + + register unsigned long __r0 __asm__("$0"); + register unsigned long __r16 __asm__("$16") = rtc_access->function; + register unsigned long __r17 __asm__("$17") = rtc_access->index; + register unsigned long __r18 __asm__("$18") = rtc_access->data; + + __asm__ __volatile__( + "call_pal %4 # cserve rtc" + : "=r"(__r16), "=r"(__r17), "=r"(__r18), "=r"(__r0) + : "i"(PAL_cserve), "0"(__r16), "1"(__r17), "2"(__r18) + : "$1", "$22", "$23", "$24", "$25"); + + rtc_access->data = __r0; +} + +static u8 +__marvel_rtc_io(u8 b, unsigned long addr, int write) +{ + static u8 index = 0; + + struct marvel_rtc_access_info rtc_access; + u8 ret = 0; + + switch(addr) { + case 0x70: /* RTC_PORT(0) */ + if (write) index = b; + ret = index; + break; + + case 0x71: /* RTC_PORT(1) */ + rtc_access.index = index; + rtc_access.data = bcd2bin(b); + rtc_access.function = 0x48 + !write; /* GET/PUT_TOY */ + + __marvel_access_rtc(&rtc_access); + + ret = bin2bcd(rtc_access.data); + break; + + default: + printk(KERN_WARNING "Illegal RTC port %lx\n", addr); + break; + } + + return ret; +} + + +/* + * IO map support. + */ +void __iomem * +marvel_ioremap(unsigned long addr, unsigned long size) +{ + struct pci_controller *hose; + unsigned long baddr, last; + struct vm_struct *area; + unsigned long vaddr; + unsigned long *ptes; + unsigned long pfn; + + /* + * Adjust the address. + */ + FIXUP_MEMADDR_VGA(addr); + + /* + * Find the hose. + */ + for (hose = hose_head; hose; hose = hose->next) { + if ((addr >> 32) == (hose->mem_space->start >> 32)) + break; + } + if (!hose) + return NULL; + + /* + * We have the hose - calculate the bus limits. + */ + baddr = addr - hose->mem_space->start; + last = baddr + size - 1; + + /* + * Is it direct-mapped? + */ + if ((baddr >= __direct_map_base) && + ((baddr + size - 1) < __direct_map_base + __direct_map_size)) { + addr = IDENT_ADDR | (baddr - __direct_map_base); + return (void __iomem *) addr; + } + + /* + * Check the scatter-gather arena. + */ + if (hose->sg_pci && + baddr >= (unsigned long)hose->sg_pci->dma_base && + last < (unsigned long)hose->sg_pci->dma_base + hose->sg_pci->size) { + + /* + * Adjust the limits (mappings must be page aligned) + */ + baddr -= hose->sg_pci->dma_base; + last -= hose->sg_pci->dma_base; + baddr &= PAGE_MASK; + size = PAGE_ALIGN(last) - baddr; + + /* + * Map it. + */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) + return NULL; + + ptes = hose->sg_pci->ptes; + for (vaddr = (unsigned long)area->addr; + baddr <= last; + baddr += PAGE_SIZE, vaddr += PAGE_SIZE) { + pfn = ptes[baddr >> PAGE_SHIFT]; + if (!(pfn & 1)) { + printk("ioremap failed... pte not valid...\n"); + vfree(area->addr); + return NULL; + } + pfn >>= 1; /* make it a true pfn */ + + if (__alpha_remap_area_pages(vaddr, + pfn << PAGE_SHIFT, + PAGE_SIZE, 0)) { + printk("FAILED to map...\n"); + vfree(area->addr); + return NULL; + } + } + + flush_tlb_all(); + + vaddr = (unsigned long)area->addr + (addr & ~PAGE_MASK); + + return (void __iomem *) vaddr; + } + + /* Assume it was already a reasonable address */ + vaddr = baddr + hose->mem_space->start; + return (void __iomem *) vaddr; +} + +void +marvel_iounmap(volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (addr >= VMALLOC_START) + vfree((void *)(PAGE_MASK & addr)); +} + +int +marvel_is_mmio(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + + if (addr >= VMALLOC_START) + return 1; + else + return (addr & 0xFF000000UL) == 0; +} + +#define __marvel_is_port_kbd(a) (((a) == 0x60) || ((a) == 0x64)) +#define __marvel_is_port_rtc(a) (((a) == 0x70) || ((a) == 0x71)) + +void __iomem *marvel_ioportmap (unsigned long addr) +{ + FIXUP_IOADDR_VGA(addr); + return (void __iomem *)addr; +} + +unsigned int +marvel_ioread8(void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (__marvel_is_port_kbd(addr)) + return 0; + else if (__marvel_is_port_rtc(addr)) + return __marvel_rtc_io(0, addr, 0); + else if (marvel_is_ioaddr(addr)) + return __kernel_ldbu(*(vucp)addr); + else + /* this should catch other legacy addresses + that would normally fail on MARVEL, + because there really is nothing there... + */ + return ~0; +} + +void +marvel_iowrite8(u8 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (__marvel_is_port_kbd(addr)) + return; + else if (__marvel_is_port_rtc(addr)) + __marvel_rtc_io(b, addr, 1); + else if (marvel_is_ioaddr(addr)) + __kernel_stb(b, *(vucp)addr); +} + +#ifndef CONFIG_ALPHA_GENERIC +EXPORT_SYMBOL(marvel_ioremap); +EXPORT_SYMBOL(marvel_iounmap); +EXPORT_SYMBOL(marvel_is_mmio); +EXPORT_SYMBOL(marvel_ioportmap); +EXPORT_SYMBOL(marvel_ioread8); +EXPORT_SYMBOL(marvel_iowrite8); +#endif + +/* + * NUMA Support + */ +/********** + * FIXME - for now each cpu is a node by itself + * -- no real support for striped mode + ********** + */ +int +marvel_pa_to_nid(unsigned long pa) +{ + int cpuid; + + if ((pa >> 43) & 1) /* I/O */ + cpuid = (~(pa >> 35) & 0xff); + else /* mem */ + cpuid = ((pa >> 34) & 0x3) | ((pa >> (37 - 2)) & (0x1f << 2)); + + return marvel_cpuid_to_nid(cpuid); +} + +int +marvel_cpuid_to_nid(int cpuid) +{ + return cpuid; +} + +unsigned long +marvel_node_mem_start(int nid) +{ + unsigned long pa; + + pa = (nid & 0x3) | ((nid & (0x1f << 2)) << 1); + pa <<= 34; + + return pa; +} + +unsigned long +marvel_node_mem_size(int nid) +{ + return 16UL * 1024 * 1024 * 1024; /* 16GB */ +} + + +/* + * AGP GART Support. + */ +#include <linux/agp_backend.h> +#include <asm/agp_backend.h> +#include <linux/slab.h> +#include <linux/delay.h> + +struct marvel_agp_aperture { + struct pci_iommu_arena *arena; + long pg_start; + long pg_count; +}; + +static int +marvel_agp_setup(alpha_agp_info *agp) +{ + struct marvel_agp_aperture *aper; + + if (!alpha_agpgart_size) + return -ENOMEM; + + aper = kmalloc(sizeof(*aper), GFP_KERNEL); + if (aper == NULL) return -ENOMEM; + + aper->arena = agp->hose->sg_pci; + aper->pg_count = alpha_agpgart_size / PAGE_SIZE; + aper->pg_start = iommu_reserve(aper->arena, aper->pg_count, + aper->pg_count - 1); + + if (aper->pg_start < 0) { + printk(KERN_ERR "Failed to reserve AGP memory\n"); + kfree(aper); + return -ENOMEM; + } + + agp->aperture.bus_base = + aper->arena->dma_base + aper->pg_start * PAGE_SIZE; + agp->aperture.size = aper->pg_count * PAGE_SIZE; + agp->aperture.sysdata = aper; + + return 0; +} + +static void +marvel_agp_cleanup(alpha_agp_info *agp) +{ + struct marvel_agp_aperture *aper = agp->aperture.sysdata; + int status; + + status = iommu_release(aper->arena, aper->pg_start, aper->pg_count); + if (status == -EBUSY) { + printk(KERN_WARNING + "Attempted to release bound AGP memory - unbinding\n"); + iommu_unbind(aper->arena, aper->pg_start, aper->pg_count); + status = iommu_release(aper->arena, aper->pg_start, + aper->pg_count); + } + if (status < 0) + printk(KERN_ERR "Failed to release AGP memory\n"); + + kfree(aper); + kfree(agp); +} + +static int +marvel_agp_configure(alpha_agp_info *agp) +{ + io7_ioport_csrs *csrs = ((struct io7_port *)agp->hose->sysdata)->csrs; + struct io7 *io7 = ((struct io7_port *)agp->hose->sysdata)->io7; + unsigned int new_rate = 0; + unsigned long agp_pll; + + /* + * Check the requested mode against the PLL setting. + * The agpgart_be code has not programmed the card yet, + * so we can still tweak mode here. + */ + agp_pll = io7->csrs->POx_RST[IO7_AGP_PORT].csr; + switch(IO7_PLL_RNGB(agp_pll)) { + case 0x4: /* 2x only */ + /* + * The PLL is only programmed for 2x, so adjust the + * rate to 2x, if necessary. + */ + if (agp->mode.bits.rate != 2) + new_rate = 2; + break; + + case 0x6: /* 1x / 4x */ + /* + * The PLL is programmed for 1x or 4x. Don't go faster + * than requested, so if the requested rate is 2x, use 1x. + */ + if (agp->mode.bits.rate == 2) + new_rate = 1; + break; + + default: /* ??????? */ + /* + * Don't know what this PLL setting is, take the requested + * rate, but warn the user. + */ + printk("%s: unknown PLL setting RNGB=%lx (PLL6_CTL=%016lx)\n", + __func__, IO7_PLL_RNGB(agp_pll), agp_pll); + break; + } + + /* + * Set the new rate, if necessary. + */ + if (new_rate) { + printk("Requested AGP Rate %dX not compatible " + "with PLL setting - using %dX\n", + agp->mode.bits.rate, + new_rate); + + agp->mode.bits.rate = new_rate; + } + + printk("Enabling AGP on hose %d: %dX%s RQ %d\n", + agp->hose->index, agp->mode.bits.rate, + agp->mode.bits.sba ? " - SBA" : "", agp->mode.bits.rq); + + csrs->AGP_CMD.csr = agp->mode.lw; + + return 0; +} + +static int +marvel_agp_bind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *mem) +{ + struct marvel_agp_aperture *aper = agp->aperture.sysdata; + return iommu_bind(aper->arena, aper->pg_start + pg_start, + mem->page_count, mem->pages); +} + +static int +marvel_agp_unbind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *mem) +{ + struct marvel_agp_aperture *aper = agp->aperture.sysdata; + return iommu_unbind(aper->arena, aper->pg_start + pg_start, + mem->page_count); +} + +static unsigned long +marvel_agp_translate(alpha_agp_info *agp, dma_addr_t addr) +{ + struct marvel_agp_aperture *aper = agp->aperture.sysdata; + unsigned long baddr = addr - aper->arena->dma_base; + unsigned long pte; + + if (addr < agp->aperture.bus_base || + addr >= agp->aperture.bus_base + agp->aperture.size) { + printk("%s: addr out of range\n", __func__); + return -EINVAL; + } + + pte = aper->arena->ptes[baddr >> PAGE_SHIFT]; + if (!(pte & 1)) { + printk("%s: pte not valid\n", __func__); + return -EINVAL; + } + return (pte >> 1) << PAGE_SHIFT; +} + +struct alpha_agp_ops marvel_agp_ops = +{ + .setup = marvel_agp_setup, + .cleanup = marvel_agp_cleanup, + .configure = marvel_agp_configure, + .bind = marvel_agp_bind_memory, + .unbind = marvel_agp_unbind_memory, + .translate = marvel_agp_translate +}; + +alpha_agp_info * +marvel_agp_info(void) +{ + struct pci_controller *hose; + io7_ioport_csrs *csrs; + alpha_agp_info *agp; + struct io7 *io7; + + /* + * Find the first IO7 with an AGP card. + * + * FIXME -- there should be a better way (we want to be able to + * specify and what if the agp card is not video???) + */ + hose = NULL; + for (io7 = NULL; (io7 = marvel_next_io7(io7)) != NULL; ) { + struct pci_controller *h; + vuip addr; + + if (!io7->ports[IO7_AGP_PORT].enabled) + continue; + + h = io7->ports[IO7_AGP_PORT].hose; + addr = (vuip)build_conf_addr(h, 0, PCI_DEVFN(5, 0), 0); + + if (*addr != 0xffffffffu) { + hose = h; + break; + } + } + + if (!hose || !hose->sg_pci) + return NULL; + + printk("MARVEL - using hose %d as AGP\n", hose->index); + + /* + * Get the csrs from the hose. + */ + csrs = ((struct io7_port *)hose->sysdata)->csrs; + + /* + * Allocate the info structure. + */ + agp = kmalloc(sizeof(*agp), GFP_KERNEL); + if (!agp) + return NULL; + + /* + * Fill it in. + */ + agp->hose = hose; + agp->private = NULL; + agp->ops = &marvel_agp_ops; + + /* + * Aperture - not configured until ops.setup(). + */ + agp->aperture.bus_base = 0; + agp->aperture.size = 0; + agp->aperture.sysdata = NULL; + + /* + * Capabilities. + * + * NOTE: IO7 reports through AGP_STAT that it can support a read queue + * depth of 17 (rq = 0x10). It actually only supports a depth of + * 16 (rq = 0xf). + */ + agp->capability.lw = csrs->AGP_STAT.csr; + agp->capability.bits.rq = 0xf; + + /* + * Mode. + */ + agp->mode.lw = csrs->AGP_CMD.csr; + + return agp; +} diff --git a/arch/alpha/kernel/core_mcpcia.c b/arch/alpha/kernel/core_mcpcia.c new file mode 100644 index 00000000..da7bcc37 --- /dev/null +++ b/arch/alpha/kernel/core_mcpcia.c @@ -0,0 +1,613 @@ +/* + * linux/arch/alpha/kernel/core_mcpcia.c + * + * Based on code written by David A Rusling (david.rusling@reo.mts.dec.com). + * + * Code common to all MCbus-PCI Adaptor core logic chipsets + */ + +#define __EXTERN_INLINE inline +#include <asm/io.h> +#include <asm/core_mcpcia.h> +#undef __EXTERN_INLINE + +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/delay.h> + +#include <asm/ptrace.h> + +#include "proto.h" +#include "pci_impl.h" + +/* + * NOTE: Herein lie back-to-back mb instructions. They are magic. + * One plausible explanation is that the i/o controller does not properly + * handle the system transaction. Another involves timing. Ho hum. + */ + +/* + * BIOS32-style PCI interface: + */ + +#define DEBUG_CFG 0 + +#if DEBUG_CFG +# define DBG_CFG(args) printk args +#else +# define DBG_CFG(args) +#endif + +/* + * Given a bus, device, and function number, compute resulting + * configuration space address and setup the MCPCIA_HAXR2 register + * accordingly. It is therefore not safe to have concurrent + * invocations to configuration space access routines, but there + * really shouldn't be any need for this. + * + * Type 0: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | |D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|0| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:11 Device select bit. + * 10:8 Function number + * 7:2 Register number + * + * Type 1: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | | | | | | | | | |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|1| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:24 reserved + * 23:16 bus number (8 bits = 128 possible buses) + * 15:11 Device number (5 bits) + * 10:8 function number + * 7:2 register number + * + * Notes: + * The function number selects which function of a multi-function device + * (e.g., SCSI and Ethernet). + * + * The register selects a DWORD (32 bit) register offset. Hence it + * doesn't get shifted by 2 bits as we want to "drop" the bottom two + * bits. + */ + +static unsigned int +conf_read(unsigned long addr, unsigned char type1, + struct pci_controller *hose) +{ + unsigned long flags; + unsigned long mid = MCPCIA_HOSE2MID(hose->index); + unsigned int stat0, value, cpu; + + cpu = smp_processor_id(); + + local_irq_save(flags); + + DBG_CFG(("conf_read(addr=0x%lx, type1=%d, hose=%d)\n", + addr, type1, mid)); + + /* Reset status register to avoid losing errors. */ + stat0 = *(vuip)MCPCIA_CAP_ERR(mid); + *(vuip)MCPCIA_CAP_ERR(mid) = stat0; + mb(); + *(vuip)MCPCIA_CAP_ERR(mid); + DBG_CFG(("conf_read: MCPCIA_CAP_ERR(%d) was 0x%x\n", mid, stat0)); + + mb(); + draina(); + mcheck_expected(cpu) = 1; + mcheck_taken(cpu) = 0; + mcheck_extra(cpu) = mid; + mb(); + + /* Access configuration space. */ + value = *((vuip)addr); + mb(); + mb(); /* magic */ + + if (mcheck_taken(cpu)) { + mcheck_taken(cpu) = 0; + value = 0xffffffffU; + mb(); + } + mcheck_expected(cpu) = 0; + mb(); + + DBG_CFG(("conf_read(): finished\n")); + + local_irq_restore(flags); + return value; +} + +static void +conf_write(unsigned long addr, unsigned int value, unsigned char type1, + struct pci_controller *hose) +{ + unsigned long flags; + unsigned long mid = MCPCIA_HOSE2MID(hose->index); + unsigned int stat0, cpu; + + cpu = smp_processor_id(); + + local_irq_save(flags); /* avoid getting hit by machine check */ + + /* Reset status register to avoid losing errors. */ + stat0 = *(vuip)MCPCIA_CAP_ERR(mid); + *(vuip)MCPCIA_CAP_ERR(mid) = stat0; mb(); + *(vuip)MCPCIA_CAP_ERR(mid); + DBG_CFG(("conf_write: MCPCIA CAP_ERR(%d) was 0x%x\n", mid, stat0)); + + draina(); + mcheck_expected(cpu) = 1; + mcheck_extra(cpu) = mid; + mb(); + + /* Access configuration space. */ + *((vuip)addr) = value; + mb(); + mb(); /* magic */ + *(vuip)MCPCIA_CAP_ERR(mid); /* read to force the write */ + mcheck_expected(cpu) = 0; + mb(); + + DBG_CFG(("conf_write(): finished\n")); + local_irq_restore(flags); +} + +static int +mk_conf_addr(struct pci_bus *pbus, unsigned int devfn, int where, + struct pci_controller *hose, unsigned long *pci_addr, + unsigned char *type1) +{ + u8 bus = pbus->number; + unsigned long addr; + + DBG_CFG(("mk_conf_addr(bus=%d,devfn=0x%x,hose=%d,where=0x%x," + " pci_addr=0x%p, type1=0x%p)\n", + bus, devfn, hose->index, where, pci_addr, type1)); + + /* Type 1 configuration cycle for *ALL* busses. */ + *type1 = 1; + + if (!pbus->parent) /* No parent means peer PCI bus. */ + bus = 0; + addr = (bus << 16) | (devfn << 8) | (where); + addr <<= 5; /* swizzle for SPARSE */ + addr |= hose->config_space_base; + + *pci_addr = addr; + DBG_CFG(("mk_conf_addr: returning pci_addr 0x%lx\n", addr)); + return 0; +} + +static int +mcpcia_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + struct pci_controller *hose = bus->sysdata; + unsigned long addr, w; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, hose, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr |= (size - 1) * 8; + w = conf_read(addr, type1, hose); + switch (size) { + case 1: + *value = __kernel_extbl(w, where & 3); + break; + case 2: + *value = __kernel_extwl(w, where & 3); + break; + case 4: + *value = w; + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int +mcpcia_write_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + struct pci_controller *hose = bus->sysdata; + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, hose, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr |= (size - 1) * 8; + value = __kernel_insql(value, where & 3); + conf_write(addr, value, type1, hose); + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops mcpcia_pci_ops = +{ + .read = mcpcia_read_config, + .write = mcpcia_write_config, +}; + +void +mcpcia_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) +{ + wmb(); + *(vuip)MCPCIA_SG_TBIA(MCPCIA_HOSE2MID(hose->index)) = 0; + mb(); +} + +static int __init +mcpcia_probe_hose(int h) +{ + int cpu = smp_processor_id(); + int mid = MCPCIA_HOSE2MID(h); + unsigned int pci_rev; + + /* Gotta be REAL careful. If hose is absent, we get an mcheck. */ + + mb(); + mb(); + draina(); + wrmces(7); + + mcheck_expected(cpu) = 2; /* indicates probing */ + mcheck_taken(cpu) = 0; + mcheck_extra(cpu) = mid; + mb(); + + /* Access the bus revision word. */ + pci_rev = *(vuip)MCPCIA_REV(mid); + + mb(); + mb(); /* magic */ + if (mcheck_taken(cpu)) { + mcheck_taken(cpu) = 0; + pci_rev = 0xffffffff; + mb(); + } + mcheck_expected(cpu) = 0; + mb(); + + return (pci_rev >> 16) == PCI_CLASS_BRIDGE_HOST; +} + +static void __init +mcpcia_new_hose(int h) +{ + struct pci_controller *hose; + struct resource *io, *mem, *hae_mem; + int mid = MCPCIA_HOSE2MID(h); + + hose = alloc_pci_controller(); + if (h == 0) + pci_isa_hose = hose; + io = alloc_resource(); + mem = alloc_resource(); + hae_mem = alloc_resource(); + + hose->io_space = io; + hose->mem_space = hae_mem; + hose->sparse_mem_base = MCPCIA_SPARSE(mid) - IDENT_ADDR; + hose->dense_mem_base = MCPCIA_DENSE(mid) - IDENT_ADDR; + hose->sparse_io_base = MCPCIA_IO(mid) - IDENT_ADDR; + hose->dense_io_base = 0; + hose->config_space_base = MCPCIA_CONF(mid); + hose->index = h; + + io->start = MCPCIA_IO(mid) - MCPCIA_IO_BIAS; + io->end = io->start + 0xffff; + io->name = pci_io_names[h]; + io->flags = IORESOURCE_IO; + + mem->start = MCPCIA_DENSE(mid) - MCPCIA_MEM_BIAS; + mem->end = mem->start + 0xffffffff; + mem->name = pci_mem_names[h]; + mem->flags = IORESOURCE_MEM; + + hae_mem->start = mem->start; + hae_mem->end = mem->start + MCPCIA_MEM_MASK; + hae_mem->name = pci_hae0_name; + hae_mem->flags = IORESOURCE_MEM; + + if (request_resource(&ioport_resource, io) < 0) + printk(KERN_ERR "Failed to request IO on hose %d\n", h); + if (request_resource(&iomem_resource, mem) < 0) + printk(KERN_ERR "Failed to request MEM on hose %d\n", h); + if (request_resource(mem, hae_mem) < 0) + printk(KERN_ERR "Failed to request HAE_MEM on hose %d\n", h); +} + +static void +mcpcia_pci_clr_err(int mid) +{ + *(vuip)MCPCIA_CAP_ERR(mid); + *(vuip)MCPCIA_CAP_ERR(mid) = 0xffffffff; /* Clear them all. */ + mb(); + *(vuip)MCPCIA_CAP_ERR(mid); /* Re-read for force write. */ +} + +static void __init +mcpcia_startup_hose(struct pci_controller *hose) +{ + int mid = MCPCIA_HOSE2MID(hose->index); + unsigned int tmp; + + mcpcia_pci_clr_err(mid); + + /* + * Set up error reporting. + */ + tmp = *(vuip)MCPCIA_CAP_ERR(mid); + tmp |= 0x0006; /* master/target abort */ + *(vuip)MCPCIA_CAP_ERR(mid) = tmp; + mb(); + tmp = *(vuip)MCPCIA_CAP_ERR(mid); + + /* + * Set up the PCI->physical memory translation windows. + * + * Window 0 is scatter-gather 8MB at 8MB (for isa) + * Window 1 is scatter-gather (up to) 1GB at 1GB (for pci) + * Window 2 is direct access 2GB at 2GB + */ + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); + hose->sg_pci = iommu_arena_new(hose, 0x40000000, + size_for_memory(0x40000000), 0); + + __direct_map_base = 0x80000000; + __direct_map_size = 0x80000000; + + *(vuip)MCPCIA_W0_BASE(mid) = hose->sg_isa->dma_base | 3; + *(vuip)MCPCIA_W0_MASK(mid) = (hose->sg_isa->size - 1) & 0xfff00000; + *(vuip)MCPCIA_T0_BASE(mid) = virt_to_phys(hose->sg_isa->ptes) >> 8; + + *(vuip)MCPCIA_W1_BASE(mid) = hose->sg_pci->dma_base | 3; + *(vuip)MCPCIA_W1_MASK(mid) = (hose->sg_pci->size - 1) & 0xfff00000; + *(vuip)MCPCIA_T1_BASE(mid) = virt_to_phys(hose->sg_pci->ptes) >> 8; + + *(vuip)MCPCIA_W2_BASE(mid) = __direct_map_base | 1; + *(vuip)MCPCIA_W2_MASK(mid) = (__direct_map_size - 1) & 0xfff00000; + *(vuip)MCPCIA_T2_BASE(mid) = 0; + + *(vuip)MCPCIA_W3_BASE(mid) = 0x0; + + mcpcia_pci_tbi(hose, 0, -1); + + *(vuip)MCPCIA_HBASE(mid) = 0x0; + mb(); + + *(vuip)MCPCIA_HAE_MEM(mid) = 0U; + mb(); + *(vuip)MCPCIA_HAE_MEM(mid); /* read it back. */ + *(vuip)MCPCIA_HAE_IO(mid) = 0; + mb(); + *(vuip)MCPCIA_HAE_IO(mid); /* read it back. */ +} + +void __init +mcpcia_init_arch(void) +{ + /* With multiple PCI busses, we play with I/O as physical addrs. */ + ioport_resource.end = ~0UL; + + /* Allocate hose 0. That's the one that all the ISA junk hangs + off of, from which we'll be registering stuff here in a bit. + Other hose detection is done in mcpcia_init_hoses, which is + called from init_IRQ. */ + + mcpcia_new_hose(0); +} + +/* This is called from init_IRQ, since we cannot take interrupts + before then. Which means we cannot do this in init_arch. */ + +void __init +mcpcia_init_hoses(void) +{ + struct pci_controller *hose; + int hose_count; + int h; + + /* First, find how many hoses we have. */ + hose_count = 0; + for (h = 0; h < MCPCIA_MAX_HOSES; ++h) { + if (mcpcia_probe_hose(h)) { + if (h != 0) + mcpcia_new_hose(h); + hose_count++; + } + } + + printk("mcpcia_init_hoses: found %d hoses\n", hose_count); + + /* Now do init for each hose. */ + for (hose = hose_head; hose; hose = hose->next) + mcpcia_startup_hose(hose); +} + +static void +mcpcia_print_uncorrectable(struct el_MCPCIA_uncorrected_frame_mcheck *logout) +{ + struct el_common_EV5_uncorrectable_mcheck *frame; + int i; + + frame = &logout->procdata; + + /* Print PAL fields */ + for (i = 0; i < 24; i += 2) { + printk(" paltmp[%d-%d] = %16lx %16lx\n", + i, i+1, frame->paltemp[i], frame->paltemp[i+1]); + } + for (i = 0; i < 8; i += 2) { + printk(" shadow[%d-%d] = %16lx %16lx\n", + i, i+1, frame->shadow[i], + frame->shadow[i+1]); + } + printk(" Addr of excepting instruction = %16lx\n", + frame->exc_addr); + printk(" Summary of arithmetic traps = %16lx\n", + frame->exc_sum); + printk(" Exception mask = %16lx\n", + frame->exc_mask); + printk(" Base address for PALcode = %16lx\n", + frame->pal_base); + printk(" Interrupt Status Reg = %16lx\n", + frame->isr); + printk(" CURRENT SETUP OF EV5 IBOX = %16lx\n", + frame->icsr); + printk(" I-CACHE Reg %s parity error = %16lx\n", + (frame->ic_perr_stat & 0x800L) ? + "Data" : "Tag", + frame->ic_perr_stat); + printk(" D-CACHE error Reg = %16lx\n", + frame->dc_perr_stat); + if (frame->dc_perr_stat & 0x2) { + switch (frame->dc_perr_stat & 0x03c) { + case 8: + printk(" Data error in bank 1\n"); + break; + case 4: + printk(" Data error in bank 0\n"); + break; + case 20: + printk(" Tag error in bank 1\n"); + break; + case 10: + printk(" Tag error in bank 0\n"); + break; + } + } + printk(" Effective VA = %16lx\n", + frame->va); + printk(" Reason for D-stream = %16lx\n", + frame->mm_stat); + printk(" EV5 SCache address = %16lx\n", + frame->sc_addr); + printk(" EV5 SCache TAG/Data parity = %16lx\n", + frame->sc_stat); + printk(" EV5 BC_TAG_ADDR = %16lx\n", + frame->bc_tag_addr); + printk(" EV5 EI_ADDR: Phys addr of Xfer = %16lx\n", + frame->ei_addr); + printk(" Fill Syndrome = %16lx\n", + frame->fill_syndrome); + printk(" EI_STAT reg = %16lx\n", + frame->ei_stat); + printk(" LD_LOCK = %16lx\n", + frame->ld_lock); +} + +static void +mcpcia_print_system_area(unsigned long la_ptr) +{ + struct el_common *frame; + struct pci_controller *hose; + + struct IOD_subpacket { + unsigned long base; + unsigned int whoami; + unsigned int rsvd1; + unsigned int pci_rev; + unsigned int cap_ctrl; + unsigned int hae_mem; + unsigned int hae_io; + unsigned int int_ctl; + unsigned int int_reg; + unsigned int int_mask0; + unsigned int int_mask1; + unsigned int mc_err0; + unsigned int mc_err1; + unsigned int cap_err; + unsigned int rsvd2; + unsigned int pci_err1; + unsigned int mdpa_stat; + unsigned int mdpa_syn; + unsigned int mdpb_stat; + unsigned int mdpb_syn; + unsigned int rsvd3; + unsigned int rsvd4; + unsigned int rsvd5; + } *iodpp; + + frame = (struct el_common *)la_ptr; + iodpp = (struct IOD_subpacket *) (la_ptr + frame->sys_offset); + + for (hose = hose_head; hose; hose = hose->next, iodpp++) { + + printk("IOD %d Register Subpacket - Bridge Base Address %16lx\n", + hose->index, iodpp->base); + printk(" WHOAMI = %8x\n", iodpp->whoami); + printk(" PCI_REV = %8x\n", iodpp->pci_rev); + printk(" CAP_CTRL = %8x\n", iodpp->cap_ctrl); + printk(" HAE_MEM = %8x\n", iodpp->hae_mem); + printk(" HAE_IO = %8x\n", iodpp->hae_io); + printk(" INT_CTL = %8x\n", iodpp->int_ctl); + printk(" INT_REG = %8x\n", iodpp->int_reg); + printk(" INT_MASK0 = %8x\n", iodpp->int_mask0); + printk(" INT_MASK1 = %8x\n", iodpp->int_mask1); + printk(" MC_ERR0 = %8x\n", iodpp->mc_err0); + printk(" MC_ERR1 = %8x\n", iodpp->mc_err1); + printk(" CAP_ERR = %8x\n", iodpp->cap_err); + printk(" PCI_ERR1 = %8x\n", iodpp->pci_err1); + printk(" MDPA_STAT = %8x\n", iodpp->mdpa_stat); + printk(" MDPA_SYN = %8x\n", iodpp->mdpa_syn); + printk(" MDPB_STAT = %8x\n", iodpp->mdpb_stat); + printk(" MDPB_SYN = %8x\n", iodpp->mdpb_syn); + } +} + +void +mcpcia_machine_check(unsigned long vector, unsigned long la_ptr) +{ + struct el_MCPCIA_uncorrected_frame_mcheck *mchk_logout; + unsigned int cpu = smp_processor_id(); + int expected; + + mchk_logout = (struct el_MCPCIA_uncorrected_frame_mcheck *)la_ptr; + expected = mcheck_expected(cpu); + + mb(); + mb(); /* magic */ + draina(); + + switch (expected) { + case 0: + { + /* FIXME: how do we figure out which hose the + error was on? */ + struct pci_controller *hose; + for (hose = hose_head; hose; hose = hose->next) + mcpcia_pci_clr_err(MCPCIA_HOSE2MID(hose->index)); + break; + } + case 1: + mcpcia_pci_clr_err(mcheck_extra(cpu)); + break; + default: + /* Otherwise, we're being called from mcpcia_probe_hose + and there's no hose clear an error from. */ + break; + } + + wrmces(0x7); + mb(); + + process_mcheck_info(vector, la_ptr, "MCPCIA", expected != 0); + if (!expected && vector != 0x620 && vector != 0x630) { + mcpcia_print_uncorrectable(mchk_logout); + mcpcia_print_system_area(la_ptr); + } +} diff --git a/arch/alpha/kernel/core_polaris.c b/arch/alpha/kernel/core_polaris.c new file mode 100644 index 00000000..c5a271d3 --- /dev/null +++ b/arch/alpha/kernel/core_polaris.c @@ -0,0 +1,202 @@ +/* + * linux/arch/alpha/kernel/core_polaris.c + * + * POLARIS chip-specific code + */ + +#define __EXTERN_INLINE inline +#include <asm/io.h> +#include <asm/core_polaris.h> +#undef __EXTERN_INLINE + +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/sched.h> +#include <linux/init.h> + +#include <asm/ptrace.h> + +#include "proto.h" +#include "pci_impl.h" + +/* + * BIOS32-style PCI interface: + */ + +#define DEBUG_CONFIG 0 + +#if DEBUG_CONFIG +# define DBG_CFG(args) printk args +#else +# define DBG_CFG(args) +#endif + + +/* + * Given a bus, device, and function number, compute resulting + * configuration space address. This is fairly straightforward + * on POLARIS, since the chip itself generates Type 0 or Type 1 + * cycles automatically depending on the bus number (Bus 0 is + * hardwired to Type 0, all others are Type 1. Peer bridges + * are not supported). + * + * All types: + * + * 3 3 3 3|3 3 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |1|1|1|1|1|0|0|1|1|1|1|1|1|1|1|0|B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|x|x| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 23:16 bus number (8 bits = 128 possible buses) + * 15:11 Device number (5 bits) + * 10:8 function number + * 7:2 register number + * + * Notes: + * The function number selects which function of a multi-function device + * (e.g., scsi and ethernet). + * + * The register selects a DWORD (32 bit) register offset. Hence it + * doesn't get shifted by 2 bits as we want to "drop" the bottom two + * bits. + */ + +static int +mk_conf_addr(struct pci_bus *pbus, unsigned int device_fn, int where, + unsigned long *pci_addr, u8 *type1) +{ + u8 bus = pbus->number; + + *type1 = (bus == 0) ? 0 : 1; + *pci_addr = (bus << 16) | (device_fn << 8) | (where) | + POLARIS_DENSE_CONFIG_BASE; + + DBG_CFG(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x," + " returning address 0x%p\n" + bus, device_fn, where, *pci_addr)); + + return 0; +} + +static int +polaris_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + *value = __kernel_ldbu(*(vucp)addr); + break; + case 2: + *value = __kernel_ldwu(*(vusp)addr); + break; + case 4: + *value = *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + + +static int +polaris_write_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + __kernel_stb(value, *(vucp)addr); + mb(); + __kernel_ldbu(*(vucp)addr); + break; + case 2: + __kernel_stw(value, *(vusp)addr); + mb(); + __kernel_ldwu(*(vusp)addr); + break; + case 4: + *(vuip)addr = value; + mb(); + *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops polaris_pci_ops = +{ + .read = polaris_read_config, + .write = polaris_write_config, +}; + +void __init +polaris_init_arch(void) +{ + struct pci_controller *hose; + + /* May need to initialize error reporting (see PCICTL0/1), but + * for now assume that the firmware has done the right thing + * already. + */ +#if 0 + printk("polaris_init_arch(): trusting firmware for setup\n"); +#endif + + /* + * Create our single hose. + */ + + pci_isa_hose = hose = alloc_pci_controller(); + hose->io_space = &ioport_resource; + hose->mem_space = &iomem_resource; + hose->index = 0; + + hose->sparse_mem_base = 0; + hose->dense_mem_base = POLARIS_DENSE_MEM_BASE - IDENT_ADDR; + hose->sparse_io_base = 0; + hose->dense_io_base = POLARIS_DENSE_IO_BASE - IDENT_ADDR; + + hose->sg_isa = hose->sg_pci = NULL; + + /* The I/O window is fixed at 2G @ 2G. */ + __direct_map_base = 0x80000000; + __direct_map_size = 0x80000000; +} + +static inline void +polaris_pci_clr_err(void) +{ + *(vusp)POLARIS_W_STATUS; + /* Write 1's to settable bits to clear errors */ + *(vusp)POLARIS_W_STATUS = 0x7800; + mb(); + *(vusp)POLARIS_W_STATUS; +} + +void +polaris_machine_check(unsigned long vector, unsigned long la_ptr) +{ + /* Clear the error before any reporting. */ + mb(); + mb(); + draina(); + polaris_pci_clr_err(); + wrmces(0x7); + mb(); + + process_mcheck_info(vector, la_ptr, "POLARIS", + mcheck_expected(0)); +} diff --git a/arch/alpha/kernel/core_t2.c b/arch/alpha/kernel/core_t2.c new file mode 100644 index 00000000..3ada4f7b --- /dev/null +++ b/arch/alpha/kernel/core_t2.c @@ -0,0 +1,623 @@ +/* + * linux/arch/alpha/kernel/core_t2.c + * + * Written by Jay A Estabrook (jestabro@amt.tay1.dec.com). + * December 1996. + * + * based on CIA code by David A Rusling (david.rusling@reo.mts.dec.com) + * + * Code common to all T2 core logic chips. + */ + +#define __EXTERN_INLINE +#include <asm/io.h> +#include <asm/core_t2.h> +#undef __EXTERN_INLINE + +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/sched.h> +#include <linux/init.h> + +#include <asm/ptrace.h> +#include <asm/delay.h> +#include <asm/mce.h> + +#include "proto.h" +#include "pci_impl.h" + +/* For dumping initial DMA window settings. */ +#define DEBUG_PRINT_INITIAL_SETTINGS 0 + +/* For dumping final DMA window settings. */ +#define DEBUG_PRINT_FINAL_SETTINGS 0 + +/* + * By default, we direct-map starting at 2GB, in order to allow the + * maximum size direct-map window (2GB) to match the maximum amount of + * memory (2GB) that can be present on SABLEs. But that limits the + * floppy to DMA only via the scatter/gather window set up for 8MB + * ISA DMA, since the maximum ISA DMA address is 2GB-1. + * + * For now, this seems a reasonable trade-off: even though most SABLEs + * have less than 1GB of memory, floppy usage/performance will not + * really be affected by forcing it to go via scatter/gather... + */ +#define T2_DIRECTMAP_2G 1 + +#if T2_DIRECTMAP_2G +# define T2_DIRECTMAP_START 0x80000000UL +# define T2_DIRECTMAP_LENGTH 0x80000000UL +#else +# define T2_DIRECTMAP_START 0x40000000UL +# define T2_DIRECTMAP_LENGTH 0x40000000UL +#endif + +/* The ISA scatter/gather window settings. */ +#define T2_ISA_SG_START 0x00800000UL +#define T2_ISA_SG_LENGTH 0x00800000UL + +/* + * NOTE: Herein lie back-to-back mb instructions. They are magic. + * One plausible explanation is that the i/o controller does not properly + * handle the system transaction. Another involves timing. Ho hum. + */ + +/* + * BIOS32-style PCI interface: + */ + +#define DEBUG_CONFIG 0 + +#if DEBUG_CONFIG +# define DBG(args) printk args +#else +# define DBG(args) +#endif + +static volatile unsigned int t2_mcheck_any_expected; +static volatile unsigned int t2_mcheck_last_taken; + +/* Place to save the DMA Window registers as set up by SRM + for restoration during shutdown. */ +static struct +{ + struct { + unsigned long wbase; + unsigned long wmask; + unsigned long tbase; + } window[2]; + unsigned long hae_1; + unsigned long hae_2; + unsigned long hae_3; + unsigned long hae_4; + unsigned long hbase; +} t2_saved_config __attribute((common)); + +/* + * Given a bus, device, and function number, compute resulting + * configuration space address and setup the T2_HAXR2 register + * accordingly. It is therefore not safe to have concurrent + * invocations to configuration space access routines, but there + * really shouldn't be any need for this. + * + * Type 0: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | |D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|0| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:11 Device select bit. + * 10:8 Function number + * 7:2 Register number + * + * Type 1: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | | | | | | | | | |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|1| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:24 reserved + * 23:16 bus number (8 bits = 128 possible buses) + * 15:11 Device number (5 bits) + * 10:8 function number + * 7:2 register number + * + * Notes: + * The function number selects which function of a multi-function device + * (e.g., SCSI and Ethernet). + * + * The register selects a DWORD (32 bit) register offset. Hence it + * doesn't get shifted by 2 bits as we want to "drop" the bottom two + * bits. + */ + +static int +mk_conf_addr(struct pci_bus *pbus, unsigned int device_fn, int where, + unsigned long *pci_addr, unsigned char *type1) +{ + unsigned long addr; + u8 bus = pbus->number; + + DBG(("mk_conf_addr(bus=%d, dfn=0x%x, where=0x%x," + " addr=0x%lx, type1=0x%x)\n", + bus, device_fn, where, pci_addr, type1)); + + if (bus == 0) { + int device = device_fn >> 3; + + /* Type 0 configuration cycle. */ + + if (device > 8) { + DBG(("mk_conf_addr: device (%d)>20, returning -1\n", + device)); + return -1; + } + + *type1 = 0; + addr = (0x0800L << device) | ((device_fn & 7) << 8) | (where); + } else { + /* Type 1 configuration cycle. */ + *type1 = 1; + addr = (bus << 16) | (device_fn << 8) | (where); + } + *pci_addr = addr; + DBG(("mk_conf_addr: returning pci_addr 0x%lx\n", addr)); + return 0; +} + +/* + * NOTE: both conf_read() and conf_write() may set HAE_3 when needing + * to do type1 access. This is protected by the use of spinlock IRQ + * primitives in the wrapper functions pci_{read,write}_config_*() + * defined in drivers/pci/pci.c. + */ +static unsigned int +conf_read(unsigned long addr, unsigned char type1) +{ + unsigned int value, cpu, taken; + unsigned long t2_cfg = 0; + + cpu = smp_processor_id(); + + DBG(("conf_read(addr=0x%lx, type1=%d)\n", addr, type1)); + + /* If Type1 access, must set T2 CFG. */ + if (type1) { + t2_cfg = *(vulp)T2_HAE_3 & ~0xc0000000UL; + *(vulp)T2_HAE_3 = 0x40000000UL | t2_cfg; + mb(); + } + mb(); + draina(); + + mcheck_expected(cpu) = 1; + mcheck_taken(cpu) = 0; + t2_mcheck_any_expected |= (1 << cpu); + mb(); + + /* Access configuration space. */ + value = *(vuip)addr; + mb(); + mb(); /* magic */ + + /* Wait for possible mcheck. Also, this lets other CPUs clear + their mchecks as well, as they can reliably tell when + another CPU is in the midst of handling a real mcheck via + the "taken" function. */ + udelay(100); + + if ((taken = mcheck_taken(cpu))) { + mcheck_taken(cpu) = 0; + t2_mcheck_last_taken |= (1 << cpu); + value = 0xffffffffU; + mb(); + } + mcheck_expected(cpu) = 0; + t2_mcheck_any_expected = 0; + mb(); + + /* If Type1 access, must reset T2 CFG so normal IO space ops work. */ + if (type1) { + *(vulp)T2_HAE_3 = t2_cfg; + mb(); + } + + return value; +} + +static void +conf_write(unsigned long addr, unsigned int value, unsigned char type1) +{ + unsigned int cpu, taken; + unsigned long t2_cfg = 0; + + cpu = smp_processor_id(); + + /* If Type1 access, must set T2 CFG. */ + if (type1) { + t2_cfg = *(vulp)T2_HAE_3 & ~0xc0000000UL; + *(vulp)T2_HAE_3 = t2_cfg | 0x40000000UL; + mb(); + } + mb(); + draina(); + + mcheck_expected(cpu) = 1; + mcheck_taken(cpu) = 0; + t2_mcheck_any_expected |= (1 << cpu); + mb(); + + /* Access configuration space. */ + *(vuip)addr = value; + mb(); + mb(); /* magic */ + + /* Wait for possible mcheck. Also, this lets other CPUs clear + their mchecks as well, as they can reliably tell when + this CPU is in the midst of handling a real mcheck via + the "taken" function. */ + udelay(100); + + if ((taken = mcheck_taken(cpu))) { + mcheck_taken(cpu) = 0; + t2_mcheck_last_taken |= (1 << cpu); + mb(); + } + mcheck_expected(cpu) = 0; + t2_mcheck_any_expected = 0; + mb(); + + /* If Type1 access, must reset T2 CFG so normal IO space ops work. */ + if (type1) { + *(vulp)T2_HAE_3 = t2_cfg; + mb(); + } +} + +static int +t2_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + unsigned long addr, pci_addr; + unsigned char type1; + int shift; + long mask; + + if (mk_conf_addr(bus, devfn, where, &pci_addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + mask = (size - 1) * 8; + shift = (where & 3) * 8; + addr = (pci_addr << 5) + mask + T2_CONF; + *value = conf_read(addr, type1) >> (shift); + return PCIBIOS_SUCCESSFUL; +} + +static int +t2_write_config(struct pci_bus *bus, unsigned int devfn, int where, int size, + u32 value) +{ + unsigned long addr, pci_addr; + unsigned char type1; + long mask; + + if (mk_conf_addr(bus, devfn, where, &pci_addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + mask = (size - 1) * 8; + addr = (pci_addr << 5) + mask + T2_CONF; + conf_write(addr, value << ((where & 3) * 8), type1); + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops t2_pci_ops = +{ + .read = t2_read_config, + .write = t2_write_config, +}; + +static void __init +t2_direct_map_window1(unsigned long base, unsigned long length) +{ + unsigned long temp; + + __direct_map_base = base; + __direct_map_size = length; + + temp = (base & 0xfff00000UL) | ((base + length - 1) >> 20); + *(vulp)T2_WBASE1 = temp | 0x80000UL; /* OR in ENABLE bit */ + temp = (length - 1) & 0xfff00000UL; + *(vulp)T2_WMASK1 = temp; + *(vulp)T2_TBASE1 = 0; + +#if DEBUG_PRINT_FINAL_SETTINGS + printk("%s: setting WBASE1=0x%lx WMASK1=0x%lx TBASE1=0x%lx\n", + __func__, *(vulp)T2_WBASE1, *(vulp)T2_WMASK1, *(vulp)T2_TBASE1); +#endif +} + +static void __init +t2_sg_map_window2(struct pci_controller *hose, + unsigned long base, + unsigned long length) +{ + unsigned long temp; + + /* Note we can only do 1 SG window, as the other is for direct, so + do an ISA SG area, especially for the floppy. */ + hose->sg_isa = iommu_arena_new(hose, base, length, 0); + hose->sg_pci = NULL; + + temp = (base & 0xfff00000UL) | ((base + length - 1) >> 20); + *(vulp)T2_WBASE2 = temp | 0xc0000UL; /* OR in ENABLE/SG bits */ + temp = (length - 1) & 0xfff00000UL; + *(vulp)T2_WMASK2 = temp; + *(vulp)T2_TBASE2 = virt_to_phys(hose->sg_isa->ptes) >> 1; + mb(); + + t2_pci_tbi(hose, 0, -1); /* flush TLB all */ + +#if DEBUG_PRINT_FINAL_SETTINGS + printk("%s: setting WBASE2=0x%lx WMASK2=0x%lx TBASE2=0x%lx\n", + __func__, *(vulp)T2_WBASE2, *(vulp)T2_WMASK2, *(vulp)T2_TBASE2); +#endif +} + +static void __init +t2_save_configuration(void) +{ +#if DEBUG_PRINT_INITIAL_SETTINGS + printk("%s: HAE_1 was 0x%lx\n", __func__, srm_hae); /* HW is 0 */ + printk("%s: HAE_2 was 0x%lx\n", __func__, *(vulp)T2_HAE_2); + printk("%s: HAE_3 was 0x%lx\n", __func__, *(vulp)T2_HAE_3); + printk("%s: HAE_4 was 0x%lx\n", __func__, *(vulp)T2_HAE_4); + printk("%s: HBASE was 0x%lx\n", __func__, *(vulp)T2_HBASE); + + printk("%s: WBASE1=0x%lx WMASK1=0x%lx TBASE1=0x%lx\n", __func__, + *(vulp)T2_WBASE1, *(vulp)T2_WMASK1, *(vulp)T2_TBASE1); + printk("%s: WBASE2=0x%lx WMASK2=0x%lx TBASE2=0x%lx\n", __func__, + *(vulp)T2_WBASE2, *(vulp)T2_WMASK2, *(vulp)T2_TBASE2); +#endif + + /* + * Save the DMA Window registers. + */ + t2_saved_config.window[0].wbase = *(vulp)T2_WBASE1; + t2_saved_config.window[0].wmask = *(vulp)T2_WMASK1; + t2_saved_config.window[0].tbase = *(vulp)T2_TBASE1; + t2_saved_config.window[1].wbase = *(vulp)T2_WBASE2; + t2_saved_config.window[1].wmask = *(vulp)T2_WMASK2; + t2_saved_config.window[1].tbase = *(vulp)T2_TBASE2; + + t2_saved_config.hae_1 = srm_hae; /* HW is already set to 0 */ + t2_saved_config.hae_2 = *(vulp)T2_HAE_2; + t2_saved_config.hae_3 = *(vulp)T2_HAE_3; + t2_saved_config.hae_4 = *(vulp)T2_HAE_4; + t2_saved_config.hbase = *(vulp)T2_HBASE; +} + +void __init +t2_init_arch(void) +{ + struct pci_controller *hose; + struct resource *hae_mem; + unsigned long temp; + unsigned int i; + + for (i = 0; i < NR_CPUS; i++) { + mcheck_expected(i) = 0; + mcheck_taken(i) = 0; + } + t2_mcheck_any_expected = 0; + t2_mcheck_last_taken = 0; + + /* Enable scatter/gather TLB use. */ + temp = *(vulp)T2_IOCSR; + if (!(temp & (0x1UL << 26))) { + printk("t2_init_arch: enabling SG TLB, IOCSR was 0x%lx\n", + temp); + *(vulp)T2_IOCSR = temp | (0x1UL << 26); + mb(); + *(vulp)T2_IOCSR; /* read it back to make sure */ + } + + t2_save_configuration(); + + /* + * Create our single hose. + */ + pci_isa_hose = hose = alloc_pci_controller(); + hose->io_space = &ioport_resource; + hae_mem = alloc_resource(); + hae_mem->start = 0; + hae_mem->end = T2_MEM_R1_MASK; + hae_mem->name = pci_hae0_name; + if (request_resource(&iomem_resource, hae_mem) < 0) + printk(KERN_ERR "Failed to request HAE_MEM\n"); + hose->mem_space = hae_mem; + hose->index = 0; + + hose->sparse_mem_base = T2_SPARSE_MEM - IDENT_ADDR; + hose->dense_mem_base = T2_DENSE_MEM - IDENT_ADDR; + hose->sparse_io_base = T2_IO - IDENT_ADDR; + hose->dense_io_base = 0; + + /* + * Set up the PCI->physical memory translation windows. + * + * Window 1 is direct mapped. + * Window 2 is scatter/gather (for ISA). + */ + + t2_direct_map_window1(T2_DIRECTMAP_START, T2_DIRECTMAP_LENGTH); + + /* Always make an ISA DMA window. */ + t2_sg_map_window2(hose, T2_ISA_SG_START, T2_ISA_SG_LENGTH); + + *(vulp)T2_HBASE = 0x0; /* Disable HOLES. */ + + /* Zero HAE. */ + *(vulp)T2_HAE_1 = 0; mb(); /* Sparse MEM HAE */ + *(vulp)T2_HAE_2 = 0; mb(); /* Sparse I/O HAE */ + *(vulp)T2_HAE_3 = 0; mb(); /* Config Space HAE */ + + /* + * We also now zero out HAE_4, the dense memory HAE, so that + * we need not account for its "offset" when accessing dense + * memory resources which we allocated in our normal way. This + * HAE would need to stay untouched were we to keep the SRM + * resource settings. + * + * Thus we can now run standard X servers on SABLE/LYNX. :-) + */ + *(vulp)T2_HAE_4 = 0; mb(); +} + +void +t2_kill_arch(int mode) +{ + /* + * Restore the DMA Window registers. + */ + *(vulp)T2_WBASE1 = t2_saved_config.window[0].wbase; + *(vulp)T2_WMASK1 = t2_saved_config.window[0].wmask; + *(vulp)T2_TBASE1 = t2_saved_config.window[0].tbase; + *(vulp)T2_WBASE2 = t2_saved_config.window[1].wbase; + *(vulp)T2_WMASK2 = t2_saved_config.window[1].wmask; + *(vulp)T2_TBASE2 = t2_saved_config.window[1].tbase; + mb(); + + *(vulp)T2_HAE_1 = srm_hae; + *(vulp)T2_HAE_2 = t2_saved_config.hae_2; + *(vulp)T2_HAE_3 = t2_saved_config.hae_3; + *(vulp)T2_HAE_4 = t2_saved_config.hae_4; + *(vulp)T2_HBASE = t2_saved_config.hbase; + mb(); + *(vulp)T2_HBASE; /* READ it back to ensure WRITE occurred. */ +} + +void +t2_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) +{ + unsigned long t2_iocsr; + + t2_iocsr = *(vulp)T2_IOCSR; + + /* set the TLB Clear bit */ + *(vulp)T2_IOCSR = t2_iocsr | (0x1UL << 28); + mb(); + *(vulp)T2_IOCSR; /* read it back to make sure */ + + /* clear the TLB Clear bit */ + *(vulp)T2_IOCSR = t2_iocsr & ~(0x1UL << 28); + mb(); + *(vulp)T2_IOCSR; /* read it back to make sure */ +} + +#define SIC_SEIC (1UL << 33) /* System Event Clear */ + +static void +t2_clear_errors(int cpu) +{ + struct sable_cpu_csr *cpu_regs; + + cpu_regs = (struct sable_cpu_csr *)T2_CPUn_BASE(cpu); + + cpu_regs->sic &= ~SIC_SEIC; + + /* Clear CPU errors. */ + cpu_regs->bcce |= cpu_regs->bcce; + cpu_regs->cbe |= cpu_regs->cbe; + cpu_regs->bcue |= cpu_regs->bcue; + cpu_regs->dter |= cpu_regs->dter; + + *(vulp)T2_CERR1 |= *(vulp)T2_CERR1; + *(vulp)T2_PERR1 |= *(vulp)T2_PERR1; + + mb(); + mb(); /* magic */ +} + +/* + * SABLE seems to have a "broadcast" style machine check, in that all + * CPUs receive it. And, the issuing CPU, in the case of PCI Config + * space read/write faults, will also receive a second mcheck, upon + * lowering IPL during completion processing in pci_read_config_byte() + * et al. + * + * Hence all the taken/expected/any_expected/last_taken stuff... + */ +void +t2_machine_check(unsigned long vector, unsigned long la_ptr) +{ + int cpu = smp_processor_id(); +#ifdef CONFIG_VERBOSE_MCHECK + struct el_common *mchk_header = (struct el_common *)la_ptr; +#endif + + /* Clear the error before any reporting. */ + mb(); + mb(); /* magic */ + draina(); + t2_clear_errors(cpu); + + /* This should not actually be done until the logout frame is + examined, but, since we don't do that, go on and do this... */ + wrmces(0x7); + mb(); + + /* Now, do testing for the anomalous conditions. */ + if (!mcheck_expected(cpu) && t2_mcheck_any_expected) { + /* + * FUNKY: Received mcheck on a CPU and not + * expecting it, but another CPU is expecting one. + * + * Just dismiss it for now on this CPU... + */ +#ifdef CONFIG_VERBOSE_MCHECK + if (alpha_verbose_mcheck > 1) { + printk("t2_machine_check(cpu%d): any_expected 0x%x -" + " (assumed) spurious -" + " code 0x%x\n", cpu, t2_mcheck_any_expected, + (unsigned int)mchk_header->code); + } +#endif + return; + } + + if (!mcheck_expected(cpu) && !t2_mcheck_any_expected) { + if (t2_mcheck_last_taken & (1 << cpu)) { +#ifdef CONFIG_VERBOSE_MCHECK + if (alpha_verbose_mcheck > 1) { + printk("t2_machine_check(cpu%d): last_taken 0x%x - " + "unexpected mcheck - code 0x%x\n", + cpu, t2_mcheck_last_taken, + (unsigned int)mchk_header->code); + } +#endif + t2_mcheck_last_taken = 0; + mb(); + return; + } else { + t2_mcheck_last_taken = 0; + mb(); + } + } + +#ifdef CONFIG_VERBOSE_MCHECK + if (alpha_verbose_mcheck > 1) { + printk("%s t2_mcheck(cpu%d): last_taken 0x%x - " + "any_expected 0x%x - code 0x%x\n", + (mcheck_expected(cpu) ? "EX" : "UN"), cpu, + t2_mcheck_last_taken, t2_mcheck_any_expected, + (unsigned int)mchk_header->code); + } +#endif + + process_mcheck_info(vector, la_ptr, "T2", mcheck_expected(cpu)); +} diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c new file mode 100644 index 00000000..219bf271 --- /dev/null +++ b/arch/alpha/kernel/core_titan.c @@ -0,0 +1,798 @@ +/* + * linux/arch/alpha/kernel/core_titan.c + * + * Code common to all TITAN core logic chips. + */ + +#define __EXTERN_INLINE inline +#include <asm/io.h> +#include <asm/core_titan.h> +#undef __EXTERN_INLINE + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/vmalloc.h> +#include <linux/bootmem.h> + +#include <asm/ptrace.h> +#include <asm/smp.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> +#include <asm/vga.h> + +#include "proto.h" +#include "pci_impl.h" + +/* Save Titan configuration data as the console had it set up. */ + +struct +{ + unsigned long wsba[4]; + unsigned long wsm[4]; + unsigned long tba[4]; +} saved_config[4] __attribute__((common)); + +/* + * Is PChip 1 present? No need to query it more than once. + */ +static int titan_pchip1_present; + +/* + * BIOS32-style PCI interface: + */ + +#define DEBUG_CONFIG 0 + +#if DEBUG_CONFIG +# define DBG_CFG(args) printk args +#else +# define DBG_CFG(args) +#endif + + +/* + * Routines to access TIG registers. + */ +static inline volatile unsigned long * +mk_tig_addr(int offset) +{ + return (volatile unsigned long *)(TITAN_TIG_SPACE + (offset << 6)); +} + +static inline u8 +titan_read_tig(int offset, u8 value) +{ + volatile unsigned long *tig_addr = mk_tig_addr(offset); + return (u8)(*tig_addr & 0xff); +} + +static inline void +titan_write_tig(int offset, u8 value) +{ + volatile unsigned long *tig_addr = mk_tig_addr(offset); + *tig_addr = (unsigned long)value; +} + + +/* + * Given a bus, device, and function number, compute resulting + * configuration space address + * accordingly. It is therefore not safe to have concurrent + * invocations to configuration space access routines, but there + * really shouldn't be any need for this. + * + * Note that all config space accesses use Type 1 address format. + * + * Note also that type 1 is determined by non-zero bus number. + * + * Type 1: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | | | | | | | | | |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|1| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:24 reserved + * 23:16 bus number (8 bits = 128 possible buses) + * 15:11 Device number (5 bits) + * 10:8 function number + * 7:2 register number + * + * Notes: + * The function number selects which function of a multi-function device + * (e.g., SCSI and Ethernet). + * + * The register selects a DWORD (32 bit) register offset. Hence it + * doesn't get shifted by 2 bits as we want to "drop" the bottom two + * bits. + */ + +static int +mk_conf_addr(struct pci_bus *pbus, unsigned int device_fn, int where, + unsigned long *pci_addr, unsigned char *type1) +{ + struct pci_controller *hose = pbus->sysdata; + unsigned long addr; + u8 bus = pbus->number; + + DBG_CFG(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x, " + "pci_addr=0x%p, type1=0x%p)\n", + bus, device_fn, where, pci_addr, type1)); + + if (!pbus->parent) /* No parent means peer PCI bus. */ + bus = 0; + *type1 = (bus != 0); + + addr = (bus << 16) | (device_fn << 8) | where; + addr |= hose->config_space_base; + + *pci_addr = addr; + DBG_CFG(("mk_conf_addr: returning pci_addr 0x%lx\n", addr)); + return 0; +} + +static int +titan_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + *value = __kernel_ldbu(*(vucp)addr); + break; + case 2: + *value = __kernel_ldwu(*(vusp)addr); + break; + case 4: + *value = *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int +titan_write_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + __kernel_stb(value, *(vucp)addr); + mb(); + __kernel_ldbu(*(vucp)addr); + break; + case 2: + __kernel_stw(value, *(vusp)addr); + mb(); + __kernel_ldwu(*(vusp)addr); + break; + case 4: + *(vuip)addr = value; + mb(); + *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops titan_pci_ops = +{ + .read = titan_read_config, + .write = titan_write_config, +}; + + +void +titan_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) +{ + titan_pachip *pachip = + (hose->index & 1) ? TITAN_pachip1 : TITAN_pachip0; + titan_pachip_port *port; + volatile unsigned long *csr; + unsigned long value; + + /* Get the right hose. */ + port = &pachip->g_port; + if (hose->index & 2) + port = &pachip->a_port; + + /* We can invalidate up to 8 tlb entries in a go. The flush + matches against <31:16> in the pci address. + Note that gtlbi* and atlbi* are in the same place in the g_port + and a_port, respectively, so the g_port offset can be used + even if hose is an a_port */ + csr = &port->port_specific.g.gtlbia.csr; + if (((start ^ end) & 0xffff0000) == 0) + csr = &port->port_specific.g.gtlbiv.csr; + + /* For TBIA, it doesn't matter what value we write. For TBI, + it's the shifted tag bits. */ + value = (start & 0xffff0000) >> 12; + + wmb(); + *csr = value; + mb(); + *csr; +} + +static int +titan_query_agp(titan_pachip_port *port) +{ + union TPAchipPCTL pctl; + + /* set up APCTL */ + pctl.pctl_q_whole = port->pctl.csr; + + return pctl.pctl_r_bits.apctl_v_agp_present; + +} + +static void __init +titan_init_one_pachip_port(titan_pachip_port *port, int index) +{ + struct pci_controller *hose; + + hose = alloc_pci_controller(); + if (index == 0) + pci_isa_hose = hose; + hose->io_space = alloc_resource(); + hose->mem_space = alloc_resource(); + + /* + * This is for userland consumption. The 40-bit PIO bias that we + * use in the kernel through KSEG doesn't work in the page table + * based user mappings. (43-bit KSEG sign extends the physical + * address from bit 40 to hit the I/O bit - mapped addresses don't). + * So make sure we get the 43-bit PIO bias. + */ + hose->sparse_mem_base = 0; + hose->sparse_io_base = 0; + hose->dense_mem_base + = (TITAN_MEM(index) & 0xffffffffffUL) | 0x80000000000UL; + hose->dense_io_base + = (TITAN_IO(index) & 0xffffffffffUL) | 0x80000000000UL; + + hose->config_space_base = TITAN_CONF(index); + hose->index = index; + + hose->io_space->start = TITAN_IO(index) - TITAN_IO_BIAS; + hose->io_space->end = hose->io_space->start + TITAN_IO_SPACE - 1; + hose->io_space->name = pci_io_names[index]; + hose->io_space->flags = IORESOURCE_IO; + + hose->mem_space->start = TITAN_MEM(index) - TITAN_MEM_BIAS; + hose->mem_space->end = hose->mem_space->start + 0xffffffff; + hose->mem_space->name = pci_mem_names[index]; + hose->mem_space->flags = IORESOURCE_MEM; + + if (request_resource(&ioport_resource, hose->io_space) < 0) + printk(KERN_ERR "Failed to request IO on hose %d\n", index); + if (request_resource(&iomem_resource, hose->mem_space) < 0) + printk(KERN_ERR "Failed to request MEM on hose %d\n", index); + + /* + * Save the existing PCI window translations. SRM will + * need them when we go to reboot. + */ + saved_config[index].wsba[0] = port->wsba[0].csr; + saved_config[index].wsm[0] = port->wsm[0].csr; + saved_config[index].tba[0] = port->tba[0].csr; + + saved_config[index].wsba[1] = port->wsba[1].csr; + saved_config[index].wsm[1] = port->wsm[1].csr; + saved_config[index].tba[1] = port->tba[1].csr; + + saved_config[index].wsba[2] = port->wsba[2].csr; + saved_config[index].wsm[2] = port->wsm[2].csr; + saved_config[index].tba[2] = port->tba[2].csr; + + saved_config[index].wsba[3] = port->wsba[3].csr; + saved_config[index].wsm[3] = port->wsm[3].csr; + saved_config[index].tba[3] = port->tba[3].csr; + + /* + * Set up the PCI to main memory translation windows. + * + * Note: Window 3 on Titan is Scatter-Gather ONLY. + * + * Window 0 is scatter-gather 8MB at 8MB (for isa) + * Window 1 is direct access 1GB at 2GB + * Window 2 is scatter-gather 1GB at 3GB + */ + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); + hose->sg_isa->align_entry = 8; /* 64KB for ISA */ + + hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x40000000, 0); + hose->sg_pci->align_entry = 4; /* Titan caches 4 PTEs at a time */ + + port->wsba[0].csr = hose->sg_isa->dma_base | 3; + port->wsm[0].csr = (hose->sg_isa->size - 1) & 0xfff00000; + port->tba[0].csr = virt_to_phys(hose->sg_isa->ptes); + + port->wsba[1].csr = __direct_map_base | 1; + port->wsm[1].csr = (__direct_map_size - 1) & 0xfff00000; + port->tba[1].csr = 0; + + port->wsba[2].csr = hose->sg_pci->dma_base | 3; + port->wsm[2].csr = (hose->sg_pci->size - 1) & 0xfff00000; + port->tba[2].csr = virt_to_phys(hose->sg_pci->ptes); + + port->wsba[3].csr = 0; + + /* Enable the Monster Window to make DAC pci64 possible. */ + port->pctl.csr |= pctl_m_mwin; + + /* + * If it's an AGP port, initialize agplastwr. + */ + if (titan_query_agp(port)) + port->port_specific.a.agplastwr.csr = __direct_map_base; + + titan_pci_tbi(hose, 0, -1); +} + +static void __init +titan_init_pachips(titan_pachip *pachip0, titan_pachip *pachip1) +{ + titan_pchip1_present = TITAN_cchip->csc.csr & 1L<<14; + + /* Init the ports in hose order... */ + titan_init_one_pachip_port(&pachip0->g_port, 0); /* hose 0 */ + if (titan_pchip1_present) + titan_init_one_pachip_port(&pachip1->g_port, 1);/* hose 1 */ + titan_init_one_pachip_port(&pachip0->a_port, 2); /* hose 2 */ + if (titan_pchip1_present) + titan_init_one_pachip_port(&pachip1->a_port, 3);/* hose 3 */ +} + +void __init +titan_init_arch(void) +{ +#if 0 + printk("%s: titan_init_arch()\n", __func__); + printk("%s: CChip registers:\n", __func__); + printk("%s: CSR_CSC 0x%lx\n", __func__, TITAN_cchip->csc.csr); + printk("%s: CSR_MTR 0x%lx\n", __func__, TITAN_cchip->mtr.csr); + printk("%s: CSR_MISC 0x%lx\n", __func__, TITAN_cchip->misc.csr); + printk("%s: CSR_DIM0 0x%lx\n", __func__, TITAN_cchip->dim0.csr); + printk("%s: CSR_DIM1 0x%lx\n", __func__, TITAN_cchip->dim1.csr); + printk("%s: CSR_DIR0 0x%lx\n", __func__, TITAN_cchip->dir0.csr); + printk("%s: CSR_DIR1 0x%lx\n", __func__, TITAN_cchip->dir1.csr); + printk("%s: CSR_DRIR 0x%lx\n", __func__, TITAN_cchip->drir.csr); + + printk("%s: DChip registers:\n", __func__); + printk("%s: CSR_DSC 0x%lx\n", __func__, TITAN_dchip->dsc.csr); + printk("%s: CSR_STR 0x%lx\n", __func__, TITAN_dchip->str.csr); + printk("%s: CSR_DREV 0x%lx\n", __func__, TITAN_dchip->drev.csr); +#endif + + boot_cpuid = __hard_smp_processor_id(); + + /* With multiple PCI busses, we play with I/O as physical addrs. */ + ioport_resource.end = ~0UL; + iomem_resource.end = ~0UL; + + /* PCI DMA Direct Mapping is 1GB at 2GB. */ + __direct_map_base = 0x80000000; + __direct_map_size = 0x40000000; + + /* Init the PA chip(s). */ + titan_init_pachips(TITAN_pachip0, TITAN_pachip1); + + /* Check for graphic console location (if any). */ + find_console_vga_hose(); +} + +static void +titan_kill_one_pachip_port(titan_pachip_port *port, int index) +{ + port->wsba[0].csr = saved_config[index].wsba[0]; + port->wsm[0].csr = saved_config[index].wsm[0]; + port->tba[0].csr = saved_config[index].tba[0]; + + port->wsba[1].csr = saved_config[index].wsba[1]; + port->wsm[1].csr = saved_config[index].wsm[1]; + port->tba[1].csr = saved_config[index].tba[1]; + + port->wsba[2].csr = saved_config[index].wsba[2]; + port->wsm[2].csr = saved_config[index].wsm[2]; + port->tba[2].csr = saved_config[index].tba[2]; + + port->wsba[3].csr = saved_config[index].wsba[3]; + port->wsm[3].csr = saved_config[index].wsm[3]; + port->tba[3].csr = saved_config[index].tba[3]; +} + +static void +titan_kill_pachips(titan_pachip *pachip0, titan_pachip *pachip1) +{ + if (titan_pchip1_present) { + titan_kill_one_pachip_port(&pachip1->g_port, 1); + titan_kill_one_pachip_port(&pachip1->a_port, 3); + } + titan_kill_one_pachip_port(&pachip0->g_port, 0); + titan_kill_one_pachip_port(&pachip0->a_port, 2); +} + +void +titan_kill_arch(int mode) +{ + titan_kill_pachips(TITAN_pachip0, TITAN_pachip1); +} + + +/* + * IO map support. + */ + +void __iomem * +titan_ioportmap(unsigned long addr) +{ + FIXUP_IOADDR_VGA(addr); + return (void __iomem *)(addr + TITAN_IO_BIAS); +} + + +void __iomem * +titan_ioremap(unsigned long addr, unsigned long size) +{ + int h = (addr & TITAN_HOSE_MASK) >> TITAN_HOSE_SHIFT; + unsigned long baddr = addr & ~TITAN_HOSE_MASK; + unsigned long last = baddr + size - 1; + struct pci_controller *hose; + struct vm_struct *area; + unsigned long vaddr; + unsigned long *ptes; + unsigned long pfn; + + /* + * Adjust the address and hose, if necessary. + */ + if (pci_vga_hose && __is_mem_vga(addr)) { + h = pci_vga_hose->index; + addr += pci_vga_hose->mem_space->start; + } + + /* + * Find the hose. + */ + for (hose = hose_head; hose; hose = hose->next) + if (hose->index == h) + break; + if (!hose) + return NULL; + + /* + * Is it direct-mapped? + */ + if ((baddr >= __direct_map_base) && + ((baddr + size - 1) < __direct_map_base + __direct_map_size)) { + vaddr = addr - __direct_map_base + TITAN_MEM_BIAS; + return (void __iomem *) vaddr; + } + + /* + * Check the scatter-gather arena. + */ + if (hose->sg_pci && + baddr >= (unsigned long)hose->sg_pci->dma_base && + last < (unsigned long)hose->sg_pci->dma_base + hose->sg_pci->size){ + + /* + * Adjust the limits (mappings must be page aligned) + */ + baddr -= hose->sg_pci->dma_base; + last -= hose->sg_pci->dma_base; + baddr &= PAGE_MASK; + size = PAGE_ALIGN(last) - baddr; + + /* + * Map it + */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) { + printk("ioremap failed... no vm_area...\n"); + return NULL; + } + + ptes = hose->sg_pci->ptes; + for (vaddr = (unsigned long)area->addr; + baddr <= last; + baddr += PAGE_SIZE, vaddr += PAGE_SIZE) { + pfn = ptes[baddr >> PAGE_SHIFT]; + if (!(pfn & 1)) { + printk("ioremap failed... pte not valid...\n"); + vfree(area->addr); + return NULL; + } + pfn >>= 1; /* make it a true pfn */ + + if (__alpha_remap_area_pages(vaddr, + pfn << PAGE_SHIFT, + PAGE_SIZE, 0)) { + printk("FAILED to remap_area_pages...\n"); + vfree(area->addr); + return NULL; + } + } + + flush_tlb_all(); + + vaddr = (unsigned long)area->addr + (addr & ~PAGE_MASK); + return (void __iomem *) vaddr; + } + + /* Assume a legacy (read: VGA) address, and return appropriately. */ + return (void __iomem *)(addr + TITAN_MEM_BIAS); +} + +void +titan_iounmap(volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (addr >= VMALLOC_START) + vfree((void *)(PAGE_MASK & addr)); +} + +int +titan_is_mmio(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + + if (addr >= VMALLOC_START) + return 1; + else + return (addr & 0x100000000UL) == 0; +} + +#ifndef CONFIG_ALPHA_GENERIC +EXPORT_SYMBOL(titan_ioportmap); +EXPORT_SYMBOL(titan_ioremap); +EXPORT_SYMBOL(titan_iounmap); +EXPORT_SYMBOL(titan_is_mmio); +#endif + +/* + * AGP GART Support. + */ +#include <linux/agp_backend.h> +#include <asm/agp_backend.h> +#include <linux/slab.h> +#include <linux/delay.h> + +struct titan_agp_aperture { + struct pci_iommu_arena *arena; + long pg_start; + long pg_count; +}; + +static int +titan_agp_setup(alpha_agp_info *agp) +{ + struct titan_agp_aperture *aper; + + if (!alpha_agpgart_size) + return -ENOMEM; + + aper = kmalloc(sizeof(struct titan_agp_aperture), GFP_KERNEL); + if (aper == NULL) + return -ENOMEM; + + aper->arena = agp->hose->sg_pci; + aper->pg_count = alpha_agpgart_size / PAGE_SIZE; + aper->pg_start = iommu_reserve(aper->arena, aper->pg_count, + aper->pg_count - 1); + if (aper->pg_start < 0) { + printk(KERN_ERR "Failed to reserve AGP memory\n"); + kfree(aper); + return -ENOMEM; + } + + agp->aperture.bus_base = + aper->arena->dma_base + aper->pg_start * PAGE_SIZE; + agp->aperture.size = aper->pg_count * PAGE_SIZE; + agp->aperture.sysdata = aper; + + return 0; +} + +static void +titan_agp_cleanup(alpha_agp_info *agp) +{ + struct titan_agp_aperture *aper = agp->aperture.sysdata; + int status; + + status = iommu_release(aper->arena, aper->pg_start, aper->pg_count); + if (status == -EBUSY) { + printk(KERN_WARNING + "Attempted to release bound AGP memory - unbinding\n"); + iommu_unbind(aper->arena, aper->pg_start, aper->pg_count); + status = iommu_release(aper->arena, aper->pg_start, + aper->pg_count); + } + if (status < 0) + printk(KERN_ERR "Failed to release AGP memory\n"); + + kfree(aper); + kfree(agp); +} + +static int +titan_agp_configure(alpha_agp_info *agp) +{ + union TPAchipPCTL pctl; + titan_pachip_port *port = agp->private; + pctl.pctl_q_whole = port->pctl.csr; + + /* Side-Band Addressing? */ + pctl.pctl_r_bits.apctl_v_agp_sba_en = agp->mode.bits.sba; + + /* AGP Rate? */ + pctl.pctl_r_bits.apctl_v_agp_rate = 0; /* 1x */ + if (agp->mode.bits.rate & 2) + pctl.pctl_r_bits.apctl_v_agp_rate = 1; /* 2x */ +#if 0 + if (agp->mode.bits.rate & 4) + pctl.pctl_r_bits.apctl_v_agp_rate = 2; /* 4x */ +#endif + + /* RQ Depth? */ + pctl.pctl_r_bits.apctl_v_agp_hp_rd = 2; + pctl.pctl_r_bits.apctl_v_agp_lp_rd = 7; + + /* + * AGP Enable. + */ + pctl.pctl_r_bits.apctl_v_agp_en = agp->mode.bits.enable; + + /* Tell the user. */ + printk("Enabling AGP: %dX%s\n", + 1 << pctl.pctl_r_bits.apctl_v_agp_rate, + pctl.pctl_r_bits.apctl_v_agp_sba_en ? " - SBA" : ""); + + /* Write it. */ + port->pctl.csr = pctl.pctl_q_whole; + + /* And wait at least 5000 66MHz cycles (per Titan spec). */ + udelay(100); + + return 0; +} + +static int +titan_agp_bind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *mem) +{ + struct titan_agp_aperture *aper = agp->aperture.sysdata; + return iommu_bind(aper->arena, aper->pg_start + pg_start, + mem->page_count, mem->pages); +} + +static int +titan_agp_unbind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *mem) +{ + struct titan_agp_aperture *aper = agp->aperture.sysdata; + return iommu_unbind(aper->arena, aper->pg_start + pg_start, + mem->page_count); +} + +static unsigned long +titan_agp_translate(alpha_agp_info *agp, dma_addr_t addr) +{ + struct titan_agp_aperture *aper = agp->aperture.sysdata; + unsigned long baddr = addr - aper->arena->dma_base; + unsigned long pte; + + if (addr < agp->aperture.bus_base || + addr >= agp->aperture.bus_base + agp->aperture.size) { + printk("%s: addr out of range\n", __func__); + return -EINVAL; + } + + pte = aper->arena->ptes[baddr >> PAGE_SHIFT]; + if (!(pte & 1)) { + printk("%s: pte not valid\n", __func__); + return -EINVAL; + } + + return (pte >> 1) << PAGE_SHIFT; +} + +struct alpha_agp_ops titan_agp_ops = +{ + .setup = titan_agp_setup, + .cleanup = titan_agp_cleanup, + .configure = titan_agp_configure, + .bind = titan_agp_bind_memory, + .unbind = titan_agp_unbind_memory, + .translate = titan_agp_translate +}; + +alpha_agp_info * +titan_agp_info(void) +{ + alpha_agp_info *agp; + struct pci_controller *hose; + titan_pachip_port *port; + int hosenum = -1; + union TPAchipPCTL pctl; + + /* + * Find the AGP port. + */ + port = &TITAN_pachip0->a_port; + if (titan_query_agp(port)) + hosenum = 2; + if (hosenum < 0 && + titan_pchip1_present && + titan_query_agp(port = &TITAN_pachip1->a_port)) + hosenum = 3; + + /* + * Find the hose the port is on. + */ + for (hose = hose_head; hose; hose = hose->next) + if (hose->index == hosenum) + break; + + if (!hose || !hose->sg_pci) + return NULL; + + /* + * Allocate the info structure. + */ + agp = kmalloc(sizeof(*agp), GFP_KERNEL); + if (!agp) + return NULL; + + /* + * Fill it in. + */ + agp->hose = hose; + agp->private = port; + agp->ops = &titan_agp_ops; + + /* + * Aperture - not configured until ops.setup(). + * + * FIXME - should we go ahead and allocate it here? + */ + agp->aperture.bus_base = 0; + agp->aperture.size = 0; + agp->aperture.sysdata = NULL; + + /* + * Capabilities. + */ + agp->capability.lw = 0; + agp->capability.bits.rate = 3; /* 2x, 1x */ + agp->capability.bits.sba = 1; + agp->capability.bits.rq = 7; /* 8 - 1 */ + + /* + * Mode. + */ + pctl.pctl_q_whole = port->pctl.csr; + agp->mode.lw = 0; + agp->mode.bits.rate = 1 << pctl.pctl_r_bits.apctl_v_agp_rate; + agp->mode.bits.sba = pctl.pctl_r_bits.apctl_v_agp_sba_en; + agp->mode.bits.rq = 7; /* RQ Depth? */ + agp->mode.bits.enable = pctl.pctl_r_bits.apctl_v_agp_en; + + return agp; +} diff --git a/arch/alpha/kernel/core_tsunami.c b/arch/alpha/kernel/core_tsunami.c new file mode 100644 index 00000000..61893d7b --- /dev/null +++ b/arch/alpha/kernel/core_tsunami.c @@ -0,0 +1,481 @@ +/* + * linux/arch/alpha/kernel/core_tsunami.c + * + * Based on code written by David A. Rusling (david.rusling@reo.mts.dec.com). + * + * Code common to all TSUNAMI core logic chips. + */ + +#define __EXTERN_INLINE inline +#include <asm/io.h> +#include <asm/core_tsunami.h> +#undef __EXTERN_INLINE + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/bootmem.h> + +#include <asm/ptrace.h> +#include <asm/smp.h> +#include <asm/vga.h> + +#include "proto.h" +#include "pci_impl.h" + +/* Save Tsunami configuration data as the console had it set up. */ + +struct +{ + unsigned long wsba[4]; + unsigned long wsm[4]; + unsigned long tba[4]; +} saved_config[2] __attribute__((common)); + +/* + * NOTE: Herein lie back-to-back mb instructions. They are magic. + * One plausible explanation is that the I/O controller does not properly + * handle the system transaction. Another involves timing. Ho hum. + */ + +/* + * BIOS32-style PCI interface: + */ + +#define DEBUG_CONFIG 0 + +#if DEBUG_CONFIG +# define DBG_CFG(args) printk args +#else +# define DBG_CFG(args) +#endif + + +/* + * Given a bus, device, and function number, compute resulting + * configuration space address + * accordingly. It is therefore not safe to have concurrent + * invocations to configuration space access routines, but there + * really shouldn't be any need for this. + * + * Note that all config space accesses use Type 1 address format. + * + * Note also that type 1 is determined by non-zero bus number. + * + * Type 1: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | | | | | | | | | |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|1| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:24 reserved + * 23:16 bus number (8 bits = 128 possible buses) + * 15:11 Device number (5 bits) + * 10:8 function number + * 7:2 register number + * + * Notes: + * The function number selects which function of a multi-function device + * (e.g., SCSI and Ethernet). + * + * The register selects a DWORD (32 bit) register offset. Hence it + * doesn't get shifted by 2 bits as we want to "drop" the bottom two + * bits. + */ + +static int +mk_conf_addr(struct pci_bus *pbus, unsigned int device_fn, int where, + unsigned long *pci_addr, unsigned char *type1) +{ + struct pci_controller *hose = pbus->sysdata; + unsigned long addr; + u8 bus = pbus->number; + + DBG_CFG(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x, " + "pci_addr=0x%p, type1=0x%p)\n", + bus, device_fn, where, pci_addr, type1)); + + if (!pbus->parent) /* No parent means peer PCI bus. */ + bus = 0; + *type1 = (bus != 0); + + addr = (bus << 16) | (device_fn << 8) | where; + addr |= hose->config_space_base; + + *pci_addr = addr; + DBG_CFG(("mk_conf_addr: returning pci_addr 0x%lx\n", addr)); + return 0; +} + +static int +tsunami_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + *value = __kernel_ldbu(*(vucp)addr); + break; + case 2: + *value = __kernel_ldwu(*(vusp)addr); + break; + case 4: + *value = *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int +tsunami_write_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + __kernel_stb(value, *(vucp)addr); + mb(); + __kernel_ldbu(*(vucp)addr); + break; + case 2: + __kernel_stw(value, *(vusp)addr); + mb(); + __kernel_ldwu(*(vusp)addr); + break; + case 4: + *(vuip)addr = value; + mb(); + *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops tsunami_pci_ops = +{ + .read = tsunami_read_config, + .write = tsunami_write_config, +}; + +void +tsunami_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) +{ + tsunami_pchip *pchip = hose->index ? TSUNAMI_pchip1 : TSUNAMI_pchip0; + volatile unsigned long *csr; + unsigned long value; + + /* We can invalidate up to 8 tlb entries in a go. The flush + matches against <31:16> in the pci address. */ + csr = &pchip->tlbia.csr; + if (((start ^ end) & 0xffff0000) == 0) + csr = &pchip->tlbiv.csr; + + /* For TBIA, it doesn't matter what value we write. For TBI, + it's the shifted tag bits. */ + value = (start & 0xffff0000) >> 12; + + *csr = value; + mb(); + *csr; +} + +#ifdef NXM_MACHINE_CHECKS_ON_TSUNAMI +static long __init +tsunami_probe_read(volatile unsigned long *vaddr) +{ + long dont_care, probe_result; + int cpu = smp_processor_id(); + int s = swpipl(IPL_MCHECK - 1); + + mcheck_taken(cpu) = 0; + mcheck_expected(cpu) = 1; + mb(); + dont_care = *vaddr; + draina(); + mcheck_expected(cpu) = 0; + probe_result = !mcheck_taken(cpu); + mcheck_taken(cpu) = 0; + setipl(s); + + printk("dont_care == 0x%lx\n", dont_care); + + return probe_result; +} + +static long __init +tsunami_probe_write(volatile unsigned long *vaddr) +{ + long true_contents, probe_result = 1; + + TSUNAMI_cchip->misc.csr |= (1L << 28); /* clear NXM... */ + true_contents = *vaddr; + *vaddr = 0; + draina(); + if (TSUNAMI_cchip->misc.csr & (1L << 28)) { + int source = (TSUNAMI_cchip->misc.csr >> 29) & 7; + TSUNAMI_cchip->misc.csr |= (1L << 28); /* ...and unlock NXS. */ + probe_result = 0; + printk("tsunami_probe_write: unit %d at 0x%016lx\n", source, + (unsigned long)vaddr); + } + if (probe_result) + *vaddr = true_contents; + return probe_result; +} +#else +#define tsunami_probe_read(ADDR) 1 +#endif /* NXM_MACHINE_CHECKS_ON_TSUNAMI */ + +static void __init +tsunami_init_one_pchip(tsunami_pchip *pchip, int index) +{ + struct pci_controller *hose; + + if (tsunami_probe_read(&pchip->pctl.csr) == 0) + return; + + hose = alloc_pci_controller(); + if (index == 0) + pci_isa_hose = hose; + hose->io_space = alloc_resource(); + hose->mem_space = alloc_resource(); + + /* This is for userland consumption. For some reason, the 40-bit + PIO bias that we use in the kernel through KSEG didn't work for + the page table based user mappings. So make sure we get the + 43-bit PIO bias. */ + hose->sparse_mem_base = 0; + hose->sparse_io_base = 0; + hose->dense_mem_base + = (TSUNAMI_MEM(index) & 0xffffffffffL) | 0x80000000000L; + hose->dense_io_base + = (TSUNAMI_IO(index) & 0xffffffffffL) | 0x80000000000L; + + hose->config_space_base = TSUNAMI_CONF(index); + hose->index = index; + + hose->io_space->start = TSUNAMI_IO(index) - TSUNAMI_IO_BIAS; + hose->io_space->end = hose->io_space->start + TSUNAMI_IO_SPACE - 1; + hose->io_space->name = pci_io_names[index]; + hose->io_space->flags = IORESOURCE_IO; + + hose->mem_space->start = TSUNAMI_MEM(index) - TSUNAMI_MEM_BIAS; + hose->mem_space->end = hose->mem_space->start + 0xffffffff; + hose->mem_space->name = pci_mem_names[index]; + hose->mem_space->flags = IORESOURCE_MEM; + + if (request_resource(&ioport_resource, hose->io_space) < 0) + printk(KERN_ERR "Failed to request IO on hose %d\n", index); + if (request_resource(&iomem_resource, hose->mem_space) < 0) + printk(KERN_ERR "Failed to request MEM on hose %d\n", index); + + /* + * Save the existing PCI window translations. SRM will + * need them when we go to reboot. + */ + + saved_config[index].wsba[0] = pchip->wsba[0].csr; + saved_config[index].wsm[0] = pchip->wsm[0].csr; + saved_config[index].tba[0] = pchip->tba[0].csr; + + saved_config[index].wsba[1] = pchip->wsba[1].csr; + saved_config[index].wsm[1] = pchip->wsm[1].csr; + saved_config[index].tba[1] = pchip->tba[1].csr; + + saved_config[index].wsba[2] = pchip->wsba[2].csr; + saved_config[index].wsm[2] = pchip->wsm[2].csr; + saved_config[index].tba[2] = pchip->tba[2].csr; + + saved_config[index].wsba[3] = pchip->wsba[3].csr; + saved_config[index].wsm[3] = pchip->wsm[3].csr; + saved_config[index].tba[3] = pchip->tba[3].csr; + + /* + * Set up the PCI to main memory translation windows. + * + * Note: Window 3 is scatter-gather only + * + * Window 0 is scatter-gather 8MB at 8MB (for isa) + * Window 1 is scatter-gather (up to) 1GB at 1GB + * Window 2 is direct access 2GB at 2GB + * + * NOTE: we need the align_entry settings for Acer devices on ES40, + * specifically floppy and IDE when memory is larger than 2GB. + */ + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); + /* Initially set for 4 PTEs, but will be overridden to 64K for ISA. */ + hose->sg_isa->align_entry = 4; + + hose->sg_pci = iommu_arena_new(hose, 0x40000000, + size_for_memory(0x40000000), 0); + hose->sg_pci->align_entry = 4; /* Tsunami caches 4 PTEs at a time */ + + __direct_map_base = 0x80000000; + __direct_map_size = 0x80000000; + + pchip->wsba[0].csr = hose->sg_isa->dma_base | 3; + pchip->wsm[0].csr = (hose->sg_isa->size - 1) & 0xfff00000; + pchip->tba[0].csr = virt_to_phys(hose->sg_isa->ptes); + + pchip->wsba[1].csr = hose->sg_pci->dma_base | 3; + pchip->wsm[1].csr = (hose->sg_pci->size - 1) & 0xfff00000; + pchip->tba[1].csr = virt_to_phys(hose->sg_pci->ptes); + + pchip->wsba[2].csr = 0x80000000 | 1; + pchip->wsm[2].csr = (0x80000000 - 1) & 0xfff00000; + pchip->tba[2].csr = 0; + + pchip->wsba[3].csr = 0; + + /* Enable the Monster Window to make DAC pci64 possible. */ + pchip->pctl.csr |= pctl_m_mwin; + + tsunami_pci_tbi(hose, 0, -1); +} + + +void __iomem * +tsunami_ioportmap(unsigned long addr) +{ + FIXUP_IOADDR_VGA(addr); + return (void __iomem *)(addr + TSUNAMI_IO_BIAS); +} + +void __iomem * +tsunami_ioremap(unsigned long addr, unsigned long size) +{ + FIXUP_MEMADDR_VGA(addr); + return (void __iomem *)(addr + TSUNAMI_MEM_BIAS); +} + +#ifndef CONFIG_ALPHA_GENERIC +EXPORT_SYMBOL(tsunami_ioportmap); +EXPORT_SYMBOL(tsunami_ioremap); +#endif + +void __init +tsunami_init_arch(void) +{ +#ifdef NXM_MACHINE_CHECKS_ON_TSUNAMI + unsigned long tmp; + + /* Ho hum.. init_arch is called before init_IRQ, but we need to be + able to handle machine checks. So install the handler now. */ + wrent(entInt, 0); + + /* NXMs just don't matter to Tsunami--unless they make it + choke completely. */ + tmp = (unsigned long)(TSUNAMI_cchip - 1); + printk("%s: probing bogus address: 0x%016lx\n", __func__, bogus_addr); + printk("\tprobe %s\n", + tsunami_probe_write((unsigned long *)bogus_addr) + ? "succeeded" : "failed"); +#endif /* NXM_MACHINE_CHECKS_ON_TSUNAMI */ + +#if 0 + printk("%s: CChip registers:\n", __func__); + printk("%s: CSR_CSC 0x%lx\n", __func__, TSUNAMI_cchip->csc.csr); + printk("%s: CSR_MTR 0x%lx\n", __func__, TSUNAMI_cchip.mtr.csr); + printk("%s: CSR_MISC 0x%lx\n", __func__, TSUNAMI_cchip->misc.csr); + printk("%s: CSR_DIM0 0x%lx\n", __func__, TSUNAMI_cchip->dim0.csr); + printk("%s: CSR_DIM1 0x%lx\n", __func__, TSUNAMI_cchip->dim1.csr); + printk("%s: CSR_DIR0 0x%lx\n", __func__, TSUNAMI_cchip->dir0.csr); + printk("%s: CSR_DIR1 0x%lx\n", __func__, TSUNAMI_cchip->dir1.csr); + printk("%s: CSR_DRIR 0x%lx\n", __func__, TSUNAMI_cchip->drir.csr); + + printk("%s: DChip registers:\n"); + printk("%s: CSR_DSC 0x%lx\n", __func__, TSUNAMI_dchip->dsc.csr); + printk("%s: CSR_STR 0x%lx\n", __func__, TSUNAMI_dchip->str.csr); + printk("%s: CSR_DREV 0x%lx\n", __func__, TSUNAMI_dchip->drev.csr); +#endif + /* With multiple PCI busses, we play with I/O as physical addrs. */ + ioport_resource.end = ~0UL; + + /* Find how many hoses we have, and initialize them. TSUNAMI + and TYPHOON can have 2, but might only have 1 (DS10). */ + + tsunami_init_one_pchip(TSUNAMI_pchip0, 0); + if (TSUNAMI_cchip->csc.csr & 1L<<14) + tsunami_init_one_pchip(TSUNAMI_pchip1, 1); + + /* Check for graphic console location (if any). */ + find_console_vga_hose(); +} + +static void +tsunami_kill_one_pchip(tsunami_pchip *pchip, int index) +{ + pchip->wsba[0].csr = saved_config[index].wsba[0]; + pchip->wsm[0].csr = saved_config[index].wsm[0]; + pchip->tba[0].csr = saved_config[index].tba[0]; + + pchip->wsba[1].csr = saved_config[index].wsba[1]; + pchip->wsm[1].csr = saved_config[index].wsm[1]; + pchip->tba[1].csr = saved_config[index].tba[1]; + + pchip->wsba[2].csr = saved_config[index].wsba[2]; + pchip->wsm[2].csr = saved_config[index].wsm[2]; + pchip->tba[2].csr = saved_config[index].tba[2]; + + pchip->wsba[3].csr = saved_config[index].wsba[3]; + pchip->wsm[3].csr = saved_config[index].wsm[3]; + pchip->tba[3].csr = saved_config[index].tba[3]; +} + +void +tsunami_kill_arch(int mode) +{ + tsunami_kill_one_pchip(TSUNAMI_pchip0, 0); + if (TSUNAMI_cchip->csc.csr & 1L<<14) + tsunami_kill_one_pchip(TSUNAMI_pchip1, 1); +} + +static inline void +tsunami_pci_clr_err_1(tsunami_pchip *pchip) +{ + pchip->perror.csr; + pchip->perror.csr = 0x040; + mb(); + pchip->perror.csr; +} + +static inline void +tsunami_pci_clr_err(void) +{ + tsunami_pci_clr_err_1(TSUNAMI_pchip0); + + /* TSUNAMI and TYPHOON can have 2, but might only have 1 (DS10) */ + if (TSUNAMI_cchip->csc.csr & 1L<<14) + tsunami_pci_clr_err_1(TSUNAMI_pchip1); +} + +void +tsunami_machine_check(unsigned long vector, unsigned long la_ptr) +{ + /* Clear error before any reporting. */ + mb(); + mb(); /* magic */ + draina(); + tsunami_pci_clr_err(); + wrmces(0x7); + mb(); + + process_mcheck_info(vector, la_ptr, "TSUNAMI", + mcheck_expected(smp_processor_id())); +} diff --git a/arch/alpha/kernel/core_wildfire.c b/arch/alpha/kernel/core_wildfire.c new file mode 100644 index 00000000..7e072443 --- /dev/null +++ b/arch/alpha/kernel/core_wildfire.c @@ -0,0 +1,657 @@ +/* + * linux/arch/alpha/kernel/core_wildfire.c + * + * Wildfire support. + * + * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE + */ + +#define __EXTERN_INLINE inline +#include <asm/io.h> +#include <asm/core_wildfire.h> +#undef __EXTERN_INLINE + +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/sched.h> +#include <linux/init.h> + +#include <asm/ptrace.h> +#include <asm/smp.h> + +#include "proto.h" +#include "pci_impl.h" + +#define DEBUG_CONFIG 0 +#define DEBUG_DUMP_REGS 0 +#define DEBUG_DUMP_CONFIG 1 + +#if DEBUG_CONFIG +# define DBG_CFG(args) printk args +#else +# define DBG_CFG(args) +#endif + +#if DEBUG_DUMP_REGS +static void wildfire_dump_pci_regs(int qbbno, int hoseno); +static void wildfire_dump_pca_regs(int qbbno, int pcano); +static void wildfire_dump_qsa_regs(int qbbno); +static void wildfire_dump_qsd_regs(int qbbno); +static void wildfire_dump_iop_regs(int qbbno); +static void wildfire_dump_gp_regs(int qbbno); +#endif +#if DEBUG_DUMP_CONFIG +static void wildfire_dump_hardware_config(void); +#endif + +unsigned char wildfire_hard_qbb_map[WILDFIRE_MAX_QBB]; +unsigned char wildfire_soft_qbb_map[WILDFIRE_MAX_QBB]; +#define QBB_MAP_EMPTY 0xff + +unsigned long wildfire_hard_qbb_mask; +unsigned long wildfire_soft_qbb_mask; +unsigned long wildfire_gp_mask; +unsigned long wildfire_hs_mask; +unsigned long wildfire_iop_mask; +unsigned long wildfire_ior_mask; +unsigned long wildfire_pca_mask; +unsigned long wildfire_cpu_mask; +unsigned long wildfire_mem_mask; + +void __init +wildfire_init_hose(int qbbno, int hoseno) +{ + struct pci_controller *hose; + wildfire_pci *pci; + + hose = alloc_pci_controller(); + hose->io_space = alloc_resource(); + hose->mem_space = alloc_resource(); + + /* This is for userland consumption. */ + hose->sparse_mem_base = 0; + hose->sparse_io_base = 0; + hose->dense_mem_base = WILDFIRE_MEM(qbbno, hoseno); + hose->dense_io_base = WILDFIRE_IO(qbbno, hoseno); + + hose->config_space_base = WILDFIRE_CONF(qbbno, hoseno); + hose->index = (qbbno << 3) + hoseno; + + hose->io_space->start = WILDFIRE_IO(qbbno, hoseno) - WILDFIRE_IO_BIAS; + hose->io_space->end = hose->io_space->start + WILDFIRE_IO_SPACE - 1; + hose->io_space->name = pci_io_names[hoseno]; + hose->io_space->flags = IORESOURCE_IO; + + hose->mem_space->start = WILDFIRE_MEM(qbbno, hoseno)-WILDFIRE_MEM_BIAS; + hose->mem_space->end = hose->mem_space->start + 0xffffffff; + hose->mem_space->name = pci_mem_names[hoseno]; + hose->mem_space->flags = IORESOURCE_MEM; + + if (request_resource(&ioport_resource, hose->io_space) < 0) + printk(KERN_ERR "Failed to request IO on qbb %d hose %d\n", + qbbno, hoseno); + if (request_resource(&iomem_resource, hose->mem_space) < 0) + printk(KERN_ERR "Failed to request MEM on qbb %d hose %d\n", + qbbno, hoseno); + +#if DEBUG_DUMP_REGS + wildfire_dump_pci_regs(qbbno, hoseno); +#endif + + /* + * Set up the PCI to main memory translation windows. + * + * Note: Window 3 is scatter-gather only + * + * Window 0 is scatter-gather 8MB at 8MB (for isa) + * Window 1 is direct access 1GB at 1GB + * Window 2 is direct access 1GB at 2GB + * Window 3 is scatter-gather 128MB at 3GB + * ??? We ought to scale window 3 memory. + * + */ + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); + hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x08000000, 0); + + pci = WILDFIRE_pci(qbbno, hoseno); + + pci->pci_window[0].wbase.csr = hose->sg_isa->dma_base | 3; + pci->pci_window[0].wmask.csr = (hose->sg_isa->size - 1) & 0xfff00000; + pci->pci_window[0].tbase.csr = virt_to_phys(hose->sg_isa->ptes); + + pci->pci_window[1].wbase.csr = 0x40000000 | 1; + pci->pci_window[1].wmask.csr = (0x40000000 -1) & 0xfff00000; + pci->pci_window[1].tbase.csr = 0; + + pci->pci_window[2].wbase.csr = 0x80000000 | 1; + pci->pci_window[2].wmask.csr = (0x40000000 -1) & 0xfff00000; + pci->pci_window[2].tbase.csr = 0x40000000; + + pci->pci_window[3].wbase.csr = hose->sg_pci->dma_base | 3; + pci->pci_window[3].wmask.csr = (hose->sg_pci->size - 1) & 0xfff00000; + pci->pci_window[3].tbase.csr = virt_to_phys(hose->sg_pci->ptes); + + wildfire_pci_tbi(hose, 0, 0); /* Flush TLB at the end. */ +} + +void __init +wildfire_init_pca(int qbbno, int pcano) +{ + + /* Test for PCA existence first. */ + if (!WILDFIRE_PCA_EXISTS(qbbno, pcano)) + return; + +#if DEBUG_DUMP_REGS + wildfire_dump_pca_regs(qbbno, pcano); +#endif + + /* Do both hoses of the PCA. */ + wildfire_init_hose(qbbno, (pcano << 1) + 0); + wildfire_init_hose(qbbno, (pcano << 1) + 1); +} + +void __init +wildfire_init_qbb(int qbbno) +{ + int pcano; + + /* Test for QBB existence first. */ + if (!WILDFIRE_QBB_EXISTS(qbbno)) + return; + +#if DEBUG_DUMP_REGS + wildfire_dump_qsa_regs(qbbno); + wildfire_dump_qsd_regs(qbbno); + wildfire_dump_iop_regs(qbbno); + wildfire_dump_gp_regs(qbbno); +#endif + + /* Init all PCAs here. */ + for (pcano = 0; pcano < WILDFIRE_PCA_PER_QBB; pcano++) { + wildfire_init_pca(qbbno, pcano); + } +} + +void __init +wildfire_hardware_probe(void) +{ + unsigned long temp; + unsigned int hard_qbb, soft_qbb; + wildfire_fast_qsd *fast = WILDFIRE_fast_qsd(); + wildfire_qsd *qsd; + wildfire_qsa *qsa; + wildfire_iop *iop; + wildfire_gp *gp; + wildfire_ne *ne; + wildfire_fe *fe; + int i; + + temp = fast->qsd_whami.csr; +#if 0 + printk(KERN_ERR "fast QSD_WHAMI at base %p is 0x%lx\n", fast, temp); +#endif + + hard_qbb = (temp >> 8) & 7; + soft_qbb = (temp >> 4) & 7; + + /* Init the HW configuration variables. */ + wildfire_hard_qbb_mask = (1 << hard_qbb); + wildfire_soft_qbb_mask = (1 << soft_qbb); + + wildfire_gp_mask = 0; + wildfire_hs_mask = 0; + wildfire_iop_mask = 0; + wildfire_ior_mask = 0; + wildfire_pca_mask = 0; + + wildfire_cpu_mask = 0; + wildfire_mem_mask = 0; + + memset(wildfire_hard_qbb_map, QBB_MAP_EMPTY, WILDFIRE_MAX_QBB); + memset(wildfire_soft_qbb_map, QBB_MAP_EMPTY, WILDFIRE_MAX_QBB); + + /* First, determine which QBBs are present. */ + qsa = WILDFIRE_qsa(soft_qbb); + + temp = qsa->qsa_qbb_id.csr; +#if 0 + printk(KERN_ERR "QSA_QBB_ID at base %p is 0x%lx\n", qsa, temp); +#endif + + if (temp & 0x40) /* Is there an HS? */ + wildfire_hs_mask = 1; + + if (temp & 0x20) { /* Is there a GP? */ + gp = WILDFIRE_gp(soft_qbb); + temp = 0; + for (i = 0; i < 4; i++) { + temp |= gp->gpa_qbb_map[i].csr << (i * 8); +#if 0 + printk(KERN_ERR "GPA_QBB_MAP[%d] at base %p is 0x%lx\n", + i, gp, temp); +#endif + } + + for (hard_qbb = 0; hard_qbb < WILDFIRE_MAX_QBB; hard_qbb++) { + if (temp & 8) { /* Is there a QBB? */ + soft_qbb = temp & 7; + wildfire_hard_qbb_mask |= (1 << hard_qbb); + wildfire_soft_qbb_mask |= (1 << soft_qbb); + } + temp >>= 4; + } + wildfire_gp_mask = wildfire_soft_qbb_mask; + } + + /* Next determine each QBBs resources. */ + for (soft_qbb = 0; soft_qbb < WILDFIRE_MAX_QBB; soft_qbb++) { + if (WILDFIRE_QBB_EXISTS(soft_qbb)) { + qsd = WILDFIRE_qsd(soft_qbb); + temp = qsd->qsd_whami.csr; +#if 0 + printk(KERN_ERR "QSD_WHAMI at base %p is 0x%lx\n", qsd, temp); +#endif + hard_qbb = (temp >> 8) & 7; + wildfire_hard_qbb_map[hard_qbb] = soft_qbb; + wildfire_soft_qbb_map[soft_qbb] = hard_qbb; + + qsa = WILDFIRE_qsa(soft_qbb); + temp = qsa->qsa_qbb_pop[0].csr; +#if 0 + printk(KERN_ERR "QSA_QBB_POP_0 at base %p is 0x%lx\n", qsa, temp); +#endif + wildfire_cpu_mask |= ((temp >> 0) & 0xf) << (soft_qbb << 2); + wildfire_mem_mask |= ((temp >> 4) & 0xf) << (soft_qbb << 2); + + temp = qsa->qsa_qbb_pop[1].csr; +#if 0 + printk(KERN_ERR "QSA_QBB_POP_1 at base %p is 0x%lx\n", qsa, temp); +#endif + wildfire_iop_mask |= (1 << soft_qbb); + wildfire_ior_mask |= ((temp >> 4) & 0xf) << (soft_qbb << 2); + + temp = qsa->qsa_qbb_id.csr; +#if 0 + printk(KERN_ERR "QSA_QBB_ID at %p is 0x%lx\n", qsa, temp); +#endif + if (temp & 0x20) + wildfire_gp_mask |= (1 << soft_qbb); + + /* Probe for PCA existence here. */ + for (i = 0; i < WILDFIRE_PCA_PER_QBB; i++) { + iop = WILDFIRE_iop(soft_qbb); + ne = WILDFIRE_ne(soft_qbb, i); + fe = WILDFIRE_fe(soft_qbb, i); + + if ((iop->iop_hose[i].init.csr & 1) == 1 && + ((ne->ne_what_am_i.csr & 0xf00000300UL) == 0x100000300UL) && + ((fe->fe_what_am_i.csr & 0xf00000300UL) == 0x100000200UL)) + { + wildfire_pca_mask |= 1 << ((soft_qbb << 2) + i); + } + } + + } + } +#if DEBUG_DUMP_CONFIG + wildfire_dump_hardware_config(); +#endif +} + +void __init +wildfire_init_arch(void) +{ + int qbbno; + + /* With multiple PCI buses, we play with I/O as physical addrs. */ + ioport_resource.end = ~0UL; + + + /* Probe the hardware for info about configuration. */ + wildfire_hardware_probe(); + + /* Now init all the found QBBs. */ + for (qbbno = 0; qbbno < WILDFIRE_MAX_QBB; qbbno++) { + wildfire_init_qbb(qbbno); + } + + /* Normal direct PCI DMA mapping. */ + __direct_map_base = 0x40000000UL; + __direct_map_size = 0x80000000UL; +} + +void +wildfire_machine_check(unsigned long vector, unsigned long la_ptr) +{ + mb(); + mb(); /* magic */ + draina(); + /* FIXME: clear pci errors */ + wrmces(0x7); + mb(); + + process_mcheck_info(vector, la_ptr, "WILDFIRE", + mcheck_expected(smp_processor_id())); +} + +void +wildfire_kill_arch(int mode) +{ +} + +void +wildfire_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) +{ + int qbbno = hose->index >> 3; + int hoseno = hose->index & 7; + wildfire_pci *pci = WILDFIRE_pci(qbbno, hoseno); + + mb(); + pci->pci_flush_tlb.csr; /* reading does the trick */ +} + +static int +mk_conf_addr(struct pci_bus *pbus, unsigned int device_fn, int where, + unsigned long *pci_addr, unsigned char *type1) +{ + struct pci_controller *hose = pbus->sysdata; + unsigned long addr; + u8 bus = pbus->number; + + DBG_CFG(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x, " + "pci_addr=0x%p, type1=0x%p)\n", + bus, device_fn, where, pci_addr, type1)); + + if (!pbus->parent) /* No parent means peer PCI bus. */ + bus = 0; + *type1 = (bus != 0); + + addr = (bus << 16) | (device_fn << 8) | where; + addr |= hose->config_space_base; + + *pci_addr = addr; + DBG_CFG(("mk_conf_addr: returning pci_addr 0x%lx\n", addr)); + return 0; +} + +static int +wildfire_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + *value = __kernel_ldbu(*(vucp)addr); + break; + case 2: + *value = __kernel_ldwu(*(vusp)addr); + break; + case 4: + *value = *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int +wildfire_write_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + __kernel_stb(value, *(vucp)addr); + mb(); + __kernel_ldbu(*(vucp)addr); + break; + case 2: + __kernel_stw(value, *(vusp)addr); + mb(); + __kernel_ldwu(*(vusp)addr); + break; + case 4: + *(vuip)addr = value; + mb(); + *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops wildfire_pci_ops = +{ + .read = wildfire_read_config, + .write = wildfire_write_config, +}; + + +/* + * NUMA Support + */ +int wildfire_pa_to_nid(unsigned long pa) +{ + return pa >> 36; +} + +int wildfire_cpuid_to_nid(int cpuid) +{ + /* assume 4 CPUs per node */ + return cpuid >> 2; +} + +unsigned long wildfire_node_mem_start(int nid) +{ + /* 64GB per node */ + return (unsigned long)nid * (64UL * 1024 * 1024 * 1024); +} + +unsigned long wildfire_node_mem_size(int nid) +{ + /* 64GB per node */ + return 64UL * 1024 * 1024 * 1024; +} + +#if DEBUG_DUMP_REGS + +static void __init +wildfire_dump_pci_regs(int qbbno, int hoseno) +{ + wildfire_pci *pci = WILDFIRE_pci(qbbno, hoseno); + int i; + + printk(KERN_ERR "PCI registers for QBB %d hose %d (%p)\n", + qbbno, hoseno, pci); + + printk(KERN_ERR " PCI_IO_ADDR_EXT: 0x%16lx\n", + pci->pci_io_addr_ext.csr); + printk(KERN_ERR " PCI_CTRL: 0x%16lx\n", pci->pci_ctrl.csr); + printk(KERN_ERR " PCI_ERR_SUM: 0x%16lx\n", pci->pci_err_sum.csr); + printk(KERN_ERR " PCI_ERR_ADDR: 0x%16lx\n", pci->pci_err_addr.csr); + printk(KERN_ERR " PCI_STALL_CNT: 0x%16lx\n", pci->pci_stall_cnt.csr); + printk(KERN_ERR " PCI_PEND_INT: 0x%16lx\n", pci->pci_pend_int.csr); + printk(KERN_ERR " PCI_SENT_INT: 0x%16lx\n", pci->pci_sent_int.csr); + + printk(KERN_ERR " DMA window registers for QBB %d hose %d (%p)\n", + qbbno, hoseno, pci); + for (i = 0; i < 4; i++) { + printk(KERN_ERR " window %d: 0x%16lx 0x%16lx 0x%16lx\n", i, + pci->pci_window[i].wbase.csr, + pci->pci_window[i].wmask.csr, + pci->pci_window[i].tbase.csr); + } + printk(KERN_ERR "\n"); +} + +static void __init +wildfire_dump_pca_regs(int qbbno, int pcano) +{ + wildfire_pca *pca = WILDFIRE_pca(qbbno, pcano); + int i; + + printk(KERN_ERR "PCA registers for QBB %d PCA %d (%p)\n", + qbbno, pcano, pca); + + printk(KERN_ERR " PCA_WHAT_AM_I: 0x%16lx\n", pca->pca_what_am_i.csr); + printk(KERN_ERR " PCA_ERR_SUM: 0x%16lx\n", pca->pca_err_sum.csr); + printk(KERN_ERR " PCA_PEND_INT: 0x%16lx\n", pca->pca_pend_int.csr); + printk(KERN_ERR " PCA_SENT_INT: 0x%16lx\n", pca->pca_sent_int.csr); + printk(KERN_ERR " PCA_STDIO_EL: 0x%16lx\n", + pca->pca_stdio_edge_level.csr); + + printk(KERN_ERR " PCA target registers for QBB %d PCA %d (%p)\n", + qbbno, pcano, pca); + for (i = 0; i < 4; i++) { + printk(KERN_ERR " target %d: 0x%16lx 0x%16lx\n", i, + pca->pca_int[i].target.csr, + pca->pca_int[i].enable.csr); + } + + printk(KERN_ERR "\n"); +} + +static void __init +wildfire_dump_qsa_regs(int qbbno) +{ + wildfire_qsa *qsa = WILDFIRE_qsa(qbbno); + int i; + + printk(KERN_ERR "QSA registers for QBB %d (%p)\n", qbbno, qsa); + + printk(KERN_ERR " QSA_QBB_ID: 0x%16lx\n", qsa->qsa_qbb_id.csr); + printk(KERN_ERR " QSA_PORT_ENA: 0x%16lx\n", qsa->qsa_port_ena.csr); + printk(KERN_ERR " QSA_REF_INT: 0x%16lx\n", qsa->qsa_ref_int.csr); + + for (i = 0; i < 5; i++) + printk(KERN_ERR " QSA_CONFIG_%d: 0x%16lx\n", + i, qsa->qsa_config[i].csr); + + for (i = 0; i < 2; i++) + printk(KERN_ERR " QSA_QBB_POP_%d: 0x%16lx\n", + i, qsa->qsa_qbb_pop[0].csr); + + printk(KERN_ERR "\n"); +} + +static void __init +wildfire_dump_qsd_regs(int qbbno) +{ + wildfire_qsd *qsd = WILDFIRE_qsd(qbbno); + + printk(KERN_ERR "QSD registers for QBB %d (%p)\n", qbbno, qsd); + + printk(KERN_ERR " QSD_WHAMI: 0x%16lx\n", qsd->qsd_whami.csr); + printk(KERN_ERR " QSD_REV: 0x%16lx\n", qsd->qsd_rev.csr); + printk(KERN_ERR " QSD_PORT_PRESENT: 0x%16lx\n", + qsd->qsd_port_present.csr); + printk(KERN_ERR " QSD_PORT_ACTUVE: 0x%16lx\n", + qsd->qsd_port_active.csr); + printk(KERN_ERR " QSD_FAULT_ENA: 0x%16lx\n", + qsd->qsd_fault_ena.csr); + printk(KERN_ERR " QSD_CPU_INT_ENA: 0x%16lx\n", + qsd->qsd_cpu_int_ena.csr); + printk(KERN_ERR " QSD_MEM_CONFIG: 0x%16lx\n", + qsd->qsd_mem_config.csr); + printk(KERN_ERR " QSD_ERR_SUM: 0x%16lx\n", + qsd->qsd_err_sum.csr); + + printk(KERN_ERR "\n"); +} + +static void __init +wildfire_dump_iop_regs(int qbbno) +{ + wildfire_iop *iop = WILDFIRE_iop(qbbno); + int i; + + printk(KERN_ERR "IOP registers for QBB %d (%p)\n", qbbno, iop); + + printk(KERN_ERR " IOA_CONFIG: 0x%16lx\n", iop->ioa_config.csr); + printk(KERN_ERR " IOD_CONFIG: 0x%16lx\n", iop->iod_config.csr); + printk(KERN_ERR " IOP_SWITCH_CREDITS: 0x%16lx\n", + iop->iop_switch_credits.csr); + printk(KERN_ERR " IOP_HOSE_CREDITS: 0x%16lx\n", + iop->iop_hose_credits.csr); + + for (i = 0; i < 4; i++) + printk(KERN_ERR " IOP_HOSE_%d_INIT: 0x%16lx\n", + i, iop->iop_hose[i].init.csr); + for (i = 0; i < 4; i++) + printk(KERN_ERR " IOP_DEV_INT_TARGET_%d: 0x%16lx\n", + i, iop->iop_dev_int[i].target.csr); + + printk(KERN_ERR "\n"); +} + +static void __init +wildfire_dump_gp_regs(int qbbno) +{ + wildfire_gp *gp = WILDFIRE_gp(qbbno); + int i; + + printk(KERN_ERR "GP registers for QBB %d (%p)\n", qbbno, gp); + for (i = 0; i < 4; i++) + printk(KERN_ERR " GPA_QBB_MAP_%d: 0x%16lx\n", + i, gp->gpa_qbb_map[i].csr); + + printk(KERN_ERR " GPA_MEM_POP_MAP: 0x%16lx\n", + gp->gpa_mem_pop_map.csr); + printk(KERN_ERR " GPA_SCRATCH: 0x%16lx\n", gp->gpa_scratch.csr); + printk(KERN_ERR " GPA_DIAG: 0x%16lx\n", gp->gpa_diag.csr); + printk(KERN_ERR " GPA_CONFIG_0: 0x%16lx\n", gp->gpa_config_0.csr); + printk(KERN_ERR " GPA_INIT_ID: 0x%16lx\n", gp->gpa_init_id.csr); + printk(KERN_ERR " GPA_CONFIG_2: 0x%16lx\n", gp->gpa_config_2.csr); + + printk(KERN_ERR "\n"); +} +#endif /* DUMP_REGS */ + +#if DEBUG_DUMP_CONFIG +static void __init +wildfire_dump_hardware_config(void) +{ + int i; + + printk(KERN_ERR "Probed Hardware Configuration\n"); + + printk(KERN_ERR " hard_qbb_mask: 0x%16lx\n", wildfire_hard_qbb_mask); + printk(KERN_ERR " soft_qbb_mask: 0x%16lx\n", wildfire_soft_qbb_mask); + + printk(KERN_ERR " gp_mask: 0x%16lx\n", wildfire_gp_mask); + printk(KERN_ERR " hs_mask: 0x%16lx\n", wildfire_hs_mask); + printk(KERN_ERR " iop_mask: 0x%16lx\n", wildfire_iop_mask); + printk(KERN_ERR " ior_mask: 0x%16lx\n", wildfire_ior_mask); + printk(KERN_ERR " pca_mask: 0x%16lx\n", wildfire_pca_mask); + + printk(KERN_ERR " cpu_mask: 0x%16lx\n", wildfire_cpu_mask); + printk(KERN_ERR " mem_mask: 0x%16lx\n", wildfire_mem_mask); + + printk(" hard_qbb_map: "); + for (i = 0; i < WILDFIRE_MAX_QBB; i++) + if (wildfire_hard_qbb_map[i] == QBB_MAP_EMPTY) + printk("--- "); + else + printk("%3d ", wildfire_hard_qbb_map[i]); + printk("\n"); + + printk(" soft_qbb_map: "); + for (i = 0; i < WILDFIRE_MAX_QBB; i++) + if (wildfire_soft_qbb_map[i] == QBB_MAP_EMPTY) + printk("--- "); + else + printk("%3d ", wildfire_soft_qbb_map[i]); + printk("\n"); +} +#endif /* DUMP_CONFIG */ diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S new file mode 100644 index 00000000..6d159cee --- /dev/null +++ b/arch/alpha/kernel/entry.S @@ -0,0 +1,929 @@ +/* + * arch/alpha/kernel/entry.S + * + * Kernel entry-points. + */ + +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> +#include <asm/pal.h> +#include <asm/errno.h> +#include <asm/unistd.h> + + .text + .set noat + +/* Stack offsets. */ +#define SP_OFF 184 +#define SWITCH_STACK_SIZE 320 + +/* + * This defines the normal kernel pt-regs layout. + * + * regs 9-15 preserved by C code + * regs 16-18 saved by PAL-code + * regs 29-30 saved and set up by PAL-code + * JRP - Save regs 16-18 in a special area of the stack, so that + * the palcode-provided values are available to the signal handler. + */ + +#define SAVE_ALL \ + subq $sp, SP_OFF, $sp; \ + stq $0, 0($sp); \ + stq $1, 8($sp); \ + stq $2, 16($sp); \ + stq $3, 24($sp); \ + stq $4, 32($sp); \ + stq $28, 144($sp); \ + lda $2, alpha_mv; \ + stq $5, 40($sp); \ + stq $6, 48($sp); \ + stq $7, 56($sp); \ + stq $8, 64($sp); \ + stq $19, 72($sp); \ + stq $20, 80($sp); \ + stq $21, 88($sp); \ + ldq $2, HAE_CACHE($2); \ + stq $22, 96($sp); \ + stq $23, 104($sp); \ + stq $24, 112($sp); \ + stq $25, 120($sp); \ + stq $26, 128($sp); \ + stq $27, 136($sp); \ + stq $2, 152($sp); \ + stq $16, 160($sp); \ + stq $17, 168($sp); \ + stq $18, 176($sp) + +#define RESTORE_ALL \ + lda $19, alpha_mv; \ + ldq $0, 0($sp); \ + ldq $1, 8($sp); \ + ldq $2, 16($sp); \ + ldq $3, 24($sp); \ + ldq $21, 152($sp); \ + ldq $20, HAE_CACHE($19); \ + ldq $4, 32($sp); \ + ldq $5, 40($sp); \ + ldq $6, 48($sp); \ + ldq $7, 56($sp); \ + subq $20, $21, $20; \ + ldq $8, 64($sp); \ + beq $20, 99f; \ + ldq $20, HAE_REG($19); \ + stq $21, HAE_CACHE($19); \ + stq $21, 0($20); \ +99:; \ + ldq $19, 72($sp); \ + ldq $20, 80($sp); \ + ldq $21, 88($sp); \ + ldq $22, 96($sp); \ + ldq $23, 104($sp); \ + ldq $24, 112($sp); \ + ldq $25, 120($sp); \ + ldq $26, 128($sp); \ + ldq $27, 136($sp); \ + ldq $28, 144($sp); \ + addq $sp, SP_OFF, $sp + +/* + * Non-syscall kernel entry points. + */ + + .align 4 + .globl entInt + .ent entInt +entInt: + SAVE_ALL + lda $8, 0x3fff + lda $26, ret_from_sys_call + bic $sp, $8, $8 + mov $sp, $19 + jsr $31, do_entInt +.end entInt + + .align 4 + .globl entArith + .ent entArith +entArith: + SAVE_ALL + lda $8, 0x3fff + lda $26, ret_from_sys_call + bic $sp, $8, $8 + mov $sp, $18 + jsr $31, do_entArith +.end entArith + + .align 4 + .globl entMM + .ent entMM +entMM: + SAVE_ALL +/* save $9 - $15 so the inline exception code can manipulate them. */ + subq $sp, 56, $sp + stq $9, 0($sp) + stq $10, 8($sp) + stq $11, 16($sp) + stq $12, 24($sp) + stq $13, 32($sp) + stq $14, 40($sp) + stq $15, 48($sp) + addq $sp, 56, $19 +/* handle the fault */ + lda $8, 0x3fff + bic $sp, $8, $8 + jsr $26, do_page_fault +/* reload the registers after the exception code played. */ + ldq $9, 0($sp) + ldq $10, 8($sp) + ldq $11, 16($sp) + ldq $12, 24($sp) + ldq $13, 32($sp) + ldq $14, 40($sp) + ldq $15, 48($sp) + addq $sp, 56, $sp +/* finish up the syscall as normal. */ + br ret_from_sys_call +.end entMM + + .align 4 + .globl entIF + .ent entIF +entIF: + SAVE_ALL + lda $8, 0x3fff + lda $26, ret_from_sys_call + bic $sp, $8, $8 + mov $sp, $17 + jsr $31, do_entIF +.end entIF + + .align 4 + .globl entUna + .ent entUna +entUna: + lda $sp, -256($sp) + stq $0, 0($sp) + ldq $0, 256($sp) /* get PS */ + stq $1, 8($sp) + stq $2, 16($sp) + stq $3, 24($sp) + and $0, 8, $0 /* user mode? */ + stq $4, 32($sp) + bne $0, entUnaUser /* yup -> do user-level unaligned fault */ + stq $5, 40($sp) + stq $6, 48($sp) + stq $7, 56($sp) + stq $8, 64($sp) + stq $9, 72($sp) + stq $10, 80($sp) + stq $11, 88($sp) + stq $12, 96($sp) + stq $13, 104($sp) + stq $14, 112($sp) + stq $15, 120($sp) + /* 16-18 PAL-saved */ + stq $19, 152($sp) + stq $20, 160($sp) + stq $21, 168($sp) + stq $22, 176($sp) + stq $23, 184($sp) + stq $24, 192($sp) + stq $25, 200($sp) + stq $26, 208($sp) + stq $27, 216($sp) + stq $28, 224($sp) + mov $sp, $19 + stq $gp, 232($sp) + lda $8, 0x3fff + stq $31, 248($sp) + bic $sp, $8, $8 + jsr $26, do_entUna + ldq $0, 0($sp) + ldq $1, 8($sp) + ldq $2, 16($sp) + ldq $3, 24($sp) + ldq $4, 32($sp) + ldq $5, 40($sp) + ldq $6, 48($sp) + ldq $7, 56($sp) + ldq $8, 64($sp) + ldq $9, 72($sp) + ldq $10, 80($sp) + ldq $11, 88($sp) + ldq $12, 96($sp) + ldq $13, 104($sp) + ldq $14, 112($sp) + ldq $15, 120($sp) + /* 16-18 PAL-saved */ + ldq $19, 152($sp) + ldq $20, 160($sp) + ldq $21, 168($sp) + ldq $22, 176($sp) + ldq $23, 184($sp) + ldq $24, 192($sp) + ldq $25, 200($sp) + ldq $26, 208($sp) + ldq $27, 216($sp) + ldq $28, 224($sp) + ldq $gp, 232($sp) + lda $sp, 256($sp) + call_pal PAL_rti +.end entUna + + .align 4 + .ent entUnaUser +entUnaUser: + ldq $0, 0($sp) /* restore original $0 */ + lda $sp, 256($sp) /* pop entUna's stack frame */ + SAVE_ALL /* setup normal kernel stack */ + lda $sp, -56($sp) + stq $9, 0($sp) + stq $10, 8($sp) + stq $11, 16($sp) + stq $12, 24($sp) + stq $13, 32($sp) + stq $14, 40($sp) + stq $15, 48($sp) + lda $8, 0x3fff + addq $sp, 56, $19 + bic $sp, $8, $8 + jsr $26, do_entUnaUser + ldq $9, 0($sp) + ldq $10, 8($sp) + ldq $11, 16($sp) + ldq $12, 24($sp) + ldq $13, 32($sp) + ldq $14, 40($sp) + ldq $15, 48($sp) + lda $sp, 56($sp) + br ret_from_sys_call +.end entUnaUser + + .align 4 + .globl entDbg + .ent entDbg +entDbg: + SAVE_ALL + lda $8, 0x3fff + lda $26, ret_from_sys_call + bic $sp, $8, $8 + mov $sp, $16 + jsr $31, do_entDbg +.end entDbg + +/* + * The system call entry point is special. Most importantly, it looks + * like a function call to userspace as far as clobbered registers. We + * do preserve the argument registers (for syscall restarts) and $26 + * (for leaf syscall functions). + * + * So much for theory. We don't take advantage of this yet. + * + * Note that a0-a2 are not saved by PALcode as with the other entry points. + */ + + .align 4 + .globl entSys + .globl ret_from_sys_call + .ent entSys +entSys: + SAVE_ALL + lda $8, 0x3fff + bic $sp, $8, $8 + lda $4, NR_SYSCALLS($31) + stq $16, SP_OFF+24($sp) + lda $5, sys_call_table + lda $27, sys_ni_syscall + cmpult $0, $4, $4 + ldl $3, TI_FLAGS($8) + stq $17, SP_OFF+32($sp) + s8addq $0, $5, $5 + stq $18, SP_OFF+40($sp) + blbs $3, strace + beq $4, 1f + ldq $27, 0($5) +1: jsr $26, ($27), alpha_ni_syscall + ldgp $gp, 0($26) + blt $0, $syscall_error /* the call failed */ + stq $0, 0($sp) + stq $31, 72($sp) /* a3=0 => no error */ + + .align 4 +ret_from_sys_call: + cmovne $26, 0, $19 /* $19 = 0 => non-restartable */ + ldq $0, SP_OFF($sp) + and $0, 8, $0 + beq $0, ret_to_kernel +ret_to_user: + /* Make sure need_resched and sigpending don't change between + sampling and the rti. */ + lda $16, 7 + call_pal PAL_swpipl + ldl $5, TI_FLAGS($8) + and $5, _TIF_WORK_MASK, $2 + bne $2, work_pending +restore_all: + RESTORE_ALL + call_pal PAL_rti + +ret_to_kernel: + lda $16, 7 + call_pal PAL_swpipl + br restore_all + + .align 3 +$syscall_error: + /* + * Some system calls (e.g., ptrace) can return arbitrary + * values which might normally be mistaken as error numbers. + * Those functions must zero $0 (v0) directly in the stack + * frame to indicate that a negative return value wasn't an + * error number.. + */ + ldq $19, 0($sp) /* old syscall nr (zero if success) */ + beq $19, $ret_success + + ldq $20, 72($sp) /* .. and this a3 */ + subq $31, $0, $0 /* with error in v0 */ + addq $31, 1, $1 /* set a3 for errno return */ + stq $0, 0($sp) + mov $31, $26 /* tell "ret_from_sys_call" we can restart */ + stq $1, 72($sp) /* a3 for return */ + br ret_from_sys_call + +$ret_success: + stq $0, 0($sp) + stq $31, 72($sp) /* a3=0 => no error */ + br ret_from_sys_call +.end entSys + +/* + * Do all cleanup when returning from all interrupts and system calls. + * + * Arguments: + * $5: TI_FLAGS. + * $8: current. + * $19: The old syscall number, or zero if this is not a return + * from a syscall that errored and is possibly restartable. + * $20: The old a3 value + */ + + .align 4 + .ent work_pending +work_pending: + and $5, _TIF_NEED_RESCHED, $2 + beq $2, $work_notifysig + +$work_resched: + subq $sp, 16, $sp + stq $19, 0($sp) /* save syscall nr */ + stq $20, 8($sp) /* and error indication (a3) */ + jsr $26, schedule + ldq $19, 0($sp) + ldq $20, 8($sp) + addq $sp, 16, $sp + /* Make sure need_resched and sigpending don't change between + sampling and the rti. */ + lda $16, 7 + call_pal PAL_swpipl + ldl $5, TI_FLAGS($8) + and $5, _TIF_WORK_MASK, $2 + beq $2, restore_all + and $5, _TIF_NEED_RESCHED, $2 + bne $2, $work_resched + +$work_notifysig: + mov $sp, $16 + bsr $1, do_switch_stack + mov $sp, $17 + mov $5, $18 + mov $19, $9 /* save old syscall number */ + mov $20, $10 /* save old a3 */ + and $5, _TIF_SIGPENDING, $2 + cmovne $2, 0, $9 /* we don't want double syscall restarts */ + jsr $26, do_notify_resume + mov $9, $19 + mov $10, $20 + bsr $1, undo_switch_stack + br ret_to_user +.end work_pending + +/* + * PTRACE syscall handler + */ + + .align 4 + .ent strace +strace: + /* set up signal stack, call syscall_trace */ + bsr $1, do_switch_stack + jsr $26, syscall_trace + bsr $1, undo_switch_stack + + /* get the system call number and the arguments back.. */ + ldq $0, 0($sp) + ldq $16, SP_OFF+24($sp) + ldq $17, SP_OFF+32($sp) + ldq $18, SP_OFF+40($sp) + ldq $19, 72($sp) + ldq $20, 80($sp) + ldq $21, 88($sp) + + /* get the system call pointer.. */ + lda $1, NR_SYSCALLS($31) + lda $2, sys_call_table + lda $27, alpha_ni_syscall + cmpult $0, $1, $1 + s8addq $0, $2, $2 + beq $1, 1f + ldq $27, 0($2) +1: jsr $26, ($27), sys_gettimeofday +ret_from_straced: + ldgp $gp, 0($26) + + /* check return.. */ + blt $0, $strace_error /* the call failed */ + stq $31, 72($sp) /* a3=0 => no error */ +$strace_success: + stq $0, 0($sp) /* save return value */ + + bsr $1, do_switch_stack + jsr $26, syscall_trace + bsr $1, undo_switch_stack + br $31, ret_from_sys_call + + .align 3 +$strace_error: + ldq $19, 0($sp) /* old syscall nr (zero if success) */ + beq $19, $strace_success + ldq $20, 72($sp) /* .. and this a3 */ + + subq $31, $0, $0 /* with error in v0 */ + addq $31, 1, $1 /* set a3 for errno return */ + stq $0, 0($sp) + stq $1, 72($sp) /* a3 for return */ + + bsr $1, do_switch_stack + mov $19, $9 /* save old syscall number */ + mov $20, $10 /* save old a3 */ + jsr $26, syscall_trace + mov $9, $19 + mov $10, $20 + bsr $1, undo_switch_stack + + mov $31, $26 /* tell "ret_from_sys_call" we can restart */ + br ret_from_sys_call +.end strace + +/* + * Save and restore the switch stack -- aka the balance of the user context. + */ + + .align 4 + .ent do_switch_stack +do_switch_stack: + lda $sp, -SWITCH_STACK_SIZE($sp) + stq $9, 0($sp) + stq $10, 8($sp) + stq $11, 16($sp) + stq $12, 24($sp) + stq $13, 32($sp) + stq $14, 40($sp) + stq $15, 48($sp) + stq $26, 56($sp) + stt $f0, 64($sp) + stt $f1, 72($sp) + stt $f2, 80($sp) + stt $f3, 88($sp) + stt $f4, 96($sp) + stt $f5, 104($sp) + stt $f6, 112($sp) + stt $f7, 120($sp) + stt $f8, 128($sp) + stt $f9, 136($sp) + stt $f10, 144($sp) + stt $f11, 152($sp) + stt $f12, 160($sp) + stt $f13, 168($sp) + stt $f14, 176($sp) + stt $f15, 184($sp) + stt $f16, 192($sp) + stt $f17, 200($sp) + stt $f18, 208($sp) + stt $f19, 216($sp) + stt $f20, 224($sp) + stt $f21, 232($sp) + stt $f22, 240($sp) + stt $f23, 248($sp) + stt $f24, 256($sp) + stt $f25, 264($sp) + stt $f26, 272($sp) + stt $f27, 280($sp) + mf_fpcr $f0 # get fpcr + stt $f28, 288($sp) + stt $f29, 296($sp) + stt $f30, 304($sp) + stt $f0, 312($sp) # save fpcr in slot of $f31 + ldt $f0, 64($sp) # dont let "do_switch_stack" change fp state. + ret $31, ($1), 1 +.end do_switch_stack + + .align 4 + .ent undo_switch_stack +undo_switch_stack: + ldq $9, 0($sp) + ldq $10, 8($sp) + ldq $11, 16($sp) + ldq $12, 24($sp) + ldq $13, 32($sp) + ldq $14, 40($sp) + ldq $15, 48($sp) + ldq $26, 56($sp) + ldt $f30, 312($sp) # get saved fpcr + ldt $f0, 64($sp) + ldt $f1, 72($sp) + ldt $f2, 80($sp) + ldt $f3, 88($sp) + mt_fpcr $f30 # install saved fpcr + ldt $f4, 96($sp) + ldt $f5, 104($sp) + ldt $f6, 112($sp) + ldt $f7, 120($sp) + ldt $f8, 128($sp) + ldt $f9, 136($sp) + ldt $f10, 144($sp) + ldt $f11, 152($sp) + ldt $f12, 160($sp) + ldt $f13, 168($sp) + ldt $f14, 176($sp) + ldt $f15, 184($sp) + ldt $f16, 192($sp) + ldt $f17, 200($sp) + ldt $f18, 208($sp) + ldt $f19, 216($sp) + ldt $f20, 224($sp) + ldt $f21, 232($sp) + ldt $f22, 240($sp) + ldt $f23, 248($sp) + ldt $f24, 256($sp) + ldt $f25, 264($sp) + ldt $f26, 272($sp) + ldt $f27, 280($sp) + ldt $f28, 288($sp) + ldt $f29, 296($sp) + ldt $f30, 304($sp) + lda $sp, SWITCH_STACK_SIZE($sp) + ret $31, ($1), 1 +.end undo_switch_stack + +/* + * The meat of the context switch code. + */ + + .align 4 + .globl alpha_switch_to + .ent alpha_switch_to +alpha_switch_to: + .prologue 0 + bsr $1, do_switch_stack + call_pal PAL_swpctx + lda $8, 0x3fff + bsr $1, undo_switch_stack + bic $sp, $8, $8 + mov $17, $0 + ret +.end alpha_switch_to + +/* + * New processes begin life here. + */ + + .globl ret_from_fork + .align 4 + .ent ret_from_fork +ret_from_fork: + lda $26, ret_from_sys_call + mov $17, $16 + jmp $31, schedule_tail +.end ret_from_fork + +/* + * kernel_thread(fn, arg, clone_flags) + */ + .align 4 + .globl kernel_thread + .ent kernel_thread +kernel_thread: + /* We can be called from a module. */ + ldgp $gp, 0($27) + .prologue 1 + subq $sp, SP_OFF+6*8, $sp + br $1, 2f /* load start address */ + + /* We've now "returned" from a fake system call. */ + unop + blt $0, 1f /* error? */ + ldi $1, 0x3fff + beq $20, 1f /* parent or child? */ + + bic $sp, $1, $8 /* in child. */ + jsr $26, ($27) + ldgp $gp, 0($26) + mov $0, $16 + mov $31, $26 + jmp $31, sys_exit + +1: ret /* in parent. */ + + .align 4 +2: /* Fake a system call stack frame, as we can't do system calls + from kernel space. Note that we store FN and ARG as they + need to be set up in the child for the call. Also store $8 + and $26 for use in the parent. */ + stq $31, SP_OFF($sp) /* ps */ + stq $1, SP_OFF+8($sp) /* pc */ + stq $gp, SP_OFF+16($sp) /* gp */ + stq $16, 136($sp) /* $27; FN for child */ + stq $17, SP_OFF+24($sp) /* $16; ARG for child */ + stq $8, 64($sp) /* $8 */ + stq $26, 128($sp) /* $26 */ + /* Avoid the HAE being gratuitously wrong, to avoid restoring it. */ + ldq $2, alpha_mv+HAE_CACHE + stq $2, 152($sp) /* HAE */ + + /* Shuffle FLAGS to the front; add CLONE_VM. */ + ldi $1, CLONE_VM|CLONE_UNTRACED + or $18, $1, $16 + bsr $26, sys_clone + + /* We don't actually care for a3 success widgetry in the kernel. + Not for positive errno values. */ + stq $0, 0($sp) /* $0 */ + br ret_to_kernel +.end kernel_thread + +/* + * kernel_execve(path, argv, envp) + */ + .align 4 + .globl kernel_execve + .ent kernel_execve +kernel_execve: + /* We can be called from a module. */ + ldgp $gp, 0($27) + lda $sp, -(32+SIZEOF_PT_REGS+8)($sp) + .frame $sp, 32+SIZEOF_PT_REGS+8, $26, 0 + stq $26, 0($sp) + stq $16, 8($sp) + stq $17, 16($sp) + stq $18, 24($sp) + .prologue 1 + + lda $16, 32($sp) + lda $17, 0 + lda $18, SIZEOF_PT_REGS + bsr $26, memset !samegp + + /* Avoid the HAE being gratuitously wrong, which would cause us + to do the whole turn off interrupts thing and restore it. */ + ldq $2, alpha_mv+HAE_CACHE + stq $2, 152+32($sp) + + ldq $16, 8($sp) + ldq $17, 16($sp) + ldq $18, 24($sp) + lda $19, 32($sp) + bsr $26, do_execve !samegp + + ldq $26, 0($sp) + bne $0, 1f /* error! */ + + /* Move the temporary pt_regs struct from its current location + to the top of the kernel stack frame. See copy_thread for + details for a normal process. */ + lda $16, 0x4000 - SIZEOF_PT_REGS($8) + lda $17, 32($sp) + lda $18, SIZEOF_PT_REGS + bsr $26, memmove !samegp + + /* Take that over as our new stack frame and visit userland! */ + lda $sp, 0x4000 - SIZEOF_PT_REGS($8) + br $31, ret_from_sys_call + +1: lda $sp, 32+SIZEOF_PT_REGS+8($sp) + ret +.end kernel_execve + + +/* + * Special system calls. Most of these are special in that they either + * have to play switch_stack games or in some way use the pt_regs struct. + */ + .align 4 + .globl sys_fork + .ent sys_fork +sys_fork: + .prologue 0 + mov $sp, $21 + bsr $1, do_switch_stack + bis $31, SIGCHLD, $16 + mov $31, $17 + mov $31, $18 + mov $31, $19 + mov $31, $20 + jsr $26, alpha_clone + bsr $1, undo_switch_stack + ret +.end sys_fork + + .align 4 + .globl sys_clone + .ent sys_clone +sys_clone: + .prologue 0 + mov $sp, $21 + bsr $1, do_switch_stack + /* $16, $17, $18, $19, $20 come from the user. */ + jsr $26, alpha_clone + bsr $1, undo_switch_stack + ret +.end sys_clone + + .align 4 + .globl sys_vfork + .ent sys_vfork +sys_vfork: + .prologue 0 + mov $sp, $16 + bsr $1, do_switch_stack + jsr $26, alpha_vfork + bsr $1, undo_switch_stack + ret +.end sys_vfork + + .align 4 + .globl sys_sigreturn + .ent sys_sigreturn +sys_sigreturn: + .prologue 0 + lda $9, ret_from_straced + cmpult $26, $9, $9 + mov $sp, $17 + lda $18, -SWITCH_STACK_SIZE($sp) + lda $sp, -SWITCH_STACK_SIZE($sp) + jsr $26, do_sigreturn + bne $9, 1f + jsr $26, syscall_trace +1: br $1, undo_switch_stack + br ret_from_sys_call +.end sys_sigreturn + + .align 4 + .globl sys_rt_sigreturn + .ent sys_rt_sigreturn +sys_rt_sigreturn: + .prologue 0 + lda $9, ret_from_straced + cmpult $26, $9, $9 + mov $sp, $17 + lda $18, -SWITCH_STACK_SIZE($sp) + lda $sp, -SWITCH_STACK_SIZE($sp) + jsr $26, do_rt_sigreturn + bne $9, 1f + jsr $26, syscall_trace +1: br $1, undo_switch_stack + br ret_from_sys_call +.end sys_rt_sigreturn + + .align 4 + .globl sys_sethae + .ent sys_sethae +sys_sethae: + .prologue 0 + stq $16, 152($sp) + ret +.end sys_sethae + + .align 4 + .globl osf_getpriority + .ent osf_getpriority +osf_getpriority: + lda $sp, -16($sp) + stq $26, 0($sp) + .prologue 0 + + jsr $26, sys_getpriority + + ldq $26, 0($sp) + blt $0, 1f + + /* Return value is the unbiased priority, i.e. 20 - prio. + This does result in negative return values, so signal + no error by writing into the R0 slot. */ + lda $1, 20 + stq $31, 16($sp) + subl $1, $0, $0 + unop + +1: lda $sp, 16($sp) + ret +.end osf_getpriority + + .align 4 + .globl sys_getxuid + .ent sys_getxuid +sys_getxuid: + .prologue 0 + ldq $2, TI_TASK($8) + ldq $3, TASK_CRED($2) + ldl $0, CRED_UID($3) + ldl $1, CRED_EUID($3) + stq $1, 80($sp) + ret +.end sys_getxuid + + .align 4 + .globl sys_getxgid + .ent sys_getxgid +sys_getxgid: + .prologue 0 + ldq $2, TI_TASK($8) + ldq $3, TASK_CRED($2) + ldl $0, CRED_GID($3) + ldl $1, CRED_EGID($3) + stq $1, 80($sp) + ret +.end sys_getxgid + + .align 4 + .globl sys_getxpid + .ent sys_getxpid +sys_getxpid: + .prologue 0 + ldq $2, TI_TASK($8) + + /* See linux/kernel/timer.c sys_getppid for discussion + about this loop. */ + ldq $3, TASK_GROUP_LEADER($2) + ldq $4, TASK_REAL_PARENT($3) + ldl $0, TASK_TGID($2) +1: ldl $1, TASK_TGID($4) +#ifdef CONFIG_SMP + mov $4, $5 + mb + ldq $3, TASK_GROUP_LEADER($2) + ldq $4, TASK_REAL_PARENT($3) + cmpeq $4, $5, $5 + beq $5, 1b +#endif + stq $1, 80($sp) + ret +.end sys_getxpid + + .align 4 + .globl sys_alpha_pipe + .ent sys_alpha_pipe +sys_alpha_pipe: + lda $sp, -16($sp) + stq $26, 0($sp) + .prologue 0 + + mov $31, $17 + lda $16, 8($sp) + jsr $26, do_pipe_flags + + ldq $26, 0($sp) + bne $0, 1f + + /* The return values are in $0 and $20. */ + ldl $1, 12($sp) + ldl $0, 8($sp) + + stq $1, 80+16($sp) +1: lda $sp, 16($sp) + ret +.end sys_alpha_pipe + + .align 4 + .globl sys_execve + .ent sys_execve +sys_execve: + .prologue 0 + mov $sp, $19 + jmp $31, do_sys_execve +.end sys_execve + + .align 4 + .globl alpha_ni_syscall + .ent alpha_ni_syscall +alpha_ni_syscall: + .prologue 0 + /* Special because it also implements overflow handling via + syscall number 0. And if you recall, zero is a special + trigger for "not an error". Store large non-zero there. */ + lda $0, -ENOSYS + unop + stq $0, 0($sp) + ret +.end alpha_ni_syscall diff --git a/arch/alpha/kernel/err_common.c b/arch/alpha/kernel/err_common.c new file mode 100644 index 00000000..13d53b1c --- /dev/null +++ b/arch/alpha/kernel/err_common.c @@ -0,0 +1,320 @@ +/* + * linux/arch/alpha/kernel/err_common.c + * + * Copyright (C) 2000 Jeff Wiedemeier (Compaq Computer Corporation) + * + * Error handling code supporting Alpha systems + */ + +#include <linux/init.h> +#include <linux/sched.h> + +#include <asm/io.h> +#include <asm/hwrpb.h> +#include <asm/smp.h> +#include <asm/err_common.h> + +#include "err_impl.h" +#include "proto.h" + +/* + * err_print_prefix -- error handling print routines should prefix + * all prints with this + */ +char *err_print_prefix = KERN_NOTICE; + + +/* + * Generic + */ +void +mchk_dump_mem(void *data, size_t length, char **annotation) +{ + unsigned long *ldata = data; + size_t i; + + for (i = 0; (i * sizeof(*ldata)) < length; i++) { + if (annotation && !annotation[i]) + annotation = NULL; + printk("%s %08x: %016lx %s\n", + err_print_prefix, + (unsigned)(i * sizeof(*ldata)), ldata[i], + annotation ? annotation[i] : ""); + } +} + +void +mchk_dump_logout_frame(struct el_common *mchk_header) +{ + printk("%s -- Frame Header --\n" + " Frame Size: %d (0x%x) bytes\n" + " Flags: %s%s\n" + " MCHK Code: 0x%x\n" + " Frame Rev: %d\n" + " Proc Offset: 0x%08x\n" + " Sys Offset: 0x%08x\n" + " -- Processor Region --\n", + err_print_prefix, + mchk_header->size, mchk_header->size, + mchk_header->retry ? "RETRY " : "", + mchk_header->err2 ? "SECOND_ERR " : "", + mchk_header->code, + mchk_header->frame_rev, + mchk_header->proc_offset, + mchk_header->sys_offset); + + mchk_dump_mem((void *) + ((unsigned long)mchk_header + mchk_header->proc_offset), + mchk_header->sys_offset - mchk_header->proc_offset, + NULL); + + printk("%s -- System Region --\n", err_print_prefix); + mchk_dump_mem((void *) + ((unsigned long)mchk_header + mchk_header->sys_offset), + mchk_header->size - mchk_header->sys_offset, + NULL); + printk("%s -- End of Frame --\n", err_print_prefix); +} + + +/* + * Console Data Log + */ +/* Data */ +static struct el_subpacket_handler *subpacket_handler_list = NULL; +static struct el_subpacket_annotation *subpacket_annotation_list = NULL; + +static struct el_subpacket * +el_process_header_subpacket(struct el_subpacket *header) +{ + union el_timestamp timestamp; + char *name = "UNKNOWN EVENT"; + int packet_count = 0; + int length = 0; + + if (header->class != EL_CLASS__HEADER) { + printk("%s** Unexpected header CLASS %d TYPE %d, aborting\n", + err_print_prefix, + header->class, header->type); + return NULL; + } + + switch(header->type) { + case EL_TYPE__HEADER__SYSTEM_ERROR_FRAME: + name = "SYSTEM ERROR"; + length = header->by_type.sys_err.frame_length; + packet_count = + header->by_type.sys_err.frame_packet_count; + timestamp.as_int = 0; + break; + case EL_TYPE__HEADER__SYSTEM_EVENT_FRAME: + name = "SYSTEM EVENT"; + length = header->by_type.sys_event.frame_length; + packet_count = + header->by_type.sys_event.frame_packet_count; + timestamp = header->by_type.sys_event.timestamp; + break; + case EL_TYPE__HEADER__HALT_FRAME: + name = "ERROR HALT"; + length = header->by_type.err_halt.frame_length; + packet_count = + header->by_type.err_halt.frame_packet_count; + timestamp = header->by_type.err_halt.timestamp; + break; + case EL_TYPE__HEADER__LOGOUT_FRAME: + name = "LOGOUT FRAME"; + length = header->by_type.logout_header.frame_length; + packet_count = 1; + timestamp.as_int = 0; + break; + default: /* Unknown */ + printk("%s** Unknown header - CLASS %d TYPE %d, aborting\n", + err_print_prefix, + header->class, header->type); + return NULL; + } + + printk("%s*** %s:\n" + " CLASS %d, TYPE %d\n", + err_print_prefix, + name, + header->class, header->type); + el_print_timestamp(×tamp); + + /* + * Process the subpackets + */ + el_process_subpackets(header, packet_count); + + /* return the next header */ + header = (struct el_subpacket *) + ((unsigned long)header + header->length + length); + return header; +} + +static struct el_subpacket * +el_process_subpacket_reg(struct el_subpacket *header) +{ + struct el_subpacket *next = NULL; + struct el_subpacket_handler *h = subpacket_handler_list; + + for (; h && h->class != header->class; h = h->next); + if (h) next = h->handler(header); + + return next; +} + +void +el_print_timestamp(union el_timestamp *timestamp) +{ + if (timestamp->as_int) + printk("%s TIMESTAMP: %d/%d/%02d %d:%02d:%0d\n", + err_print_prefix, + timestamp->b.month, timestamp->b.day, + timestamp->b.year, timestamp->b.hour, + timestamp->b.minute, timestamp->b.second); +} + +void +el_process_subpackets(struct el_subpacket *header, int packet_count) +{ + struct el_subpacket *subpacket; + int i; + + subpacket = (struct el_subpacket *) + ((unsigned long)header + header->length); + + for (i = 0; subpacket && i < packet_count; i++) { + printk("%sPROCESSING SUBPACKET %d\n", err_print_prefix, i); + subpacket = el_process_subpacket(subpacket); + } +} + +struct el_subpacket * +el_process_subpacket(struct el_subpacket *header) +{ + struct el_subpacket *next = NULL; + + switch(header->class) { + case EL_CLASS__TERMINATION: + /* Termination packet, there are no more */ + break; + case EL_CLASS__HEADER: + next = el_process_header_subpacket(header); + break; + default: + if (NULL == (next = el_process_subpacket_reg(header))) { + printk("%s** Unexpected header CLASS %d TYPE %d" + " -- aborting.\n", + err_print_prefix, + header->class, header->type); + } + break; + } + + return next; +} + +void +el_annotate_subpacket(struct el_subpacket *header) +{ + struct el_subpacket_annotation *a; + char **annotation = NULL; + + for (a = subpacket_annotation_list; a; a = a->next) { + if (a->class == header->class && + a->type == header->type && + a->revision == header->revision) { + /* + * We found the annotation + */ + annotation = a->annotation; + printk("%s %s\n", err_print_prefix, a->description); + break; + } + } + + mchk_dump_mem(header, header->length, annotation); +} + +static void __init +cdl_process_console_data_log(int cpu, struct percpu_struct *pcpu) +{ + struct el_subpacket *header = (struct el_subpacket *) + (IDENT_ADDR | pcpu->console_data_log_pa); + int err; + + printk("%s******* CONSOLE DATA LOG FOR CPU %d. *******\n" + "*** Error(s) were logged on a previous boot\n", + err_print_prefix, cpu); + + for (err = 0; header && (header->class != EL_CLASS__TERMINATION); err++) + header = el_process_subpacket(header); + + /* let the console know it's ok to clear the error(s) at restart */ + pcpu->console_data_log_pa = 0; + + printk("%s*** %d total error(s) logged\n" + "**** END OF CONSOLE DATA LOG FOR CPU %d ****\n", + err_print_prefix, err, cpu); +} + +void __init +cdl_check_console_data_log(void) +{ + struct percpu_struct *pcpu; + unsigned long cpu; + + for (cpu = 0; cpu < hwrpb->nr_processors; cpu++) { + pcpu = (struct percpu_struct *) + ((unsigned long)hwrpb + hwrpb->processor_offset + + cpu * hwrpb->processor_size); + if (pcpu->console_data_log_pa) + cdl_process_console_data_log(cpu, pcpu); + } + +} + +int __init +cdl_register_subpacket_annotation(struct el_subpacket_annotation *new) +{ + struct el_subpacket_annotation *a = subpacket_annotation_list; + + if (a == NULL) subpacket_annotation_list = new; + else { + for (; a->next != NULL; a = a->next) { + if ((a->class == new->class && a->type == new->type) || + a == new) { + printk("Attempted to re-register " + "subpacket annotation\n"); + return -EINVAL; + } + } + a->next = new; + } + new->next = NULL; + + return 0; +} + +int __init +cdl_register_subpacket_handler(struct el_subpacket_handler *new) +{ + struct el_subpacket_handler *h = subpacket_handler_list; + + if (h == NULL) subpacket_handler_list = new; + else { + for (; h->next != NULL; h = h->next) { + if (h->class == new->class || h == new) { + printk("Attempted to re-register " + "subpacket handler\n"); + return -EINVAL; + } + } + h->next = new; + } + new->next = NULL; + + return 0; +} + diff --git a/arch/alpha/kernel/err_ev6.c b/arch/alpha/kernel/err_ev6.c new file mode 100644 index 00000000..253cf1a8 --- /dev/null +++ b/arch/alpha/kernel/err_ev6.c @@ -0,0 +1,276 @@ +/* + * linux/arch/alpha/kernel/err_ev6.c + * + * Copyright (C) 2000 Jeff Wiedemeier (Compaq Computer Corporation) + * + * Error handling code supporting Alpha systems + */ + +#include <linux/init.h> +#include <linux/sched.h> + +#include <asm/io.h> +#include <asm/irq_regs.h> +#include <asm/hwrpb.h> +#include <asm/smp.h> +#include <asm/err_common.h> +#include <asm/err_ev6.h> + +#include "err_impl.h" +#include "proto.h" + +static int +ev6_parse_ibox(u64 i_stat, int print) +{ + int status = MCHK_DISPOSITION_REPORT; + +#define EV6__I_STAT__PAR (1UL << 29) +#define EV6__I_STAT__ERRMASK (EV6__I_STAT__PAR) + + if (!(i_stat & EV6__I_STAT__ERRMASK)) + return MCHK_DISPOSITION_UNKNOWN_ERROR; + + if (!print) + return status; + + if (i_stat & EV6__I_STAT__PAR) + printk("%s Icache parity error\n", err_print_prefix); + + return status; +} + +static int +ev6_parse_mbox(u64 mm_stat, u64 d_stat, u64 c_stat, int print) +{ + int status = MCHK_DISPOSITION_REPORT; + +#define EV6__MM_STAT__DC_TAG_PERR (1UL << 10) +#define EV6__MM_STAT__ERRMASK (EV6__MM_STAT__DC_TAG_PERR) +#define EV6__D_STAT__TPERR_P0 (1UL << 0) +#define EV6__D_STAT__TPERR_P1 (1UL << 1) +#define EV6__D_STAT__ECC_ERR_ST (1UL << 2) +#define EV6__D_STAT__ECC_ERR_LD (1UL << 3) +#define EV6__D_STAT__SEO (1UL << 4) +#define EV6__D_STAT__ERRMASK (EV6__D_STAT__TPERR_P0 | \ + EV6__D_STAT__TPERR_P1 | \ + EV6__D_STAT__ECC_ERR_ST | \ + EV6__D_STAT__ECC_ERR_LD | \ + EV6__D_STAT__SEO) + + if (!(d_stat & EV6__D_STAT__ERRMASK) && + !(mm_stat & EV6__MM_STAT__ERRMASK)) + return MCHK_DISPOSITION_UNKNOWN_ERROR; + + if (!print) + return status; + + if (mm_stat & EV6__MM_STAT__DC_TAG_PERR) + printk("%s Dcache tag parity error on probe\n", + err_print_prefix); + if (d_stat & EV6__D_STAT__TPERR_P0) + printk("%s Dcache tag parity error - pipe 0\n", + err_print_prefix); + if (d_stat & EV6__D_STAT__TPERR_P1) + printk("%s Dcache tag parity error - pipe 1\n", + err_print_prefix); + if (d_stat & EV6__D_STAT__ECC_ERR_ST) + printk("%s ECC error occurred on a store\n", + err_print_prefix); + if (d_stat & EV6__D_STAT__ECC_ERR_LD) + printk("%s ECC error occurred on a %s load\n", + err_print_prefix, + c_stat ? "" : "speculative "); + if (d_stat & EV6__D_STAT__SEO) + printk("%s Dcache second error\n", err_print_prefix); + + return status; +} + +static int +ev6_parse_cbox(u64 c_addr, u64 c1_syn, u64 c2_syn, + u64 c_stat, u64 c_sts, int print) +{ + static const char * const sourcename[] = { + "UNKNOWN", "UNKNOWN", "UNKNOWN", + "MEMORY", "BCACHE", "DCACHE", + "BCACHE PROBE", "BCACHE PROBE" + }; + static const char * const streamname[] = { "D", "I" }; + static const char * const bitsname[] = { "SINGLE", "DOUBLE" }; + int status = MCHK_DISPOSITION_REPORT; + int source = -1, stream = -1, bits = -1; + +#define EV6__C_STAT__BC_PERR (0x01) +#define EV6__C_STAT__DC_PERR (0x02) +#define EV6__C_STAT__DSTREAM_MEM_ERR (0x03) +#define EV6__C_STAT__DSTREAM_BC_ERR (0x04) +#define EV6__C_STAT__DSTREAM_DC_ERR (0x05) +#define EV6__C_STAT__PROBE_BC_ERR0 (0x06) /* both 6 and 7 indicate... */ +#define EV6__C_STAT__PROBE_BC_ERR1 (0x07) /* ...probe bc error. */ +#define EV6__C_STAT__ISTREAM_MEM_ERR (0x0B) +#define EV6__C_STAT__ISTREAM_BC_ERR (0x0C) +#define EV6__C_STAT__DSTREAM_MEM_DBL (0x13) +#define EV6__C_STAT__DSTREAM_BC_DBL (0x14) +#define EV6__C_STAT__ISTREAM_MEM_DBL (0x1B) +#define EV6__C_STAT__ISTREAM_BC_DBL (0x1C) +#define EV6__C_STAT__SOURCE_MEMORY (0x03) +#define EV6__C_STAT__SOURCE_BCACHE (0x04) +#define EV6__C_STAT__SOURCE__S (0) +#define EV6__C_STAT__SOURCE__M (0x07) +#define EV6__C_STAT__ISTREAM__S (3) +#define EV6__C_STAT__ISTREAM__M (0x01) +#define EV6__C_STAT__DOUBLE__S (4) +#define EV6__C_STAT__DOUBLE__M (0x01) +#define EV6__C_STAT__ERRMASK (0x1F) +#define EV6__C_STS__SHARED (1 << 0) +#define EV6__C_STS__DIRTY (1 << 1) +#define EV6__C_STS__VALID (1 << 2) +#define EV6__C_STS__PARITY (1 << 3) + + if (!(c_stat & EV6__C_STAT__ERRMASK)) + return MCHK_DISPOSITION_UNKNOWN_ERROR; + + if (!print) + return status; + + source = EXTRACT(c_stat, EV6__C_STAT__SOURCE); + stream = EXTRACT(c_stat, EV6__C_STAT__ISTREAM); + bits = EXTRACT(c_stat, EV6__C_STAT__DOUBLE); + + if (c_stat & EV6__C_STAT__BC_PERR) { + printk("%s Bcache tag parity error\n", err_print_prefix); + source = -1; + } + + if (c_stat & EV6__C_STAT__DC_PERR) { + printk("%s Dcache tag parity error\n", err_print_prefix); + source = -1; + } + + if (c_stat == EV6__C_STAT__PROBE_BC_ERR0 || + c_stat == EV6__C_STAT__PROBE_BC_ERR1) { + printk("%s Bcache single-bit error on a probe hit\n", + err_print_prefix); + source = -1; + } + + if (source != -1) + printk("%s %s-STREAM %s-BIT ECC error from %s\n", + err_print_prefix, + streamname[stream], bitsname[bits], sourcename[source]); + + printk("%s Address: 0x%016llx\n" + " Syndrome[upper.lower]: %02llx.%02llx\n", + err_print_prefix, + c_addr, + c2_syn, c1_syn); + + if (source == EV6__C_STAT__SOURCE_MEMORY || + source == EV6__C_STAT__SOURCE_BCACHE) + printk("%s Block status: %s%s%s%s\n", + err_print_prefix, + (c_sts & EV6__C_STS__SHARED) ? "SHARED " : "", + (c_sts & EV6__C_STS__DIRTY) ? "DIRTY " : "", + (c_sts & EV6__C_STS__VALID) ? "VALID " : "", + (c_sts & EV6__C_STS__PARITY) ? "PARITY " : ""); + + return status; +} + +void +ev6_register_error_handlers(void) +{ + /* None right now. */ +} + +int +ev6_process_logout_frame(struct el_common *mchk_header, int print) +{ + struct el_common_EV6_mcheck *ev6mchk = + (struct el_common_EV6_mcheck *)mchk_header; + int status = MCHK_DISPOSITION_UNKNOWN_ERROR; + + status |= ev6_parse_ibox(ev6mchk->I_STAT, print); + status |= ev6_parse_mbox(ev6mchk->MM_STAT, ev6mchk->DC_STAT, + ev6mchk->C_STAT, print); + status |= ev6_parse_cbox(ev6mchk->C_ADDR, ev6mchk->DC1_SYNDROME, + ev6mchk->DC0_SYNDROME, ev6mchk->C_STAT, + ev6mchk->C_STS, print); + + if (!print) + return status; + + if (status != MCHK_DISPOSITION_DISMISS) { + char *saved_err_prefix = err_print_prefix; + + /* + * Dump some additional information from the frame + */ + printk("%s EXC_ADDR: 0x%016lx IER_CM: 0x%016lx" + " ISUM: 0x%016lx\n" + " PAL_BASE: 0x%016lx I_CTL: 0x%016lx" + " PCTX: 0x%016lx\n", + err_print_prefix, + ev6mchk->EXC_ADDR, ev6mchk->IER_CM, ev6mchk->ISUM, + ev6mchk->PAL_BASE, ev6mchk->I_CTL, ev6mchk->PCTX); + + if (status == MCHK_DISPOSITION_UNKNOWN_ERROR) { + printk("%s UNKNOWN error, frame follows:\n", + err_print_prefix); + } else { + /* had decode -- downgrade print level for frame */ + err_print_prefix = KERN_NOTICE; + } + + mchk_dump_logout_frame(mchk_header); + + err_print_prefix = saved_err_prefix; + } + + return status; +} + +void +ev6_machine_check(unsigned long vector, unsigned long la_ptr) +{ + struct el_common *mchk_header = (struct el_common *)la_ptr; + + /* + * Sync the processor + */ + mb(); + draina(); + + /* + * Parse the logout frame without printing first. If the only error(s) + * found are have a disposition of "dismiss", then just dismiss them + * and don't print any message + */ + if (ev6_process_logout_frame(mchk_header, 0) != + MCHK_DISPOSITION_DISMISS) { + char *saved_err_prefix = err_print_prefix; + err_print_prefix = KERN_CRIT; + + /* + * Either a nondismissable error was detected or no + * recognized error was detected in the logout frame + * -- report the error in either case + */ + printk("%s*CPU %s Error (Vector 0x%x) reported on CPU %d:\n", + err_print_prefix, + (vector == SCB_Q_PROCERR)?"Correctable":"Uncorrectable", + (unsigned int)vector, (int)smp_processor_id()); + + ev6_process_logout_frame(mchk_header, 1); + dik_show_regs(get_irq_regs(), NULL); + + err_print_prefix = saved_err_prefix; + } + + /* + * Release the logout frame + */ + wrmces(0x7); + mb(); +} + diff --git a/arch/alpha/kernel/err_ev7.c b/arch/alpha/kernel/err_ev7.c new file mode 100644 index 00000000..d738a671 --- /dev/null +++ b/arch/alpha/kernel/err_ev7.c @@ -0,0 +1,286 @@ +/* + * linux/arch/alpha/kernel/err_ev7.c + * + * Copyright (C) 2000 Jeff Wiedemeier (Compaq Computer Corporation) + * + * Error handling code supporting Alpha systems + */ + +#include <linux/init.h> +#include <linux/sched.h> + +#include <asm/io.h> +#include <asm/hwrpb.h> +#include <asm/smp.h> +#include <asm/err_common.h> +#include <asm/err_ev7.h> + +#include "err_impl.h" +#include "proto.h" + +struct ev7_lf_subpackets * +ev7_collect_logout_frame_subpackets(struct el_subpacket *el_ptr, + struct ev7_lf_subpackets *lf_subpackets) +{ + struct el_subpacket *subpacket; + int i; + + /* + * A Marvel machine check frame is always packaged in an + * el_subpacket of class HEADER, type LOGOUT_FRAME. + */ + if (el_ptr->class != EL_CLASS__HEADER || + el_ptr->type != EL_TYPE__HEADER__LOGOUT_FRAME) + return NULL; + + /* + * It is a logout frame header. Look at the one subpacket. + */ + el_ptr = (struct el_subpacket *) + ((unsigned long)el_ptr + el_ptr->length); + + /* + * It has to be class PAL, type LOGOUT_FRAME. + */ + if (el_ptr->class != EL_CLASS__PAL || + el_ptr->type != EL_TYPE__PAL__LOGOUT_FRAME) + return NULL; + + lf_subpackets->logout = (struct ev7_pal_logout_subpacket *) + el_ptr->by_type.raw.data_start; + + /* + * Process the subpackets. + */ + subpacket = (struct el_subpacket *) + ((unsigned long)el_ptr + el_ptr->length); + for (i = 0; + subpacket && i < lf_subpackets->logout->subpacket_count; + subpacket = (struct el_subpacket *) + ((unsigned long)subpacket + subpacket->length), i++) { + /* + * All subpackets should be class PAL. + */ + if (subpacket->class != EL_CLASS__PAL) { + printk("%s**UNEXPECTED SUBPACKET CLASS %d " + "IN LOGOUT FRAME (packet %d\n", + err_print_prefix, subpacket->class, i); + return NULL; + } + + /* + * Remember the subpacket. + */ + switch(subpacket->type) { + case EL_TYPE__PAL__EV7_PROCESSOR: + lf_subpackets->ev7 = + (struct ev7_pal_processor_subpacket *) + subpacket->by_type.raw.data_start; + break; + + case EL_TYPE__PAL__EV7_RBOX: + lf_subpackets->rbox = (struct ev7_pal_rbox_subpacket *) + subpacket->by_type.raw.data_start; + break; + + case EL_TYPE__PAL__EV7_ZBOX: + lf_subpackets->zbox = (struct ev7_pal_zbox_subpacket *) + subpacket->by_type.raw.data_start; + break; + + case EL_TYPE__PAL__EV7_IO: + lf_subpackets->io = (struct ev7_pal_io_subpacket *) + subpacket->by_type.raw.data_start; + break; + + case EL_TYPE__PAL__ENV__AMBIENT_TEMPERATURE: + case EL_TYPE__PAL__ENV__AIRMOVER_FAN: + case EL_TYPE__PAL__ENV__VOLTAGE: + case EL_TYPE__PAL__ENV__INTRUSION: + case EL_TYPE__PAL__ENV__POWER_SUPPLY: + case EL_TYPE__PAL__ENV__LAN: + case EL_TYPE__PAL__ENV__HOT_PLUG: + lf_subpackets->env[ev7_lf_env_index(subpacket->type)] = + (struct ev7_pal_environmental_subpacket *) + subpacket->by_type.raw.data_start; + break; + + default: + /* + * Don't know what kind of frame this is. + */ + return NULL; + } + } + + return lf_subpackets; +} + +void +ev7_machine_check(unsigned long vector, unsigned long la_ptr) +{ + struct el_subpacket *el_ptr = (struct el_subpacket *)la_ptr; + char *saved_err_prefix = err_print_prefix; + + /* + * Sync the processor + */ + mb(); + draina(); + + err_print_prefix = KERN_CRIT; + printk("%s*CPU %s Error (Vector 0x%x) reported on CPU %d\n", + err_print_prefix, + (vector == SCB_Q_PROCERR) ? "Correctable" : "Uncorrectable", + (unsigned int)vector, (int)smp_processor_id()); + el_process_subpacket(el_ptr); + err_print_prefix = saved_err_prefix; + + /* + * Release the logout frame + */ + wrmces(0x7); + mb(); +} + +static char *el_ev7_processor_subpacket_annotation[] = { + "Subpacket Header", "I_STAT", "DC_STAT", + "C_ADDR", "C_SYNDROME_1", "C_SYNDROME_0", + "C_STAT", "C_STS", "MM_STAT", + "EXC_ADDR", "IER_CM", "ISUM", + "PAL_BASE", "I_CTL", "PROCESS_CONTEXT", + "CBOX_CTL", "CBOX_STP_CTL", "CBOX_ACC_CTL", + "CBOX_LCL_SET", "CBOX_GLB_SET", "BBOX_CTL", + "BBOX_ERR_STS", "BBOX_ERR_IDX", "CBOX_DDP_ERR_STS", + "BBOX_DAT_RMP", NULL +}; + +static char *el_ev7_zbox_subpacket_annotation[] = { + "Subpacket Header", + "ZBOX(0): DRAM_ERR_STATUS_2 / DRAM_ERR_STATUS_1", + "ZBOX(0): DRAM_ERROR_CTL / DRAM_ERR_STATUS_3", + "ZBOX(0): DIFT_TIMEOUT / DRAM_ERR_ADR", + "ZBOX(0): FRC_ERR_ADR / DRAM_MAPPER_CTL", + "ZBOX(0): reserved / DIFT_ERR_STATUS", + "ZBOX(1): DRAM_ERR_STATUS_2 / DRAM_ERR_STATUS_1", + "ZBOX(1): DRAM_ERROR_CTL / DRAM_ERR_STATUS_3", + "ZBOX(1): DIFT_TIMEOUT / DRAM_ERR_ADR", + "ZBOX(1): FRC_ERR_ADR / DRAM_MAPPER_CTL", + "ZBOX(1): reserved / DIFT_ERR_STATUS", + "CBOX_CTL", "CBOX_STP_CTL", + "ZBOX(0)_ERROR_PA", "ZBOX(1)_ERROR_PA", + "ZBOX(0)_ORED_SYNDROME","ZBOX(1)_ORED_SYNDROME", + NULL +}; + +static char *el_ev7_rbox_subpacket_annotation[] = { + "Subpacket Header", "RBOX_CFG", "RBOX_N_CFG", + "RBOX_S_CFG", "RBOX_E_CFG", "RBOX_W_CFG", + "RBOX_N_ERR", "RBOX_S_ERR", "RBOX_E_ERR", + "RBOX_W_ERR", "RBOX_IO_CFG", "RBOX_IO_ERR", + "RBOX_L_ERR", "RBOX_WHOAMI", "RBOX_IMASL", + "RBOX_INTQ", "RBOX_INT", NULL +}; + +static char *el_ev7_io_subpacket_annotation[] = { + "Subpacket Header", "IO_ASIC_REV", "IO_SYS_REV", + "IO7_UPH", "HPI_CTL", "CRD_CTL", + "HEI_CTL", "PO7_ERROR_SUM","PO7_UNCRR_SYM", + "PO7_CRRCT_SYM", "PO7_UGBGE_SYM","PO7_ERR_PKT0", + "PO7_ERR_PKT1", "reserved", "reserved", + "PO0_ERR_SUM", "PO0_TLB_ERR", "PO0_SPL_COMPLT", + "PO0_TRANS_SUM", "PO0_FIRST_ERR","PO0_MULT_ERR", + "DM CSR PH", "DM CSR PH", "DM CSR PH", + "DM CSR PH", "reserved", + "PO1_ERR_SUM", "PO1_TLB_ERR", "PO1_SPL_COMPLT", + "PO1_TRANS_SUM", "PO1_FIRST_ERR","PO1_MULT_ERR", + "DM CSR PH", "DM CSR PH", "DM CSR PH", + "DM CSR PH", "reserved", + "PO2_ERR_SUM", "PO2_TLB_ERR", "PO2_SPL_COMPLT", + "PO2_TRANS_SUM", "PO2_FIRST_ERR","PO2_MULT_ERR", + "DM CSR PH", "DM CSR PH", "DM CSR PH", + "DM CSR PH", "reserved", + "PO3_ERR_SUM", "PO3_TLB_ERR", "PO3_SPL_COMPLT", + "PO3_TRANS_SUM", "PO3_FIRST_ERR","PO3_MULT_ERR", + "DM CSR PH", "DM CSR PH", "DM CSR PH", + "DM CSR PH", "reserved", + NULL +}; + +static struct el_subpacket_annotation el_ev7_pal_annotations[] = { + SUBPACKET_ANNOTATION(EL_CLASS__PAL, + EL_TYPE__PAL__EV7_PROCESSOR, + 1, + "EV7 Processor Subpacket", + el_ev7_processor_subpacket_annotation), + SUBPACKET_ANNOTATION(EL_CLASS__PAL, + EL_TYPE__PAL__EV7_ZBOX, + 1, + "EV7 ZBOX Subpacket", + el_ev7_zbox_subpacket_annotation), + SUBPACKET_ANNOTATION(EL_CLASS__PAL, + EL_TYPE__PAL__EV7_RBOX, + 1, + "EV7 RBOX Subpacket", + el_ev7_rbox_subpacket_annotation), + SUBPACKET_ANNOTATION(EL_CLASS__PAL, + EL_TYPE__PAL__EV7_IO, + 1, + "EV7 IO Subpacket", + el_ev7_io_subpacket_annotation) +}; + +static struct el_subpacket * +ev7_process_pal_subpacket(struct el_subpacket *header) +{ + struct ev7_pal_subpacket *packet; + + if (header->class != EL_CLASS__PAL) { + printk("%s ** Unexpected header CLASS %d TYPE %d, aborting\n", + err_print_prefix, + header->class, header->type); + return NULL; + } + + packet = (struct ev7_pal_subpacket *)header->by_type.raw.data_start; + + switch(header->type) { + case EL_TYPE__PAL__LOGOUT_FRAME: + printk("%s*** MCHK occurred on LPID %lld (RBOX %llx)\n", + err_print_prefix, + packet->by_type.logout.whami, + packet->by_type.logout.rbox_whami); + el_print_timestamp(&packet->by_type.logout.timestamp); + printk("%s EXC_ADDR: %016llx\n" + " HALT_CODE: %llx\n", + err_print_prefix, + packet->by_type.logout.exc_addr, + packet->by_type.logout.halt_code); + el_process_subpackets(header, + packet->by_type.logout.subpacket_count); + break; + default: + printk("%s ** PAL TYPE %d SUBPACKET\n", + err_print_prefix, + header->type); + el_annotate_subpacket(header); + break; + } + + return (struct el_subpacket *)((unsigned long)header + header->length); +} + +struct el_subpacket_handler ev7_pal_subpacket_handler = + SUBPACKET_HANDLER_INIT(EL_CLASS__PAL, ev7_process_pal_subpacket); + +void __init +ev7_register_error_handlers(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(el_ev7_pal_annotations); i++) + cdl_register_subpacket_annotation(&el_ev7_pal_annotations[i]); + + cdl_register_subpacket_handler(&ev7_pal_subpacket_handler); +} + diff --git a/arch/alpha/kernel/err_impl.h b/arch/alpha/kernel/err_impl.h new file mode 100644 index 00000000..ae529c41 --- /dev/null +++ b/arch/alpha/kernel/err_impl.h @@ -0,0 +1,87 @@ +/* + * linux/arch/alpha/kernel/err_impl.h + * + * Copyright (C) 2000 Jeff Wiedemeier (Compaq Computer Corporation) + * + * Contains declarations and macros to support Alpha error handling + * implementations. + */ + +#include <asm/mce.h> + +union el_timestamp; +struct el_subpacket; +struct ev7_lf_subpackets; + +struct el_subpacket_annotation { + struct el_subpacket_annotation *next; + u16 class; + u16 type; + u16 revision; + char *description; + char **annotation; +}; +#define SUBPACKET_ANNOTATION(c, t, r, d, a) {NULL, (c), (t), (r), (d), (a)} + +struct el_subpacket_handler { + struct el_subpacket_handler *next; + u16 class; + struct el_subpacket *(*handler)(struct el_subpacket *); +}; +#define SUBPACKET_HANDLER_INIT(c, h) {NULL, (c), (h)} + +/* + * Manipulate a field from a register given it's name. defines + * for the LSB (__S - shift count) and bitmask (__M) are required + * + * EXTRACT(u, f) - extracts the field and places it at bit position 0 + * GEN_MASK(f) - creates an in-position mask for the field + */ +#define EXTRACT(u, f) (((u) >> f##__S) & f##__M) +#define GEN_MASK(f) ((u64)f##__M << f##__S) + +/* + * err_common.c + */ +extern char *err_print_prefix; + +extern void mchk_dump_mem(void *, size_t, char **); +extern void mchk_dump_logout_frame(struct el_common *); +extern void el_print_timestamp(union el_timestamp *); +extern void el_process_subpackets(struct el_subpacket *, int); +extern struct el_subpacket *el_process_subpacket(struct el_subpacket *); +extern void el_annotate_subpacket(struct el_subpacket *); +extern void cdl_check_console_data_log(void); +extern int cdl_register_subpacket_annotation(struct el_subpacket_annotation *); +extern int cdl_register_subpacket_handler(struct el_subpacket_handler *); + +/* + * err_ev7.c + */ +extern struct ev7_lf_subpackets * +ev7_collect_logout_frame_subpackets(struct el_subpacket *, + struct ev7_lf_subpackets *); +extern void ev7_register_error_handlers(void); +extern void ev7_machine_check(unsigned long, unsigned long); + +/* + * err_ev6.c + */ +extern void ev6_register_error_handlers(void); +extern int ev6_process_logout_frame(struct el_common *, int); +extern void ev6_machine_check(unsigned long, unsigned long); + +/* + * err_marvel.c + */ +extern void marvel_machine_check(unsigned long, unsigned long); +extern void marvel_register_error_handlers(void); + +/* + * err_titan.c + */ +extern int titan_process_logout_frame(struct el_common *, int); +extern void titan_machine_check(unsigned long, unsigned long); +extern void titan_register_error_handlers(void); +extern int privateer_process_logout_frame(struct el_common *, int); +extern void privateer_machine_check(unsigned long, unsigned long); diff --git a/arch/alpha/kernel/err_marvel.c b/arch/alpha/kernel/err_marvel.c new file mode 100644 index 00000000..ae54ad91 --- /dev/null +++ b/arch/alpha/kernel/err_marvel.c @@ -0,0 +1,1160 @@ +/* + * linux/arch/alpha/kernel/err_marvel.c + * + * Copyright (C) 2001 Jeff Wiedemeier (Compaq Computer Corporation) + * + */ + +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/sched.h> + +#include <asm/io.h> +#include <asm/console.h> +#include <asm/core_marvel.h> +#include <asm/hwrpb.h> +#include <asm/smp.h> +#include <asm/err_common.h> +#include <asm/err_ev7.h> + +#include "err_impl.h" +#include "proto.h" + +static void +marvel_print_680_frame(struct ev7_lf_subpackets *lf_subpackets) +{ +#ifdef CONFIG_VERBOSE_MCHECK + struct ev7_pal_environmental_subpacket *env; + struct { int type; char *name; } ev_packets[] = { + { EL_TYPE__PAL__ENV__AMBIENT_TEMPERATURE, + "Ambient Temperature" }, + { EL_TYPE__PAL__ENV__AIRMOVER_FAN, + "AirMover / Fan" }, + { EL_TYPE__PAL__ENV__VOLTAGE, + "Voltage" }, + { EL_TYPE__PAL__ENV__INTRUSION, + "Intrusion" }, + { EL_TYPE__PAL__ENV__POWER_SUPPLY, + "Power Supply" }, + { EL_TYPE__PAL__ENV__LAN, + "LAN" }, + { EL_TYPE__PAL__ENV__HOT_PLUG, + "Hot Plug" }, + { 0, NULL } + }; + int i; + + for (i = 0; ev_packets[i].type != 0; i++) { + env = lf_subpackets->env[ev7_lf_env_index(ev_packets[i].type)]; + if (!env) + continue; + + printk("%s**%s event (cabinet %d, drawer %d)\n", + err_print_prefix, + ev_packets[i].name, + env->cabinet, + env->drawer); + printk("%s Module Type: 0x%x - Unit ID 0x%x - " + "Condition 0x%x\n", + err_print_prefix, + env->module_type, + env->unit_id, + env->condition); + } +#endif /* CONFIG_VERBOSE_MCHECK */ +} + +static int +marvel_process_680_frame(struct ev7_lf_subpackets *lf_subpackets, int print) +{ + int status = MCHK_DISPOSITION_UNKNOWN_ERROR; + int i; + + for (i = ev7_lf_env_index(EL_TYPE__PAL__ENV__AMBIENT_TEMPERATURE); + i <= ev7_lf_env_index(EL_TYPE__PAL__ENV__HOT_PLUG); + i++) { + if (lf_subpackets->env[i]) + status = MCHK_DISPOSITION_REPORT; + } + + if (print) + marvel_print_680_frame(lf_subpackets); + + return status; +} + +#ifdef CONFIG_VERBOSE_MCHECK + +static void +marvel_print_err_cyc(u64 err_cyc) +{ + static char *packet_desc[] = { + "No Error", + "UNKNOWN", + "1 cycle (1 or 2 flit packet)", + "2 cycles (3 flit packet)", + "9 cycles (18 flit packet)", + "10 cycles (19 flit packet)", + "UNKNOWN", + "UNKNOWN", + "UNKNOWN" + }; + +#define IO7__ERR_CYC__ODD_FLT (1UL << 0) +#define IO7__ERR_CYC__EVN_FLT (1UL << 1) +#define IO7__ERR_CYC__PACKET__S (6) +#define IO7__ERR_CYC__PACKET__M (0x7) +#define IO7__ERR_CYC__LOC (1UL << 5) +#define IO7__ERR_CYC__CYCLE__S (2) +#define IO7__ERR_CYC__CYCLE__M (0x7) + + printk("%s Packet In Error: %s\n" + "%s Error in %s, cycle %lld%s%s\n", + err_print_prefix, + packet_desc[EXTRACT(err_cyc, IO7__ERR_CYC__PACKET)], + err_print_prefix, + (err_cyc & IO7__ERR_CYC__LOC) ? "DATA" : "HEADER", + EXTRACT(err_cyc, IO7__ERR_CYC__CYCLE), + (err_cyc & IO7__ERR_CYC__ODD_FLT) ? " [ODD Flit]": "", + (err_cyc & IO7__ERR_CYC__EVN_FLT) ? " [Even Flit]": ""); +} + +static void +marvel_print_po7_crrct_sym(u64 crrct_sym) +{ +#define IO7__PO7_CRRCT_SYM__SYN__S (0) +#define IO7__PO7_CRRCT_SYM__SYN__M (0x7f) +#define IO7__PO7_CRRCT_SYM__ERR_CYC__S (7) /* ERR_CYC + ODD_FLT + EVN_FLT */ +#define IO7__PO7_CRRCT_SYM__ERR_CYC__M (0x1ff) + + + printk("%s Correctable Error Symptoms:\n" + "%s Syndrome: 0x%llx\n", + err_print_prefix, + err_print_prefix, EXTRACT(crrct_sym, IO7__PO7_CRRCT_SYM__SYN)); + marvel_print_err_cyc(EXTRACT(crrct_sym, IO7__PO7_CRRCT_SYM__ERR_CYC)); +} + +static void +marvel_print_po7_uncrr_sym(u64 uncrr_sym, u64 valid_mask) +{ + static char *clk_names[] = { "_h[0]", "_h[1]", "_n[0]", "_n[1]" }; + static char *clk_decode[] = { + "No Error", + "One extra rising edge", + "Two extra rising edges", + "Lost one clock" + }; + static char *port_names[] = { "Port 0", "Port 1", + "Port 2", "Port 3", + "Unknown Port", "Unknown Port", + "Unknown Port", "Port 7" }; + int scratch, i; + +#define IO7__PO7_UNCRR_SYM__SYN__S (0) +#define IO7__PO7_UNCRR_SYM__SYN__M (0x7f) +#define IO7__PO7_UNCRR_SYM__ERR_CYC__S (7) /* ERR_CYC + ODD_FLT... */ +#define IO7__PO7_UNCRR_SYM__ERR_CYC__M (0x1ff) /* ... + EVN_FLT */ +#define IO7__PO7_UNCRR_SYM__CLK__S (16) +#define IO7__PO7_UNCRR_SYM__CLK__M (0xff) +#define IO7__PO7_UNCRR_SYM__CDT_OVF_TO__REQ (1UL << 24) +#define IO7__PO7_UNCRR_SYM__CDT_OVF_TO__RIO (1UL << 25) +#define IO7__PO7_UNCRR_SYM__CDT_OVF_TO__WIO (1UL << 26) +#define IO7__PO7_UNCRR_SYM__CDT_OVF_TO__BLK (1UL << 27) +#define IO7__PO7_UNCRR_SYM__CDT_OVF_TO__NBK (1UL << 28) +#define IO7__PO7_UNCRR_SYM__OVF__READIO (1UL << 29) +#define IO7__PO7_UNCRR_SYM__OVF__WRITEIO (1UL << 30) +#define IO7__PO7_UNCRR_SYM__OVF__FWD (1UL << 31) +#define IO7__PO7_UNCRR_SYM__VICTIM_SP__S (32) +#define IO7__PO7_UNCRR_SYM__VICTIM_SP__M (0xff) +#define IO7__PO7_UNCRR_SYM__DETECT_SP__S (40) +#define IO7__PO7_UNCRR_SYM__DETECT_SP__M (0xff) +#define IO7__PO7_UNCRR_SYM__STRV_VTR__S (48) +#define IO7__PO7_UNCRR_SYM__STRV_VTR__M (0x3ff) + +#define IO7__STRV_VTR__LSI__INTX__S (0) +#define IO7__STRV_VTR__LSI__INTX__M (0x3) +#define IO7__STRV_VTR__LSI__SLOT__S (2) +#define IO7__STRV_VTR__LSI__SLOT__M (0x7) +#define IO7__STRV_VTR__LSI__BUS__S (5) +#define IO7__STRV_VTR__LSI__BUS__M (0x3) +#define IO7__STRV_VTR__MSI__INTNUM__S (0) +#define IO7__STRV_VTR__MSI__INTNUM__M (0x1ff) +#define IO7__STRV_VTR__IS_MSI (1UL << 9) + + printk("%s Uncorrectable Error Symptoms:\n", err_print_prefix); + uncrr_sym &= valid_mask; + + if (EXTRACT(valid_mask, IO7__PO7_UNCRR_SYM__SYN)) + printk("%s Syndrome: 0x%llx\n", + err_print_prefix, + EXTRACT(uncrr_sym, IO7__PO7_UNCRR_SYM__SYN)); + + if (EXTRACT(valid_mask, IO7__PO7_UNCRR_SYM__ERR_CYC)) + marvel_print_err_cyc(EXTRACT(uncrr_sym, + IO7__PO7_UNCRR_SYM__ERR_CYC)); + + scratch = EXTRACT(uncrr_sym, IO7__PO7_UNCRR_SYM__CLK); + for (i = 0; i < 4; i++, scratch >>= 2) { + if (scratch & 0x3) + printk("%s Clock %s: %s\n", + err_print_prefix, + clk_names[i], clk_decode[scratch & 0x3]); + } + + if (uncrr_sym & IO7__PO7_UNCRR_SYM__CDT_OVF_TO__REQ) + printk("%s REQ Credit Timeout or Overflow\n", + err_print_prefix); + if (uncrr_sym & IO7__PO7_UNCRR_SYM__CDT_OVF_TO__RIO) + printk("%s RIO Credit Timeout or Overflow\n", + err_print_prefix); + if (uncrr_sym & IO7__PO7_UNCRR_SYM__CDT_OVF_TO__WIO) + printk("%s WIO Credit Timeout or Overflow\n", + err_print_prefix); + if (uncrr_sym & IO7__PO7_UNCRR_SYM__CDT_OVF_TO__BLK) + printk("%s BLK Credit Timeout or Overflow\n", + err_print_prefix); + if (uncrr_sym & IO7__PO7_UNCRR_SYM__CDT_OVF_TO__NBK) + printk("%s NBK Credit Timeout or Overflow\n", + err_print_prefix); + + if (uncrr_sym & IO7__PO7_UNCRR_SYM__OVF__READIO) + printk("%s Read I/O Buffer Overflow\n", + err_print_prefix); + if (uncrr_sym & IO7__PO7_UNCRR_SYM__OVF__WRITEIO) + printk("%s Write I/O Buffer Overflow\n", + err_print_prefix); + if (uncrr_sym & IO7__PO7_UNCRR_SYM__OVF__FWD) + printk("%s FWD Buffer Overflow\n", + err_print_prefix); + + if ((scratch = EXTRACT(uncrr_sym, IO7__PO7_UNCRR_SYM__VICTIM_SP))) { + int lost = scratch & (1UL << 4); + scratch &= ~lost; + for (i = 0; i < 8; i++, scratch >>= 1) { + if (!(scratch & 1)) + continue; + printk("%s Error Response sent to %s", + err_print_prefix, port_names[i]); + } + if (lost) + printk("%s Lost Error sent somewhere else\n", + err_print_prefix); + } + + if ((scratch = EXTRACT(uncrr_sym, IO7__PO7_UNCRR_SYM__DETECT_SP))) { + for (i = 0; i < 8; i++, scratch >>= 1) { + if (!(scratch & 1)) + continue; + printk("%s Error Reported by %s", + err_print_prefix, port_names[i]); + } + } + + if (EXTRACT(valid_mask, IO7__PO7_UNCRR_SYM__STRV_VTR)) { + char starvation_message[80]; + + scratch = EXTRACT(uncrr_sym, IO7__PO7_UNCRR_SYM__STRV_VTR); + if (scratch & IO7__STRV_VTR__IS_MSI) + sprintf(starvation_message, + "MSI Interrupt 0x%x", + EXTRACT(scratch, IO7__STRV_VTR__MSI__INTNUM)); + else + sprintf(starvation_message, + "LSI INT%c for Bus:Slot (%d:%d)\n", + 'A' + EXTRACT(scratch, + IO7__STRV_VTR__LSI__INTX), + EXTRACT(scratch, IO7__STRV_VTR__LSI__BUS), + EXTRACT(scratch, IO7__STRV_VTR__LSI__SLOT)); + + printk("%s Starvation Int Trigger By: %s\n", + err_print_prefix, starvation_message); + } +} + +static void +marvel_print_po7_ugbge_sym(u64 ugbge_sym) +{ + char opcode_str[10]; + +#define IO7__PO7_UGBGE_SYM__UPH_PKT_OFF__S (6) +#define IO7__PO7_UGBGE_SYM__UPH_PKT_OFF__M (0xfffffffful) +#define IO7__PO7_UGBGE_SYM__UPH_OPCODE__S (40) +#define IO7__PO7_UGBGE_SYM__UPH_OPCODE__M (0xff) +#define IO7__PO7_UGBGE_SYM__UPH_SRC_PORT__S (48) +#define IO7__PO7_UGBGE_SYM__UPH_SRC_PORT__M (0xf) +#define IO7__PO7_UGBGE_SYM__UPH_DEST_PID__S (52) +#define IO7__PO7_UGBGE_SYM__UPH_DEST_PID__M (0x7ff) +#define IO7__PO7_UGBGE_SYM__VALID (1UL << 63) + + if (!(ugbge_sym & IO7__PO7_UGBGE_SYM__VALID)) + return; + + switch(EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_OPCODE)) { + case 0x51: + sprintf(opcode_str, "Wr32"); + break; + case 0x50: + sprintf(opcode_str, "WrQW"); + break; + case 0x54: + sprintf(opcode_str, "WrIPR"); + break; + case 0xD8: + sprintf(opcode_str, "Victim"); + break; + case 0xC5: + sprintf(opcode_str, "BlkIO"); + break; + default: + sprintf(opcode_str, "0x%llx\n", + EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_OPCODE)); + break; + } + + printk("%s Up Hose Garbage Symptom:\n" + "%s Source Port: %lld - Dest PID: %lld - OpCode: %s\n", + err_print_prefix, + err_print_prefix, + EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_SRC_PORT), + EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_DEST_PID), + opcode_str); + + if (0xC5 != EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_OPCODE)) + printk("%s Packet Offset 0x%08llx\n", + err_print_prefix, + EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_PKT_OFF)); +} + +static void +marvel_print_po7_err_sum(struct ev7_pal_io_subpacket *io) +{ + u64 uncrr_sym_valid = 0; + +#define IO7__PO7_ERRSUM__CR_SBE (1UL << 32) +#define IO7__PO7_ERRSUM__CR_SBE2 (1UL << 33) +#define IO7__PO7_ERRSUM__CR_PIO_WBYTE (1UL << 34) +#define IO7__PO7_ERRSUM__CR_CSR_NXM (1UL << 35) +#define IO7__PO7_ERRSUM__CR_RPID_ACV (1UL << 36) +#define IO7__PO7_ERRSUM__CR_RSP_NXM (1UL << 37) +#define IO7__PO7_ERRSUM__CR_ERR_RESP (1UL << 38) +#define IO7__PO7_ERRSUM__CR_CLK_DERR (1UL << 39) +#define IO7__PO7_ERRSUM__CR_DAT_DBE (1UL << 40) +#define IO7__PO7_ERRSUM__CR_DAT_GRBG (1UL << 41) +#define IO7__PO7_ERRSUM__MAF_TO (1UL << 42) +#define IO7__PO7_ERRSUM__UGBGE (1UL << 43) +#define IO7__PO7_ERRSUM__UN_MAF_LOST (1UL << 44) +#define IO7__PO7_ERRSUM__UN_PKT_OVF (1UL << 45) +#define IO7__PO7_ERRSUM__UN_CDT_OVF (1UL << 46) +#define IO7__PO7_ERRSUM__UN_DEALLOC (1UL << 47) +#define IO7__PO7_ERRSUM__BH_CDT_TO (1UL << 51) +#define IO7__PO7_ERRSUM__BH_CLK_HDR (1UL << 52) +#define IO7__PO7_ERRSUM__BH_DBE_HDR (1UL << 53) +#define IO7__PO7_ERRSUM__BH_GBG_HDR (1UL << 54) +#define IO7__PO7_ERRSUM__BH_BAD_CMD (1UL << 55) +#define IO7__PO7_ERRSUM__HLT_INT (1UL << 56) +#define IO7__PO7_ERRSUM__HP_INT (1UL << 57) +#define IO7__PO7_ERRSUM__CRD_INT (1UL << 58) +#define IO7__PO7_ERRSUM__STV_INT (1UL << 59) +#define IO7__PO7_ERRSUM__HRD_INT (1UL << 60) +#define IO7__PO7_ERRSUM__BH_SUM (1UL << 61) +#define IO7__PO7_ERRSUM__ERR_LST (1UL << 62) +#define IO7__PO7_ERRSUM__ERR_VALID (1UL << 63) + +#define IO7__PO7_ERRSUM__ERR_MASK (IO7__PO7_ERRSUM__ERR_VALID | \ + IO7__PO7_ERRSUM__CR_SBE) + + /* + * Single bit errors aren't covered by ERR_VALID. + */ + if (io->po7_error_sum & IO7__PO7_ERRSUM__CR_SBE) { + printk("%s %sSingle Bit Error(s) detected/corrected\n", + err_print_prefix, + (io->po7_error_sum & IO7__PO7_ERRSUM__CR_SBE2) + ? "Multiple " : ""); + marvel_print_po7_crrct_sym(io->po7_crrct_sym); + } + + /* + * Neither are the interrupt status bits + */ + if (io->po7_error_sum & IO7__PO7_ERRSUM__HLT_INT) + printk("%s Halt Interrupt posted", err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__HP_INT) { + printk("%s Hot Plug Event Interrupt posted", + err_print_prefix); + uncrr_sym_valid |= GEN_MASK(IO7__PO7_UNCRR_SYM__DETECT_SP); + } + if (io->po7_error_sum & IO7__PO7_ERRSUM__CRD_INT) + printk("%s Correctable Error Interrupt posted", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__STV_INT) { + printk("%s Starvation Interrupt posted", err_print_prefix); + uncrr_sym_valid |= GEN_MASK(IO7__PO7_UNCRR_SYM__STRV_VTR); + } + if (io->po7_error_sum & IO7__PO7_ERRSUM__HRD_INT) { + printk("%s Hard Error Interrupt posted", err_print_prefix); + uncrr_sym_valid |= GEN_MASK(IO7__PO7_UNCRR_SYM__DETECT_SP); + } + + /* + * Everything else is valid only with ERR_VALID, so skip to the end + * (uncrr_sym check) unless ERR_VALID is set. + */ + if (!(io->po7_error_sum & IO7__PO7_ERRSUM__ERR_VALID)) + goto check_uncrr_sym; + + /* + * Since ERR_VALID is set, VICTIM_SP in uncrr_sym is valid. + * For bits [29:0] to also be valid, the following bits must + * not be set: + * CR_PIO_WBYTE CR_CSR_NXM CR_RSP_NXM + * CR_ERR_RESP MAF_TO + */ + uncrr_sym_valid |= GEN_MASK(IO7__PO7_UNCRR_SYM__VICTIM_SP); + if (!(io->po7_error_sum & (IO7__PO7_ERRSUM__CR_PIO_WBYTE | + IO7__PO7_ERRSUM__CR_CSR_NXM | + IO7__PO7_ERRSUM__CR_RSP_NXM | + IO7__PO7_ERRSUM__CR_ERR_RESP | + IO7__PO7_ERRSUM__MAF_TO))) + uncrr_sym_valid |= 0x3ffffffful; + + if (io->po7_error_sum & IO7__PO7_ERRSUM__CR_PIO_WBYTE) + printk("%s Write byte into IO7 CSR\n", err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__CR_CSR_NXM) + printk("%s PIO to non-existent CSR\n", err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__CR_RPID_ACV) + printk("%s Bus Requester PID (Access Violation)\n", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__CR_RSP_NXM) + printk("%s Received NXM response from EV7\n", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__CR_ERR_RESP) + printk("%s Received ERROR RESPONSE\n", err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__CR_CLK_DERR) + printk("%s Clock error on data flit\n", err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__CR_DAT_DBE) + printk("%s Double Bit Error Data Error Detected\n", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__CR_DAT_GRBG) + printk("%s Garbage Encoding Detected on the data\n", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__UGBGE) { + printk("%s Garbage Encoding sent up hose\n", + err_print_prefix); + marvel_print_po7_ugbge_sym(io->po7_ugbge_sym); + } + if (io->po7_error_sum & IO7__PO7_ERRSUM__UN_MAF_LOST) + printk("%s Orphan response (unexpected response)\n", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__UN_PKT_OVF) + printk("%s Down hose packet overflow\n", err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__UN_CDT_OVF) + printk("%s Down hose credit overflow\n", err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__UN_DEALLOC) + printk("%s Unexpected or bad dealloc field\n", + err_print_prefix); + + /* + * The black hole events. + */ + if (io->po7_error_sum & IO7__PO7_ERRSUM__MAF_TO) + printk("%s BLACK HOLE: Timeout for all responses\n", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__BH_CDT_TO) + printk("%s BLACK HOLE: Credit Timeout\n", err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__BH_CLK_HDR) + printk("%s BLACK HOLE: Clock check on header\n", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__BH_DBE_HDR) + printk("%s BLACK HOLE: Uncorrectable Error on header\n", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__BH_GBG_HDR) + printk("%s BLACK HOLE: Garbage on header\n", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__BH_BAD_CMD) + printk("%s BLACK HOLE: Bad EV7 command\n", + err_print_prefix); + + if (io->po7_error_sum & IO7__PO7_ERRSUM__ERR_LST) + printk("%s Lost Error\n", err_print_prefix); + + printk("%s Failing Packet:\n" + "%s Cycle 1: %016llx\n" + "%s Cycle 2: %016llx\n", + err_print_prefix, + err_print_prefix, io->po7_err_pkt0, + err_print_prefix, io->po7_err_pkt1); + /* + * If there are any valid bits in UNCRR sym for this err, + * print UNCRR_SYM as well. + */ +check_uncrr_sym: + if (uncrr_sym_valid) + marvel_print_po7_uncrr_sym(io->po7_uncrr_sym, uncrr_sym_valid); +} + +static void +marvel_print_pox_tlb_err(u64 tlb_err) +{ + static char *tlb_errors[] = { + "No Error", + "North Port Signaled Error fetching TLB entry", + "PTE invalid or UCC or GBG error on this entry", + "Address did not hit any DMA window" + }; + +#define IO7__POX_TLBERR__ERR_VALID (1UL << 63) +#define IO7__POX_TLBERR__ERRCODE__S (0) +#define IO7__POX_TLBERR__ERRCODE__M (0x3) +#define IO7__POX_TLBERR__ERR_TLB_PTR__S (3) +#define IO7__POX_TLBERR__ERR_TLB_PTR__M (0x7) +#define IO7__POX_TLBERR__FADDR__S (6) +#define IO7__POX_TLBERR__FADDR__M (0x3fffffffffful) + + if (!(tlb_err & IO7__POX_TLBERR__ERR_VALID)) + return; + + printk("%s TLB Error on index 0x%llx:\n" + "%s - %s\n" + "%s - Addr: 0x%016llx\n", + err_print_prefix, + EXTRACT(tlb_err, IO7__POX_TLBERR__ERR_TLB_PTR), + err_print_prefix, + tlb_errors[EXTRACT(tlb_err, IO7__POX_TLBERR__ERRCODE)], + err_print_prefix, + EXTRACT(tlb_err, IO7__POX_TLBERR__FADDR) << 6); +} + +static void +marvel_print_pox_spl_cmplt(u64 spl_cmplt) +{ + char message[80]; + +#define IO7__POX_SPLCMPLT__MESSAGE__S (0) +#define IO7__POX_SPLCMPLT__MESSAGE__M (0x0fffffffful) +#define IO7__POX_SPLCMPLT__SOURCE_BUS__S (40) +#define IO7__POX_SPLCMPLT__SOURCE_BUS__M (0xfful) +#define IO7__POX_SPLCMPLT__SOURCE_DEV__S (35) +#define IO7__POX_SPLCMPLT__SOURCE_DEV__M (0x1ful) +#define IO7__POX_SPLCMPLT__SOURCE_FUNC__S (32) +#define IO7__POX_SPLCMPLT__SOURCE_FUNC__M (0x07ul) + +#define IO7__POX_SPLCMPLT__MSG_CLASS__S (28) +#define IO7__POX_SPLCMPLT__MSG_CLASS__M (0xf) +#define IO7__POX_SPLCMPLT__MSG_INDEX__S (20) +#define IO7__POX_SPLCMPLT__MSG_INDEX__M (0xff) +#define IO7__POX_SPLCMPLT__MSG_CLASSINDEX__S (20) +#define IO7__POX_SPLCMPLT__MSG_CLASSINDEX__M (0xfff) +#define IO7__POX_SPLCMPLT__REM_LOWER_ADDR__S (12) +#define IO7__POX_SPLCMPLT__REM_LOWER_ADDR__M (0x7f) +#define IO7__POX_SPLCMPLT__REM_BYTE_COUNT__S (0) +#define IO7__POX_SPLCMPLT__REM_BYTE_COUNT__M (0xfff) + + printk("%s Split Completion Error:\n" + "%s Source (Bus:Dev:Func): %lld:%lld:%lld\n", + err_print_prefix, + err_print_prefix, + EXTRACT(spl_cmplt, IO7__POX_SPLCMPLT__SOURCE_BUS), + EXTRACT(spl_cmplt, IO7__POX_SPLCMPLT__SOURCE_DEV), + EXTRACT(spl_cmplt, IO7__POX_SPLCMPLT__SOURCE_FUNC)); + + switch(EXTRACT(spl_cmplt, IO7__POX_SPLCMPLT__MSG_CLASSINDEX)) { + case 0x000: + sprintf(message, "Normal completion"); + break; + case 0x100: + sprintf(message, "Bridge - Master Abort"); + break; + case 0x101: + sprintf(message, "Bridge - Target Abort"); + break; + case 0x102: + sprintf(message, "Bridge - Uncorrectable Write Data Error"); + break; + case 0x200: + sprintf(message, "Byte Count Out of Range"); + break; + case 0x201: + sprintf(message, "Uncorrectable Split Write Data Error"); + break; + default: + sprintf(message, "%08llx\n", + EXTRACT(spl_cmplt, IO7__POX_SPLCMPLT__MESSAGE)); + break; + } + printk("%s Message: %s\n", err_print_prefix, message); +} + +static void +marvel_print_pox_trans_sum(u64 trans_sum) +{ + static const char * const pcix_cmd[] = { + "Interrupt Acknowledge", + "Special Cycle", + "I/O Read", + "I/O Write", + "Reserved", + "Reserved / Device ID Message", + "Memory Read", + "Memory Write", + "Reserved / Alias to Memory Read Block", + "Reserved / Alias to Memory Write Block", + "Configuration Read", + "Configuration Write", + "Memory Read Multiple / Split Completion", + "Dual Address Cycle", + "Memory Read Line / Memory Read Block", + "Memory Write and Invalidate / Memory Write Block" + }; + +#define IO7__POX_TRANSUM__PCI_ADDR__S (0) +#define IO7__POX_TRANSUM__PCI_ADDR__M (0x3fffffffffffful) +#define IO7__POX_TRANSUM__DAC (1UL << 50) +#define IO7__POX_TRANSUM__PCIX_MASTER_SLOT__S (52) +#define IO7__POX_TRANSUM__PCIX_MASTER_SLOT__M (0xf) +#define IO7__POX_TRANSUM__PCIX_CMD__S (56) +#define IO7__POX_TRANSUM__PCIX_CMD__M (0xf) +#define IO7__POX_TRANSUM__ERR_VALID (1UL << 63) + + if (!(trans_sum & IO7__POX_TRANSUM__ERR_VALID)) + return; + + printk("%s Transaction Summary:\n" + "%s Command: 0x%llx - %s\n" + "%s Address: 0x%016llx%s\n" + "%s PCI-X Master Slot: 0x%llx\n", + err_print_prefix, + err_print_prefix, + EXTRACT(trans_sum, IO7__POX_TRANSUM__PCIX_CMD), + pcix_cmd[EXTRACT(trans_sum, IO7__POX_TRANSUM__PCIX_CMD)], + err_print_prefix, + EXTRACT(trans_sum, IO7__POX_TRANSUM__PCI_ADDR), + (trans_sum & IO7__POX_TRANSUM__DAC) ? " (DAC)" : "", + err_print_prefix, + EXTRACT(trans_sum, IO7__POX_TRANSUM__PCIX_MASTER_SLOT)); +} + +static void +marvel_print_pox_err(u64 err_sum, struct ev7_pal_io_one_port *port) +{ +#define IO7__POX_ERRSUM__AGP_REQQ_OVFL (1UL << 4) +#define IO7__POX_ERRSUM__AGP_SYNC_ERR (1UL << 5) +#define IO7__POX_ERRSUM__MRETRY_TO (1UL << 6) +#define IO7__POX_ERRSUM__PCIX_UX_SPL (1UL << 7) +#define IO7__POX_ERRSUM__PCIX_SPLIT_TO (1UL << 8) +#define IO7__POX_ERRSUM__PCIX_DISCARD_SPL (1UL << 9) +#define IO7__POX_ERRSUM__DMA_RD_TO (1UL << 10) +#define IO7__POX_ERRSUM__CSR_NXM_RD (1UL << 11) +#define IO7__POX_ERRSUM__CSR_NXM_WR (1UL << 12) +#define IO7__POX_ERRSUM__DMA_TO (1UL << 13) +#define IO7__POX_ERRSUM__ALL_MABORTS (1UL << 14) +#define IO7__POX_ERRSUM__MABORT (1UL << 15) +#define IO7__POX_ERRSUM__MABORT_MASK (IO7__POX_ERRSUM__ALL_MABORTS|\ + IO7__POX_ERRSUM__MABORT) +#define IO7__POX_ERRSUM__PT_TABORT (1UL << 16) +#define IO7__POX_ERRSUM__PM_TABORT (1UL << 17) +#define IO7__POX_ERRSUM__TABORT_MASK (IO7__POX_ERRSUM__PT_TABORT | \ + IO7__POX_ERRSUM__PM_TABORT) +#define IO7__POX_ERRSUM__SERR (1UL << 18) +#define IO7__POX_ERRSUM__ADDRERR_STB (1UL << 19) +#define IO7__POX_ERRSUM__DETECTED_SERR (1UL << 20) +#define IO7__POX_ERRSUM__PERR (1UL << 21) +#define IO7__POX_ERRSUM__DATAERR_STB_NIOW (1UL << 22) +#define IO7__POX_ERRSUM__DETECTED_PERR (1UL << 23) +#define IO7__POX_ERRSUM__PM_PERR (1UL << 24) +#define IO7__POX_ERRSUM__PT_SCERROR (1UL << 26) +#define IO7__POX_ERRSUM__HUNG_BUS (1UL << 28) +#define IO7__POX_ERRSUM__UPE_ERROR__S (51) +#define IO7__POX_ERRSUM__UPE_ERROR__M (0xffUL) +#define IO7__POX_ERRSUM__UPE_ERROR GEN_MASK(IO7__POX_ERRSUM__UPE_ERROR) +#define IO7__POX_ERRSUM__TLB_ERR (1UL << 59) +#define IO7__POX_ERRSUM__ERR_VALID (1UL << 63) + +#define IO7__POX_ERRSUM__TRANS_SUM__MASK (IO7__POX_ERRSUM__MRETRY_TO | \ + IO7__POX_ERRSUM__PCIX_UX_SPL | \ + IO7__POX_ERRSUM__PCIX_SPLIT_TO | \ + IO7__POX_ERRSUM__DMA_TO | \ + IO7__POX_ERRSUM__MABORT_MASK | \ + IO7__POX_ERRSUM__TABORT_MASK | \ + IO7__POX_ERRSUM__SERR | \ + IO7__POX_ERRSUM__ADDRERR_STB | \ + IO7__POX_ERRSUM__PERR | \ + IO7__POX_ERRSUM__DATAERR_STB_NIOW |\ + IO7__POX_ERRSUM__DETECTED_PERR | \ + IO7__POX_ERRSUM__PM_PERR | \ + IO7__POX_ERRSUM__PT_SCERROR | \ + IO7__POX_ERRSUM__UPE_ERROR) + + if (!(err_sum & IO7__POX_ERRSUM__ERR_VALID)) + return; + + /* + * First the transaction summary errors + */ + if (err_sum & IO7__POX_ERRSUM__MRETRY_TO) + printk("%s IO7 Master Retry Timeout expired\n", + err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__PCIX_UX_SPL) + printk("%s Unexpected Split Completion\n", + err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__PCIX_SPLIT_TO) + printk("%s IO7 Split Completion Timeout expired\n", + err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__DMA_TO) + printk("%s Hung bus during DMA transaction\n", + err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__MABORT_MASK) + printk("%s Master Abort\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__PT_TABORT) + printk("%s IO7 Asserted Target Abort\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__PM_TABORT) + printk("%s IO7 Received Target Abort\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__ADDRERR_STB) { + printk("%s Address or PCI-X Attribute Parity Error\n", + err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__SERR) + printk("%s IO7 Asserted SERR\n", err_print_prefix); + } + if (err_sum & IO7__POX_ERRSUM__PERR) { + if (err_sum & IO7__POX_ERRSUM__DATAERR_STB_NIOW) + printk("%s IO7 Detected Data Parity Error\n", + err_print_prefix); + else + printk("%s Split Completion Response with " + "Parity Error\n", err_print_prefix); + } + if (err_sum & IO7__POX_ERRSUM__DETECTED_PERR) + printk("%s PERR detected\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__PM_PERR) + printk("%s PERR while IO7 is master\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__PT_SCERROR) { + printk("%s IO7 Received Split Completion Error message\n", + err_print_prefix); + marvel_print_pox_spl_cmplt(port->pox_spl_cmplt); + } + if (err_sum & IO7__POX_ERRSUM__UPE_ERROR) { + unsigned int upe_error = EXTRACT(err_sum, + IO7__POX_ERRSUM__UPE_ERROR); + int i; + static char *upe_errors[] = { + "Parity Error on MSI write data", + "MSI read (MSI window is write only", + "TLB - Invalid WR transaction", + "TLB - Invalid RD transaction", + "DMA - WR error (see north port)", + "DMA - RD error (see north port)", + "PPR - WR error (see north port)", + "PPR - RD error (see north port)" + }; + + printk("%s UPE Error:\n", err_print_prefix); + for (i = 0; i < 8; i++) { + if (upe_error & (1 << i)) + printk("%s %s\n", err_print_prefix, + upe_errors[i]); + } + } + + /* + * POx_TRANS_SUM, if appropriate. + */ + if (err_sum & IO7__POX_ERRSUM__TRANS_SUM__MASK) + marvel_print_pox_trans_sum(port->pox_trans_sum); + + /* + * Then TLB_ERR. + */ + if (err_sum & IO7__POX_ERRSUM__TLB_ERR) { + printk("%s TLB ERROR\n", err_print_prefix); + marvel_print_pox_tlb_err(port->pox_tlb_err); + } + + /* + * And the single bit status errors. + */ + if (err_sum & IO7__POX_ERRSUM__AGP_REQQ_OVFL) + printk("%s AGP Request Queue Overflow\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__AGP_SYNC_ERR) + printk("%s AGP Sync Error\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__PCIX_DISCARD_SPL) + printk("%s Discarded split completion\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__DMA_RD_TO) + printk("%s DMA Read Timeout\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__CSR_NXM_RD) + printk("%s CSR NXM READ\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__CSR_NXM_WR) + printk("%s CSR NXM WRITE\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__DETECTED_SERR) + printk("%s SERR detected\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__HUNG_BUS) + printk("%s HUNG BUS detected\n", err_print_prefix); +} + +#endif /* CONFIG_VERBOSE_MCHECK */ + +static struct ev7_pal_io_subpacket * +marvel_find_io7_with_error(struct ev7_lf_subpackets *lf_subpackets) +{ + struct ev7_pal_io_subpacket *io = lf_subpackets->io; + struct io7 *io7; + int i; + + /* + * Caller must provide the packet to fill + */ + if (!io) + return NULL; + + /* + * Fill the subpacket with the console's standard fill pattern + */ + memset(io, 0x55, sizeof(*io)); + + for (io7 = NULL; NULL != (io7 = marvel_next_io7(io7)); ) { + unsigned long err_sum = 0; + + err_sum |= io7->csrs->PO7_ERROR_SUM.csr; + for (i = 0; i < IO7_NUM_PORTS; i++) { + if (!io7->ports[i].enabled) + continue; + err_sum |= io7->ports[i].csrs->POx_ERR_SUM.csr; + } + + /* + * Is there at least one error? + */ + if (err_sum & (1UL << 63)) + break; + } + + /* + * Did we find an IO7 with an error? + */ + if (!io7) + return NULL; + + /* + * We have an IO7 with an error. + * + * Fill in the IO subpacket. + */ + io->io_asic_rev = io7->csrs->IO_ASIC_REV.csr; + io->io_sys_rev = io7->csrs->IO_SYS_REV.csr; + io->io7_uph = io7->csrs->IO7_UPH.csr; + io->hpi_ctl = io7->csrs->HPI_CTL.csr; + io->crd_ctl = io7->csrs->CRD_CTL.csr; + io->hei_ctl = io7->csrs->HEI_CTL.csr; + io->po7_error_sum = io7->csrs->PO7_ERROR_SUM.csr; + io->po7_uncrr_sym = io7->csrs->PO7_UNCRR_SYM.csr; + io->po7_crrct_sym = io7->csrs->PO7_CRRCT_SYM.csr; + io->po7_ugbge_sym = io7->csrs->PO7_UGBGE_SYM.csr; + io->po7_err_pkt0 = io7->csrs->PO7_ERR_PKT[0].csr; + io->po7_err_pkt1 = io7->csrs->PO7_ERR_PKT[1].csr; + + for (i = 0; i < IO7_NUM_PORTS; i++) { + io7_ioport_csrs *csrs = io7->ports[i].csrs; + + if (!io7->ports[i].enabled) + continue; + + io->ports[i].pox_err_sum = csrs->POx_ERR_SUM.csr; + io->ports[i].pox_tlb_err = csrs->POx_TLB_ERR.csr; + io->ports[i].pox_spl_cmplt = csrs->POx_SPL_COMPLT.csr; + io->ports[i].pox_trans_sum = csrs->POx_TRANS_SUM.csr; + io->ports[i].pox_first_err = csrs->POx_FIRST_ERR.csr; + io->ports[i].pox_mult_err = csrs->POx_MULT_ERR.csr; + io->ports[i].pox_dm_source = csrs->POx_DM_SOURCE.csr; + io->ports[i].pox_dm_dest = csrs->POx_DM_DEST.csr; + io->ports[i].pox_dm_size = csrs->POx_DM_SIZE.csr; + io->ports[i].pox_dm_ctrl = csrs->POx_DM_CTRL.csr; + + /* + * Ack this port's errors, if any. POx_ERR_SUM must be last. + * + * Most of the error registers get cleared and unlocked when + * the associated bits in POx_ERR_SUM are cleared (by writing + * 1). POx_TLB_ERR is an exception and must be explicitly + * cleared. + */ + csrs->POx_TLB_ERR.csr = io->ports[i].pox_tlb_err; + csrs->POx_ERR_SUM.csr = io->ports[i].pox_err_sum; + mb(); + csrs->POx_ERR_SUM.csr; + } + + /* + * Ack any port 7 error(s). + */ + io7->csrs->PO7_ERROR_SUM.csr = io->po7_error_sum; + mb(); + io7->csrs->PO7_ERROR_SUM.csr; + + /* + * Correct the io7_pid. + */ + lf_subpackets->io_pid = io7->pe; + + return io; +} + +static int +marvel_process_io_error(struct ev7_lf_subpackets *lf_subpackets, int print) +{ + int status = MCHK_DISPOSITION_UNKNOWN_ERROR; + +#ifdef CONFIG_VERBOSE_MCHECK + struct ev7_pal_io_subpacket *io = lf_subpackets->io; + int i; +#endif /* CONFIG_VERBOSE_MCHECK */ + +#define MARVEL_IO_ERR_VALID(x) ((x) & (1UL << 63)) + + if (!lf_subpackets->logout || !lf_subpackets->io) + return status; + + /* + * The PALcode only builds an IO subpacket if there is a + * locally connected IO7. In the cases of + * 1) a uniprocessor kernel + * 2) an mp kernel before the local secondary has called in + * error interrupts are all directed to the primary processor. + * In that case, we may not have an IO subpacket at all and, event + * if we do, it may not be the right now. + * + * If the RBOX indicates an I/O error interrupt, make sure we have + * the correct IO7 information. If we don't have an IO subpacket + * or it's the wrong one, try to find the right one. + * + * RBOX I/O error interrupts are indicated by RBOX_INT<29> and + * RBOX_INT<10>. + */ + if ((lf_subpackets->io->po7_error_sum & (1UL << 32)) || + ((lf_subpackets->io->po7_error_sum | + lf_subpackets->io->ports[0].pox_err_sum | + lf_subpackets->io->ports[1].pox_err_sum | + lf_subpackets->io->ports[2].pox_err_sum | + lf_subpackets->io->ports[3].pox_err_sum) & (1UL << 63))) { + /* + * Either we have no IO subpacket or no error is + * indicated in the one we do have. Try find the + * one with the error. + */ + if (!marvel_find_io7_with_error(lf_subpackets)) + return status; + } + + /* + * We have an IO7 indicating an error - we're going to report it + */ + status = MCHK_DISPOSITION_REPORT; + +#ifdef CONFIG_VERBOSE_MCHECK + + if (!print) + return status; + + printk("%s*Error occurred on IO7 at PID %u\n", + err_print_prefix, lf_subpackets->io_pid); + + /* + * Check port 7 first + */ + if (lf_subpackets->io->po7_error_sum & IO7__PO7_ERRSUM__ERR_MASK) { + marvel_print_po7_err_sum(io); + +#if 0 + printk("%s PORT 7 ERROR:\n" + "%s PO7_ERROR_SUM: %016llx\n" + "%s PO7_UNCRR_SYM: %016llx\n" + "%s PO7_CRRCT_SYM: %016llx\n" + "%s PO7_UGBGE_SYM: %016llx\n" + "%s PO7_ERR_PKT0: %016llx\n" + "%s PO7_ERR_PKT1: %016llx\n", + err_print_prefix, + err_print_prefix, io->po7_error_sum, + err_print_prefix, io->po7_uncrr_sym, + err_print_prefix, io->po7_crrct_sym, + err_print_prefix, io->po7_ugbge_sym, + err_print_prefix, io->po7_err_pkt0, + err_print_prefix, io->po7_err_pkt1); +#endif + } + + /* + * Then loop through the ports + */ + for (i = 0; i < IO7_NUM_PORTS; i++) { + if (!MARVEL_IO_ERR_VALID(io->ports[i].pox_err_sum)) + continue; + + printk("%s PID %u PORT %d POx_ERR_SUM: %016llx\n", + err_print_prefix, + lf_subpackets->io_pid, i, io->ports[i].pox_err_sum); + marvel_print_pox_err(io->ports[i].pox_err_sum, &io->ports[i]); + + printk("%s [ POx_FIRST_ERR: %016llx ]\n", + err_print_prefix, io->ports[i].pox_first_err); + marvel_print_pox_err(io->ports[i].pox_first_err, + &io->ports[i]); + + } + + +#endif /* CONFIG_VERBOSE_MCHECK */ + + return status; +} + +static int +marvel_process_logout_frame(struct ev7_lf_subpackets *lf_subpackets, int print) +{ + int status = MCHK_DISPOSITION_UNKNOWN_ERROR; + + /* + * I/O error? + */ +#define EV7__RBOX_INT__IO_ERROR__MASK 0x20000400ul + if (lf_subpackets->logout && + (lf_subpackets->logout->rbox_int & 0x20000400ul)) + status = marvel_process_io_error(lf_subpackets, print); + + /* + * Probing behind PCI-X bridges can cause machine checks on + * Marvel when the probe is handled by the bridge as a split + * completion transaction. The symptom is an ERROR_RESPONSE + * to a CONFIG address. Since these errors will happen in + * normal operation, dismiss them. + * + * Dismiss if: + * C_STAT = 0x14 (Error Response) + * C_STS<3> = 0 (C_ADDR valid) + * C_ADDR<42> = 1 (I/O) + * C_ADDR<31:22> = 111110xxb (PCI Config space) + */ + if (lf_subpackets->ev7 && + (lf_subpackets->ev7->c_stat == 0x14) && + !(lf_subpackets->ev7->c_sts & 0x8) && + ((lf_subpackets->ev7->c_addr & 0x400ff000000ul) + == 0x400fe000000ul)) + status = MCHK_DISPOSITION_DISMISS; + + return status; +} + +void +marvel_machine_check(unsigned long vector, unsigned long la_ptr) +{ + struct el_subpacket *el_ptr = (struct el_subpacket *)la_ptr; + int (*process_frame)(struct ev7_lf_subpackets *, int) = NULL; + struct ev7_lf_subpackets subpacket_collection = { NULL, }; + struct ev7_pal_io_subpacket scratch_io_packet = { 0, }; + struct ev7_lf_subpackets *lf_subpackets = NULL; + int disposition = MCHK_DISPOSITION_UNKNOWN_ERROR; + char *saved_err_prefix = err_print_prefix; + char *error_type = NULL; + + /* + * Sync the processor + */ + mb(); + draina(); + + switch(vector) { + case SCB_Q_SYSEVENT: + process_frame = marvel_process_680_frame; + error_type = "System Event"; + break; + + case SCB_Q_SYSMCHK: + process_frame = marvel_process_logout_frame; + error_type = "System Uncorrectable Error"; + break; + + case SCB_Q_SYSERR: + process_frame = marvel_process_logout_frame; + error_type = "System Correctable Error"; + break; + + default: + /* Don't know it - pass it up. */ + ev7_machine_check(vector, la_ptr); + return; + } + + /* + * A system event or error has occurred, handle it here. + * + * Any errors in the logout frame have already been cleared by the + * PALcode, so just parse it. + */ + err_print_prefix = KERN_CRIT; + + /* + * Parse the logout frame without printing first. If the only error(s) + * found are classified as "dismissable", then just dismiss them and + * don't print any message + */ + lf_subpackets = + ev7_collect_logout_frame_subpackets(el_ptr, + &subpacket_collection); + if (process_frame && lf_subpackets && lf_subpackets->logout) { + /* + * We might not have the correct (or any) I/O subpacket. + * [ See marvel_process_io_error() for explanation. ] + * If we don't have one, point the io subpacket in + * lf_subpackets at scratch_io_packet so that + * marvel_find_io7_with_error() will have someplace to + * store the info. + */ + if (!lf_subpackets->io) + lf_subpackets->io = &scratch_io_packet; + + /* + * Default io_pid to the processor reporting the error + * [this will get changed in marvel_find_io7_with_error() + * if a different one is needed] + */ + lf_subpackets->io_pid = lf_subpackets->logout->whami; + + /* + * Evaluate the frames. + */ + disposition = process_frame(lf_subpackets, 0); + } + switch(disposition) { + case MCHK_DISPOSITION_DISMISS: + /* Nothing to do. */ + break; + + case MCHK_DISPOSITION_REPORT: + /* Recognized error, report it. */ + printk("%s*%s (Vector 0x%x) reported on CPU %d\n", + err_print_prefix, error_type, + (unsigned int)vector, (int)smp_processor_id()); + el_print_timestamp(&lf_subpackets->logout->timestamp); + process_frame(lf_subpackets, 1); + break; + + default: + /* Unknown - dump the annotated subpackets. */ + printk("%s*%s (Vector 0x%x) reported on CPU %d\n", + err_print_prefix, error_type, + (unsigned int)vector, (int)smp_processor_id()); + el_process_subpacket(el_ptr); + break; + + } + + err_print_prefix = saved_err_prefix; + + /* Release the logout frame. */ + wrmces(0x7); + mb(); +} + +void __init +marvel_register_error_handlers(void) +{ + ev7_register_error_handlers(); +} diff --git a/arch/alpha/kernel/err_titan.c b/arch/alpha/kernel/err_titan.c new file mode 100644 index 00000000..14b26c46 --- /dev/null +++ b/arch/alpha/kernel/err_titan.c @@ -0,0 +1,760 @@ +/* + * linux/arch/alpha/kernel/err_titan.c + * + * Copyright (C) 2000 Jeff Wiedemeier (Compaq Computer Corporation) + * + * Error handling code supporting TITAN systems + */ + +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/sched.h> + +#include <asm/io.h> +#include <asm/core_titan.h> +#include <asm/hwrpb.h> +#include <asm/smp.h> +#include <asm/err_common.h> +#include <asm/err_ev6.h> +#include <asm/irq_regs.h> + +#include "err_impl.h" +#include "proto.h" + + +static int +titan_parse_c_misc(u64 c_misc, int print) +{ +#ifdef CONFIG_VERBOSE_MCHECK + char *src; + int nxs = 0; +#endif + int status = MCHK_DISPOSITION_REPORT; + +#define TITAN__CCHIP_MISC__NXM (1UL << 28) +#define TITAN__CCHIP_MISC__NXS__S (29) +#define TITAN__CCHIP_MISC__NXS__M (0x7) + + if (!(c_misc & TITAN__CCHIP_MISC__NXM)) + return MCHK_DISPOSITION_UNKNOWN_ERROR; + +#ifdef CONFIG_VERBOSE_MCHECK + if (!print) + return status; + + nxs = EXTRACT(c_misc, TITAN__CCHIP_MISC__NXS); + switch(nxs) { + case 0: /* CPU 0 */ + case 1: /* CPU 1 */ + case 2: /* CPU 2 */ + case 3: /* CPU 3 */ + src = "CPU"; + /* num is already the CPU number */ + break; + case 4: /* Pchip 0 */ + case 5: /* Pchip 1 */ + src = "Pchip"; + nxs -= 4; + break; + default:/* reserved */ + src = "Unknown, NXS ="; + /* leave num untouched */ + break; + } + + printk("%s Non-existent memory access from: %s %d\n", + err_print_prefix, src, nxs); +#endif /* CONFIG_VERBOSE_MCHECK */ + + return status; +} + +static int +titan_parse_p_serror(int which, u64 serror, int print) +{ + int status = MCHK_DISPOSITION_REPORT; + +#ifdef CONFIG_VERBOSE_MCHECK + static const char * const serror_src[] = { + "GPCI", "APCI", "AGP HP", "AGP LP" + }; + static const char * const serror_cmd[] = { + "DMA Read", "DMA RMW", "SGTE Read", "Reserved" + }; +#endif /* CONFIG_VERBOSE_MCHECK */ + +#define TITAN__PCHIP_SERROR__LOST_UECC (1UL << 0) +#define TITAN__PCHIP_SERROR__UECC (1UL << 1) +#define TITAN__PCHIP_SERROR__CRE (1UL << 2) +#define TITAN__PCHIP_SERROR__NXIO (1UL << 3) +#define TITAN__PCHIP_SERROR__LOST_CRE (1UL << 4) +#define TITAN__PCHIP_SERROR__ECCMASK (TITAN__PCHIP_SERROR__UECC | \ + TITAN__PCHIP_SERROR__CRE) +#define TITAN__PCHIP_SERROR__ERRMASK (TITAN__PCHIP_SERROR__LOST_UECC | \ + TITAN__PCHIP_SERROR__UECC | \ + TITAN__PCHIP_SERROR__CRE | \ + TITAN__PCHIP_SERROR__NXIO | \ + TITAN__PCHIP_SERROR__LOST_CRE) +#define TITAN__PCHIP_SERROR__SRC__S (52) +#define TITAN__PCHIP_SERROR__SRC__M (0x3) +#define TITAN__PCHIP_SERROR__CMD__S (54) +#define TITAN__PCHIP_SERROR__CMD__M (0x3) +#define TITAN__PCHIP_SERROR__SYN__S (56) +#define TITAN__PCHIP_SERROR__SYN__M (0xff) +#define TITAN__PCHIP_SERROR__ADDR__S (15) +#define TITAN__PCHIP_SERROR__ADDR__M (0xffffffffUL) + + if (!(serror & TITAN__PCHIP_SERROR__ERRMASK)) + return MCHK_DISPOSITION_UNKNOWN_ERROR; + +#ifdef CONFIG_VERBOSE_MCHECK + if (!print) + return status; + + printk("%s PChip %d SERROR: %016llx\n", + err_print_prefix, which, serror); + if (serror & TITAN__PCHIP_SERROR__ECCMASK) { + printk("%s %sorrectable ECC Error:\n" + " Source: %-6s Command: %-8s Syndrome: 0x%08x\n" + " Address: 0x%llx\n", + err_print_prefix, + (serror & TITAN__PCHIP_SERROR__UECC) ? "Unc" : "C", + serror_src[EXTRACT(serror, TITAN__PCHIP_SERROR__SRC)], + serror_cmd[EXTRACT(serror, TITAN__PCHIP_SERROR__CMD)], + (unsigned)EXTRACT(serror, TITAN__PCHIP_SERROR__SYN), + EXTRACT(serror, TITAN__PCHIP_SERROR__ADDR)); + } + if (serror & TITAN__PCHIP_SERROR__NXIO) + printk("%s Non Existent I/O Error\n", err_print_prefix); + if (serror & TITAN__PCHIP_SERROR__LOST_UECC) + printk("%s Lost Uncorrectable ECC Error\n", + err_print_prefix); + if (serror & TITAN__PCHIP_SERROR__LOST_CRE) + printk("%s Lost Correctable ECC Error\n", err_print_prefix); +#endif /* CONFIG_VERBOSE_MCHECK */ + + return status; +} + +static int +titan_parse_p_perror(int which, int port, u64 perror, int print) +{ + int cmd; + unsigned long addr; + int status = MCHK_DISPOSITION_REPORT; + +#ifdef CONFIG_VERBOSE_MCHECK + static const char * const perror_cmd[] = { + "Interrupt Acknowledge", "Special Cycle", + "I/O Read", "I/O Write", + "Reserved", "Reserved", + "Memory Read", "Memory Write", + "Reserved", "Reserved", + "Configuration Read", "Configuration Write", + "Memory Read Multiple", "Dual Address Cycle", + "Memory Read Line", "Memory Write and Invalidate" + }; +#endif /* CONFIG_VERBOSE_MCHECK */ + +#define TITAN__PCHIP_PERROR__LOST (1UL << 0) +#define TITAN__PCHIP_PERROR__SERR (1UL << 1) +#define TITAN__PCHIP_PERROR__PERR (1UL << 2) +#define TITAN__PCHIP_PERROR__DCRTO (1UL << 3) +#define TITAN__PCHIP_PERROR__SGE (1UL << 4) +#define TITAN__PCHIP_PERROR__APE (1UL << 5) +#define TITAN__PCHIP_PERROR__TA (1UL << 6) +#define TITAN__PCHIP_PERROR__DPE (1UL << 7) +#define TITAN__PCHIP_PERROR__NDS (1UL << 8) +#define TITAN__PCHIP_PERROR__IPTPR (1UL << 9) +#define TITAN__PCHIP_PERROR__IPTPW (1UL << 10) +#define TITAN__PCHIP_PERROR__ERRMASK (TITAN__PCHIP_PERROR__LOST | \ + TITAN__PCHIP_PERROR__SERR | \ + TITAN__PCHIP_PERROR__PERR | \ + TITAN__PCHIP_PERROR__DCRTO | \ + TITAN__PCHIP_PERROR__SGE | \ + TITAN__PCHIP_PERROR__APE | \ + TITAN__PCHIP_PERROR__TA | \ + TITAN__PCHIP_PERROR__DPE | \ + TITAN__PCHIP_PERROR__NDS | \ + TITAN__PCHIP_PERROR__IPTPR | \ + TITAN__PCHIP_PERROR__IPTPW) +#define TITAN__PCHIP_PERROR__DAC (1UL << 47) +#define TITAN__PCHIP_PERROR__MWIN (1UL << 48) +#define TITAN__PCHIP_PERROR__CMD__S (52) +#define TITAN__PCHIP_PERROR__CMD__M (0x0f) +#define TITAN__PCHIP_PERROR__ADDR__S (14) +#define TITAN__PCHIP_PERROR__ADDR__M (0x1fffffffful) + + if (!(perror & TITAN__PCHIP_PERROR__ERRMASK)) + return MCHK_DISPOSITION_UNKNOWN_ERROR; + + cmd = EXTRACT(perror, TITAN__PCHIP_PERROR__CMD); + addr = EXTRACT(perror, TITAN__PCHIP_PERROR__ADDR) << 2; + + /* + * Initializing the BIOS on a video card on a bus without + * a south bridge (subtractive decode agent) can result in + * master aborts as the BIOS probes the capabilities of the + * card. XFree86 does such initialization. If the error + * is a master abort (No DevSel as PCI Master) and the command + * is an I/O read or write below the address where we start + * assigning PCI I/O spaces (SRM uses 0x1000), then mark the + * error as dismissable so starting XFree86 doesn't result + * in a series of uncorrectable errors being reported. Also + * dismiss master aborts to VGA frame buffer space + * (0xA0000 - 0xC0000) and legacy BIOS space (0xC0000 - 0x100000) + * for the same reason. + * + * Also mark the error dismissible if it looks like the right + * error but only the Lost bit is set. Since the BIOS initialization + * can cause multiple master aborts and the error interrupt can + * be handled on a different CPU than the BIOS code is run on, + * it is possible for a second master abort to occur between the + * time the PALcode reads PERROR and the time it writes PERROR + * to acknowledge the error. If this timing happens, a second + * error will be signalled after the first, and if no additional + * errors occur, will look like a Lost error with no additional + * errors on the same transaction as the previous error. + */ + if (((perror & TITAN__PCHIP_PERROR__NDS) || + ((perror & TITAN__PCHIP_PERROR__ERRMASK) == + TITAN__PCHIP_PERROR__LOST)) && + ((((cmd & 0xE) == 2) && (addr < 0x1000)) || + (((cmd & 0xE) == 6) && (addr >= 0xA0000) && (addr < 0x100000)))) { + status = MCHK_DISPOSITION_DISMISS; + } + +#ifdef CONFIG_VERBOSE_MCHECK + if (!print) + return status; + + printk("%s PChip %d %cPERROR: %016llx\n", + err_print_prefix, which, + port ? 'A' : 'G', perror); + if (perror & TITAN__PCHIP_PERROR__IPTPW) + printk("%s Invalid Peer-to-Peer Write\n", err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__IPTPR) + printk("%s Invalid Peer-to-Peer Read\n", err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__NDS) + printk("%s No DEVSEL as PCI Master [Master Abort]\n", + err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__DPE) + printk("%s Data Parity Error\n", err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__TA) + printk("%s Target Abort\n", err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__APE) + printk("%s Address Parity Error\n", err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__SGE) + printk("%s Scatter-Gather Error, Invalid PTE\n", + err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__DCRTO) + printk("%s Delayed-Completion Retry Timeout\n", + err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__PERR) + printk("%s PERR Asserted\n", err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__SERR) + printk("%s SERR Asserted\n", err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__LOST) + printk("%s Lost Error\n", err_print_prefix); + printk("%s Command: 0x%x - %s\n" + " Address: 0x%lx\n", + err_print_prefix, + cmd, perror_cmd[cmd], + addr); + if (perror & TITAN__PCHIP_PERROR__DAC) + printk("%s Dual Address Cycle\n", err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__MWIN) + printk("%s Hit in Monster Window\n", err_print_prefix); +#endif /* CONFIG_VERBOSE_MCHECK */ + + return status; +} + +static int +titan_parse_p_agperror(int which, u64 agperror, int print) +{ + int status = MCHK_DISPOSITION_REPORT; +#ifdef CONFIG_VERBOSE_MCHECK + int cmd, len; + unsigned long addr; + + static const char * const agperror_cmd[] = { + "Read (low-priority)", "Read (high-priority)", + "Write (low-priority)", "Write (high-priority)", + "Reserved", "Reserved", + "Flush", "Fence" + }; +#endif /* CONFIG_VERBOSE_MCHECK */ + +#define TITAN__PCHIP_AGPERROR__LOST (1UL << 0) +#define TITAN__PCHIP_AGPERROR__LPQFULL (1UL << 1) +#define TITAN__PCHIP_AGPERROR__HPQFULL (1UL << 2) +#define TITAN__PCHIP_AGPERROR__RESCMD (1UL << 3) +#define TITAN__PCHIP_AGPERROR__IPTE (1UL << 4) +#define TITAN__PCHIP_AGPERROR__PTP (1UL << 5) +#define TITAN__PCHIP_AGPERROR__NOWINDOW (1UL << 6) +#define TITAN__PCHIP_AGPERROR__ERRMASK (TITAN__PCHIP_AGPERROR__LOST | \ + TITAN__PCHIP_AGPERROR__LPQFULL | \ + TITAN__PCHIP_AGPERROR__HPQFULL | \ + TITAN__PCHIP_AGPERROR__RESCMD | \ + TITAN__PCHIP_AGPERROR__IPTE | \ + TITAN__PCHIP_AGPERROR__PTP | \ + TITAN__PCHIP_AGPERROR__NOWINDOW) +#define TITAN__PCHIP_AGPERROR__DAC (1UL << 48) +#define TITAN__PCHIP_AGPERROR__MWIN (1UL << 49) +#define TITAN__PCHIP_AGPERROR__FENCE (1UL << 59) +#define TITAN__PCHIP_AGPERROR__CMD__S (50) +#define TITAN__PCHIP_AGPERROR__CMD__M (0x07) +#define TITAN__PCHIP_AGPERROR__ADDR__S (15) +#define TITAN__PCHIP_AGPERROR__ADDR__M (0xffffffffUL) +#define TITAN__PCHIP_AGPERROR__LEN__S (53) +#define TITAN__PCHIP_AGPERROR__LEN__M (0x3f) + + if (!(agperror & TITAN__PCHIP_AGPERROR__ERRMASK)) + return MCHK_DISPOSITION_UNKNOWN_ERROR; + +#ifdef CONFIG_VERBOSE_MCHECK + if (!print) + return status; + + cmd = EXTRACT(agperror, TITAN__PCHIP_AGPERROR__CMD); + addr = EXTRACT(agperror, TITAN__PCHIP_AGPERROR__ADDR) << 3; + len = EXTRACT(agperror, TITAN__PCHIP_AGPERROR__LEN); + + printk("%s PChip %d AGPERROR: %016llx\n", err_print_prefix, + which, agperror); + if (agperror & TITAN__PCHIP_AGPERROR__NOWINDOW) + printk("%s No Window\n", err_print_prefix); + if (agperror & TITAN__PCHIP_AGPERROR__PTP) + printk("%s Peer-to-Peer set\n", err_print_prefix); + if (agperror & TITAN__PCHIP_AGPERROR__IPTE) + printk("%s Invalid PTE\n", err_print_prefix); + if (agperror & TITAN__PCHIP_AGPERROR__RESCMD) + printk("%s Reserved Command\n", err_print_prefix); + if (agperror & TITAN__PCHIP_AGPERROR__HPQFULL) + printk("%s HP Transaction Received while Queue Full\n", + err_print_prefix); + if (agperror & TITAN__PCHIP_AGPERROR__LPQFULL) + printk("%s LP Transaction Received while Queue Full\n", + err_print_prefix); + if (agperror & TITAN__PCHIP_AGPERROR__LOST) + printk("%s Lost Error\n", err_print_prefix); + printk("%s Command: 0x%x - %s, %d Quadwords%s\n" + " Address: 0x%lx\n", + err_print_prefix, cmd, agperror_cmd[cmd], len, + (agperror & TITAN__PCHIP_AGPERROR__FENCE) ? ", FENCE" : "", + addr); + if (agperror & TITAN__PCHIP_AGPERROR__DAC) + printk("%s Dual Address Cycle\n", err_print_prefix); + if (agperror & TITAN__PCHIP_AGPERROR__MWIN) + printk("%s Hit in Monster Window\n", err_print_prefix); +#endif /* CONFIG_VERBOSE_MCHECK */ + + return status; +} + +static int +titan_parse_p_chip(int which, u64 serror, u64 gperror, + u64 aperror, u64 agperror, int print) +{ + int status = MCHK_DISPOSITION_UNKNOWN_ERROR; + status |= titan_parse_p_serror(which, serror, print); + status |= titan_parse_p_perror(which, 0, gperror, print); + status |= titan_parse_p_perror(which, 1, aperror, print); + status |= titan_parse_p_agperror(which, agperror, print); + return status; +} + +int +titan_process_logout_frame(struct el_common *mchk_header, int print) +{ + struct el_TITAN_sysdata_mcheck *tmchk = + (struct el_TITAN_sysdata_mcheck *) + ((unsigned long)mchk_header + mchk_header->sys_offset); + int status = MCHK_DISPOSITION_UNKNOWN_ERROR; + + status |= titan_parse_c_misc(tmchk->c_misc, print); + status |= titan_parse_p_chip(0, tmchk->p0_serror, tmchk->p0_gperror, + tmchk->p0_aperror, tmchk->p0_agperror, + print); + status |= titan_parse_p_chip(1, tmchk->p1_serror, tmchk->p1_gperror, + tmchk->p1_aperror, tmchk->p1_agperror, + print); + + return status; +} + +void +titan_machine_check(unsigned long vector, unsigned long la_ptr) +{ + struct el_common *mchk_header = (struct el_common *)la_ptr; + struct el_TITAN_sysdata_mcheck *tmchk = + (struct el_TITAN_sysdata_mcheck *) + ((unsigned long)mchk_header + mchk_header->sys_offset); + u64 irqmask; + + /* + * Mask of Titan interrupt sources which are reported as machine checks + * + * 63 - CChip Error + * 62 - PChip 0 H_Error + * 61 - PChip 1 H_Error + * 60 - PChip 0 C_Error + * 59 - PChip 1 C_Error + */ +#define TITAN_MCHECK_INTERRUPT_MASK 0xF800000000000000UL + + /* + * Sync the processor + */ + mb(); + draina(); + + /* + * Only handle system errors here + */ + if ((vector != SCB_Q_SYSMCHK) && (vector != SCB_Q_SYSERR)) { + ev6_machine_check(vector, la_ptr); + return; + } + + /* + * It's a system error, handle it here + * + * The PALcode has already cleared the error, so just parse it + */ + + /* + * Parse the logout frame without printing first. If the only error(s) + * found are classified as "dismissable", then just dismiss them and + * don't print any message + */ + if (titan_process_logout_frame(mchk_header, 0) != + MCHK_DISPOSITION_DISMISS) { + char *saved_err_prefix = err_print_prefix; + err_print_prefix = KERN_CRIT; + + /* + * Either a nondismissable error was detected or no + * recognized error was detected in the logout frame + * -- report the error in either case + */ + printk("%s" + "*System %s Error (Vector 0x%x) reported on CPU %d:\n", + err_print_prefix, + (vector == SCB_Q_SYSERR)?"Correctable":"Uncorrectable", + (unsigned int)vector, (int)smp_processor_id()); + +#ifdef CONFIG_VERBOSE_MCHECK + titan_process_logout_frame(mchk_header, alpha_verbose_mcheck); + if (alpha_verbose_mcheck) + dik_show_regs(get_irq_regs(), NULL); +#endif /* CONFIG_VERBOSE_MCHECK */ + + err_print_prefix = saved_err_prefix; + + /* + * Convert any pending interrupts which report as system + * machine checks to interrupts + */ + irqmask = tmchk->c_dirx & TITAN_MCHECK_INTERRUPT_MASK; + titan_dispatch_irqs(irqmask); + } + + + /* + * Release the logout frame + */ + wrmces(0x7); + mb(); +} + +/* + * Subpacket Annotations + */ +static char *el_titan_pchip0_extended_annotation[] = { + "Subpacket Header", "P0_SCTL", "P0_SERREN", + "P0_APCTL", "P0_APERREN", "P0_AGPERREN", + "P0_ASPRST", "P0_AWSBA0", "P0_AWSBA1", + "P0_AWSBA2", "P0_AWSBA3", "P0_AWSM0", + "P0_AWSM1", "P0_AWSM2", "P0_AWSM3", + "P0_ATBA0", "P0_ATBA1", "P0_ATBA2", + "P0_ATBA3", "P0_GPCTL", "P0_GPERREN", + "P0_GSPRST", "P0_GWSBA0", "P0_GWSBA1", + "P0_GWSBA2", "P0_GWSBA3", "P0_GWSM0", + "P0_GWSM1", "P0_GWSM2", "P0_GWSM3", + "P0_GTBA0", "P0_GTBA1", "P0_GTBA2", + "P0_GTBA3", NULL +}; +static char *el_titan_pchip1_extended_annotation[] = { + "Subpacket Header", "P1_SCTL", "P1_SERREN", + "P1_APCTL", "P1_APERREN", "P1_AGPERREN", + "P1_ASPRST", "P1_AWSBA0", "P1_AWSBA1", + "P1_AWSBA2", "P1_AWSBA3", "P1_AWSM0", + "P1_AWSM1", "P1_AWSM2", "P1_AWSM3", + "P1_ATBA0", "P1_ATBA1", "P1_ATBA2", + "P1_ATBA3", "P1_GPCTL", "P1_GPERREN", + "P1_GSPRST", "P1_GWSBA0", "P1_GWSBA1", + "P1_GWSBA2", "P1_GWSBA3", "P1_GWSM0", + "P1_GWSM1", "P1_GWSM2", "P1_GWSM3", + "P1_GTBA0", "P1_GTBA1", "P1_GTBA2", + "P1_GTBA3", NULL +}; +static char *el_titan_memory_extended_annotation[] = { + "Subpacket Header", "AAR0", "AAR1", + "AAR2", "AAR3", "P0_SCTL", + "P0_GPCTL", "P0_APCTL", "P1_SCTL", + "P1_GPCTL", "P1_SCTL", NULL +}; + +static struct el_subpacket_annotation el_titan_annotations[] = { + SUBPACKET_ANNOTATION(EL_CLASS__REGATTA_FAMILY, + EL_TYPE__REGATTA__TITAN_PCHIP0_EXTENDED, + 1, + "Titan PChip 0 Extended Frame", + el_titan_pchip0_extended_annotation), + SUBPACKET_ANNOTATION(EL_CLASS__REGATTA_FAMILY, + EL_TYPE__REGATTA__TITAN_PCHIP1_EXTENDED, + 1, + "Titan PChip 1 Extended Frame", + el_titan_pchip1_extended_annotation), + SUBPACKET_ANNOTATION(EL_CLASS__REGATTA_FAMILY, + EL_TYPE__REGATTA__TITAN_MEMORY_EXTENDED, + 1, + "Titan Memory Extended Frame", + el_titan_memory_extended_annotation), + SUBPACKET_ANNOTATION(EL_CLASS__REGATTA_FAMILY, + EL_TYPE__TERMINATION__TERMINATION, + 1, + "Termination Subpacket", + NULL) +}; + +static struct el_subpacket * +el_process_regatta_subpacket(struct el_subpacket *header) +{ + if (header->class != EL_CLASS__REGATTA_FAMILY) { + printk("%s ** Unexpected header CLASS %d TYPE %d, aborting\n", + err_print_prefix, + header->class, header->type); + return NULL; + } + + switch(header->type) { + case EL_TYPE__REGATTA__PROCESSOR_ERROR_FRAME: + case EL_TYPE__REGATTA__SYSTEM_ERROR_FRAME: + case EL_TYPE__REGATTA__ENVIRONMENTAL_FRAME: + case EL_TYPE__REGATTA__PROCESSOR_DBL_ERROR_HALT: + case EL_TYPE__REGATTA__SYSTEM_DBL_ERROR_HALT: + printk("%s ** Occurred on CPU %d:\n", + err_print_prefix, + (int)header->by_type.regatta_frame.cpuid); + privateer_process_logout_frame((struct el_common *) + header->by_type.regatta_frame.data_start, 1); + break; + default: + printk("%s ** REGATTA TYPE %d SUBPACKET\n", + err_print_prefix, header->type); + el_annotate_subpacket(header); + break; + } + + + return (struct el_subpacket *)((unsigned long)header + header->length); +} + +static struct el_subpacket_handler titan_subpacket_handler = + SUBPACKET_HANDLER_INIT(EL_CLASS__REGATTA_FAMILY, + el_process_regatta_subpacket); + +void __init +titan_register_error_handlers(void) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE (el_titan_annotations); i++) + cdl_register_subpacket_annotation(&el_titan_annotations[i]); + + cdl_register_subpacket_handler(&titan_subpacket_handler); + + ev6_register_error_handlers(); +} + + +/* + * Privateer + */ + +static int +privateer_process_680_frame(struct el_common *mchk_header, int print) +{ + int status = MCHK_DISPOSITION_UNKNOWN_ERROR; +#ifdef CONFIG_VERBOSE_MCHECK + struct el_PRIVATEER_envdata_mcheck *emchk = + (struct el_PRIVATEER_envdata_mcheck *) + ((unsigned long)mchk_header + mchk_header->sys_offset); + + /* TODO - categorize errors, for now, no error */ + + if (!print) + return status; + + /* TODO - decode instead of just dumping... */ + printk("%s Summary Flags: %016llx\n" + " CChip DIRx: %016llx\n" + " System Management IR: %016llx\n" + " CPU IR: %016llx\n" + " Power Supply IR: %016llx\n" + " LM78 Fault Status: %016llx\n" + " System Doors: %016llx\n" + " Temperature Warning: %016llx\n" + " Fan Control: %016llx\n" + " Fatal Power Down Code: %016llx\n", + err_print_prefix, + emchk->summary, + emchk->c_dirx, + emchk->smir, + emchk->cpuir, + emchk->psir, + emchk->fault, + emchk->sys_doors, + emchk->temp_warn, + emchk->fan_ctrl, + emchk->code); +#endif /* CONFIG_VERBOSE_MCHECK */ + + return status; +} + +int +privateer_process_logout_frame(struct el_common *mchk_header, int print) +{ + struct el_common_EV6_mcheck *ev6mchk = + (struct el_common_EV6_mcheck *)mchk_header; + int status = MCHK_DISPOSITION_UNKNOWN_ERROR; + + /* + * Machine check codes + */ +#define PRIVATEER_MCHK__CORR_ECC 0x86 /* 630 */ +#define PRIVATEER_MCHK__DC_TAG_PERR 0x9E /* 630 */ +#define PRIVATEER_MCHK__PAL_BUGCHECK 0x8E /* 670 */ +#define PRIVATEER_MCHK__OS_BUGCHECK 0x90 /* 670 */ +#define PRIVATEER_MCHK__PROC_HRD_ERR 0x98 /* 670 */ +#define PRIVATEER_MCHK__ISTREAM_CMOV_PRX 0xA0 /* 670 */ +#define PRIVATEER_MCHK__ISTREAM_CMOV_FLT 0xA2 /* 670 */ +#define PRIVATEER_MCHK__SYS_HRD_ERR 0x202 /* 660 */ +#define PRIVATEER_MCHK__SYS_CORR_ERR 0x204 /* 620 */ +#define PRIVATEER_MCHK__SYS_ENVIRON 0x206 /* 680 */ + + switch(ev6mchk->MCHK_Code) { + /* + * Vector 630 - Processor, Correctable + */ + case PRIVATEER_MCHK__CORR_ECC: + case PRIVATEER_MCHK__DC_TAG_PERR: + /* + * Fall through to vector 670 for processing... + */ + /* + * Vector 670 - Processor, Uncorrectable + */ + case PRIVATEER_MCHK__PAL_BUGCHECK: + case PRIVATEER_MCHK__OS_BUGCHECK: + case PRIVATEER_MCHK__PROC_HRD_ERR: + case PRIVATEER_MCHK__ISTREAM_CMOV_PRX: + case PRIVATEER_MCHK__ISTREAM_CMOV_FLT: + status |= ev6_process_logout_frame(mchk_header, print); + break; + + /* + * Vector 620 - System, Correctable + */ + case PRIVATEER_MCHK__SYS_CORR_ERR: + /* + * Fall through to vector 660 for processing... + */ + /* + * Vector 660 - System, Uncorrectable + */ + case PRIVATEER_MCHK__SYS_HRD_ERR: + status |= titan_process_logout_frame(mchk_header, print); + break; + + /* + * Vector 680 - System, Environmental + */ + case PRIVATEER_MCHK__SYS_ENVIRON: /* System, Environmental */ + status |= privateer_process_680_frame(mchk_header, print); + break; + + /* + * Unknown + */ + default: + status |= MCHK_DISPOSITION_REPORT; + if (print) { + printk("%s** Unknown Error, frame follows\n", + err_print_prefix); + mchk_dump_logout_frame(mchk_header); + } + + } + + return status; +} + +void +privateer_machine_check(unsigned long vector, unsigned long la_ptr) +{ + struct el_common *mchk_header = (struct el_common *)la_ptr; + struct el_TITAN_sysdata_mcheck *tmchk = + (struct el_TITAN_sysdata_mcheck *) + (la_ptr + mchk_header->sys_offset); + u64 irqmask; + char *saved_err_prefix = err_print_prefix; + +#define PRIVATEER_680_INTERRUPT_MASK (0xE00UL) +#define PRIVATEER_HOTPLUG_INTERRUPT_MASK (0xE00UL) + + /* + * Sync the processor. + */ + mb(); + draina(); + + /* + * Only handle system events here. + */ + if (vector != SCB_Q_SYSEVENT) + return titan_machine_check(vector, la_ptr); + + /* + * Report the event - System Events should be reported even if no + * error is indicated since the event could indicate the return + * to normal status. + */ + err_print_prefix = KERN_CRIT; + printk("%s*System Event (Vector 0x%x) reported on CPU %d:\n", + err_print_prefix, + (unsigned int)vector, (int)smp_processor_id()); + privateer_process_680_frame(mchk_header, 1); + err_print_prefix = saved_err_prefix; + + /* + * Convert any pending interrupts which report as 680 machine + * checks to interrupts. + */ + irqmask = tmchk->c_dirx & PRIVATEER_680_INTERRUPT_MASK; + + /* + * Dispatch the interrupt(s). + */ + titan_dispatch_irqs(irqmask); + + /* + * Release the logout frame. + */ + wrmces(0x7); + mb(); +} diff --git a/arch/alpha/kernel/es1888.c b/arch/alpha/kernel/es1888.c new file mode 100644 index 00000000..d584c85f --- /dev/null +++ b/arch/alpha/kernel/es1888.c @@ -0,0 +1,49 @@ +/* + * linux/arch/alpha/kernel/es1888.c + * + * Init the built-in ES1888 sound chip (SB16 compatible) + */ + +#include <linux/init.h> +#include <asm/io.h> +#include "proto.h" + +void __init +es1888_init(void) +{ + /* Sequence of IO reads to init the audio controller */ + inb(0x0229); + inb(0x0229); + inb(0x0229); + inb(0x022b); + inb(0x0229); + inb(0x022b); + inb(0x0229); + inb(0x0229); + inb(0x022b); + inb(0x0229); + inb(0x0220); /* This sets the base address to 0x220 */ + + /* Sequence to set DMA channels */ + outb(0x01, 0x0226); /* reset */ + inb(0x0226); /* pause */ + outb(0x00, 0x0226); /* release reset */ + while (!(inb(0x022e) & 0x80)) /* wait for bit 7 to assert*/ + continue; + inb(0x022a); /* pause */ + outb(0xc6, 0x022c); /* enable extended mode */ + inb(0x022a); /* pause, also forces the write */ + while (inb(0x022c) & 0x80) /* wait for bit 7 to deassert */ + continue; + outb(0xb1, 0x022c); /* setup for write to Interrupt CR */ + while (inb(0x022c) & 0x80) /* wait for bit 7 to deassert */ + continue; + outb(0x14, 0x022c); /* set IRQ 5 */ + while (inb(0x022c) & 0x80) /* wait for bit 7 to deassert */ + continue; + outb(0xb2, 0x022c); /* setup for write to DMA CR */ + while (inb(0x022c) & 0x80) /* wait for bit 7 to deassert */ + continue; + outb(0x18, 0x022c); /* set DMA channel 1 */ + inb(0x022c); /* force the write */ +} diff --git a/arch/alpha/kernel/gct.c b/arch/alpha/kernel/gct.c new file mode 100644 index 00000000..c0c71554 --- /dev/null +++ b/arch/alpha/kernel/gct.c @@ -0,0 +1,47 @@ +/* + * linux/arch/alpha/kernel/gct.c + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/errno.h> + +#include <asm/hwrpb.h> +#include <asm/gct.h> + +int +gct6_find_nodes(gct6_node *node, gct6_search_struct *search) +{ + gct6_search_struct *wanted; + int status = 0; + + /* First check the magic number. */ + if (node->magic != GCT_NODE_MAGIC) { + printk(KERN_ERR "GCT Node MAGIC incorrect - GCT invalid\n"); + return -EINVAL; + } + + /* Check against the search struct. */ + for (wanted = search; + wanted && (wanted->type | wanted->subtype); + wanted++) { + if (node->type != wanted->type) + continue; + if (node->subtype != wanted->subtype) + continue; + + /* Found it -- call out. */ + if (wanted->callout) + wanted->callout(node); + } + + /* Now walk the tree, siblings first. */ + if (node->next) + status |= gct6_find_nodes(GCT_NODE_PTR(node->next), search); + + /* Then the children. */ + if (node->child) + status |= gct6_find_nodes(GCT_NODE_PTR(node->child), search); + + return status; +} diff --git a/arch/alpha/kernel/head.S b/arch/alpha/kernel/head.S new file mode 100644 index 00000000..c352499a --- /dev/null +++ b/arch/alpha/kernel/head.S @@ -0,0 +1,98 @@ +/* + * arch/alpha/kernel/head.S + * + * initial boot stuff.. At this point, the bootloader has already + * switched into OSF/1 PAL-code, and loaded us at the correct address + * (START_ADDR). So there isn't much left for us to do: just set up + * the kernel global pointer and jump to the kernel entry-point. + */ + +#include <linux/init.h> +#include <asm/asm-offsets.h> +#include <asm/pal.h> +#include <asm/setup.h> + +__HEAD +.globl _stext + .set noreorder + .globl __start + .ent __start +_stext: +__start: + .prologue 0 + br $27,1f +1: ldgp $29,0($27) + /* We need to get current_task_info loaded up... */ + lda $8,init_thread_union + /* ... and find our stack ... */ + lda $30,0x4000 - SIZEOF_PT_REGS($8) + /* ... and then we can start the kernel. */ + jsr $26,start_kernel + call_pal PAL_halt + .end __start + +#ifdef CONFIG_SMP + .align 3 + .globl __smp_callin + .ent __smp_callin + /* On entry here from SRM console, the HWPCB of the per-cpu + slot for this processor has been loaded. We've arranged + for the UNIQUE value for this process to contain the PCBB + of the target idle task. */ +__smp_callin: + .prologue 1 + ldgp $29,0($27) # First order of business, load the GP. + + call_pal PAL_rduniq # Grab the target PCBB. + mov $0,$16 # Install it. + call_pal PAL_swpctx + + lda $8,0x3fff # Find "current". + bic $30,$8,$8 + + jsr $26,smp_callin + call_pal PAL_halt + .end __smp_callin +#endif /* CONFIG_SMP */ + + # + # The following two functions are needed for supporting SRM PALcode + # on the PC164 (at least), since that PALcode manages the interrupt + # masking, and we cannot duplicate the effort without causing problems + # + + .align 3 + .globl cserve_ena + .ent cserve_ena +cserve_ena: + .prologue 0 + bis $16,$16,$17 + lda $16,52($31) + call_pal PAL_cserve + ret ($26) + .end cserve_ena + + .align 3 + .globl cserve_dis + .ent cserve_dis +cserve_dis: + .prologue 0 + bis $16,$16,$17 + lda $16,53($31) + call_pal PAL_cserve + ret ($26) + .end cserve_dis + + # + # It is handy, on occasion, to make halt actually just loop. + # Putting it here means we dont have to recompile the whole + # kernel. + # + + .align 3 + .globl halt + .ent halt +halt: + .prologue 0 + call_pal PAL_halt + .end halt diff --git a/arch/alpha/kernel/init_task.c b/arch/alpha/kernel/init_task.c new file mode 100644 index 00000000..6f80ca4f --- /dev/null +++ b/arch/alpha/kernel/init_task.c @@ -0,0 +1,17 @@ +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/init_task.h> +#include <linux/fs.h> +#include <linux/mqueue.h> +#include <asm/uaccess.h> + + +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); +struct task_struct init_task = INIT_TASK(init_task); +EXPORT_SYMBOL(init_task); + +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; diff --git a/arch/alpha/kernel/io.c b/arch/alpha/kernel/io.c new file mode 100644 index 00000000..19c5875a --- /dev/null +++ b/arch/alpha/kernel/io.c @@ -0,0 +1,630 @@ +/* + * Alpha IO and memory functions. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> +#include <asm/io.h> + +/* Out-of-line versions of the i/o routines that redirect into the + platform-specific version. Note that "platform-specific" may mean + "generic", which bumps through the machine vector. */ + +unsigned int +ioread8(void __iomem *addr) +{ + unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread8)(addr); + mb(); + return ret; +} + +unsigned int ioread16(void __iomem *addr) +{ + unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread16)(addr); + mb(); + return ret; +} + +unsigned int ioread32(void __iomem *addr) +{ + unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread32)(addr); + mb(); + return ret; +} + +void iowrite8(u8 b, void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr); + mb(); +} + +void iowrite16(u16 b, void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr); + mb(); +} + +void iowrite32(u32 b, void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,iowrite32)(b, addr); + mb(); +} + +EXPORT_SYMBOL(ioread8); +EXPORT_SYMBOL(ioread16); +EXPORT_SYMBOL(ioread32); +EXPORT_SYMBOL(iowrite8); +EXPORT_SYMBOL(iowrite16); +EXPORT_SYMBOL(iowrite32); + +u8 inb(unsigned long port) +{ + return ioread8(ioport_map(port, 1)); +} + +u16 inw(unsigned long port) +{ + return ioread16(ioport_map(port, 2)); +} + +u32 inl(unsigned long port) +{ + return ioread32(ioport_map(port, 4)); +} + +void outb(u8 b, unsigned long port) +{ + iowrite8(b, ioport_map(port, 1)); +} + +void outw(u16 b, unsigned long port) +{ + iowrite16(b, ioport_map(port, 2)); +} + +void outl(u32 b, unsigned long port) +{ + iowrite32(b, ioport_map(port, 4)); +} + +EXPORT_SYMBOL(inb); +EXPORT_SYMBOL(inw); +EXPORT_SYMBOL(inl); +EXPORT_SYMBOL(outb); +EXPORT_SYMBOL(outw); +EXPORT_SYMBOL(outl); + +u8 __raw_readb(const volatile void __iomem *addr) +{ + return IO_CONCAT(__IO_PREFIX,readb)(addr); +} + +u16 __raw_readw(const volatile void __iomem *addr) +{ + return IO_CONCAT(__IO_PREFIX,readw)(addr); +} + +u32 __raw_readl(const volatile void __iomem *addr) +{ + return IO_CONCAT(__IO_PREFIX,readl)(addr); +} + +u64 __raw_readq(const volatile void __iomem *addr) +{ + return IO_CONCAT(__IO_PREFIX,readq)(addr); +} + +void __raw_writeb(u8 b, volatile void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,writeb)(b, addr); +} + +void __raw_writew(u16 b, volatile void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,writew)(b, addr); +} + +void __raw_writel(u32 b, volatile void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,writel)(b, addr); +} + +void __raw_writeq(u64 b, volatile void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,writeq)(b, addr); +} + +EXPORT_SYMBOL(__raw_readb); +EXPORT_SYMBOL(__raw_readw); +EXPORT_SYMBOL(__raw_readl); +EXPORT_SYMBOL(__raw_readq); +EXPORT_SYMBOL(__raw_writeb); +EXPORT_SYMBOL(__raw_writew); +EXPORT_SYMBOL(__raw_writel); +EXPORT_SYMBOL(__raw_writeq); + +u8 readb(const volatile void __iomem *addr) +{ + u8 ret = __raw_readb(addr); + mb(); + return ret; +} + +u16 readw(const volatile void __iomem *addr) +{ + u16 ret = __raw_readw(addr); + mb(); + return ret; +} + +u32 readl(const volatile void __iomem *addr) +{ + u32 ret = __raw_readl(addr); + mb(); + return ret; +} + +u64 readq(const volatile void __iomem *addr) +{ + u64 ret = __raw_readq(addr); + mb(); + return ret; +} + +void writeb(u8 b, volatile void __iomem *addr) +{ + __raw_writeb(b, addr); + mb(); +} + +void writew(u16 b, volatile void __iomem *addr) +{ + __raw_writew(b, addr); + mb(); +} + +void writel(u32 b, volatile void __iomem *addr) +{ + __raw_writel(b, addr); + mb(); +} + +void writeq(u64 b, volatile void __iomem *addr) +{ + __raw_writeq(b, addr); + mb(); +} + +EXPORT_SYMBOL(readb); +EXPORT_SYMBOL(readw); +EXPORT_SYMBOL(readl); +EXPORT_SYMBOL(readq); +EXPORT_SYMBOL(writeb); +EXPORT_SYMBOL(writew); +EXPORT_SYMBOL(writel); +EXPORT_SYMBOL(writeq); + + +/* + * Read COUNT 8-bit bytes from port PORT into memory starting at SRC. + */ +void ioread8_rep(void __iomem *port, void *dst, unsigned long count) +{ + while ((unsigned long)dst & 0x3) { + if (!count) + return; + count--; + *(unsigned char *)dst = ioread8(port); + dst += 1; + } + + while (count >= 4) { + unsigned int w; + count -= 4; + w = ioread8(port); + w |= ioread8(port) << 8; + w |= ioread8(port) << 16; + w |= ioread8(port) << 24; + *(unsigned int *)dst = w; + dst += 4; + } + + while (count) { + --count; + *(unsigned char *)dst = ioread8(port); + dst += 1; + } +} + +void insb(unsigned long port, void *dst, unsigned long count) +{ + ioread8_rep(ioport_map(port, 1), dst, count); +} + +EXPORT_SYMBOL(ioread8_rep); +EXPORT_SYMBOL(insb); + +/* + * Read COUNT 16-bit words from port PORT into memory starting at + * SRC. SRC must be at least short aligned. This is used by the + * IDE driver to read disk sectors. Performance is important, but + * the interfaces seems to be slow: just using the inlined version + * of the inw() breaks things. + */ +void ioread16_rep(void __iomem *port, void *dst, unsigned long count) +{ + if (unlikely((unsigned long)dst & 0x3)) { + if (!count) + return; + BUG_ON((unsigned long)dst & 0x1); + count--; + *(unsigned short *)dst = ioread16(port); + dst += 2; + } + + while (count >= 2) { + unsigned int w; + count -= 2; + w = ioread16(port); + w |= ioread16(port) << 16; + *(unsigned int *)dst = w; + dst += 4; + } + + if (count) { + *(unsigned short*)dst = ioread16(port); + } +} + +void insw(unsigned long port, void *dst, unsigned long count) +{ + ioread16_rep(ioport_map(port, 2), dst, count); +} + +EXPORT_SYMBOL(ioread16_rep); +EXPORT_SYMBOL(insw); + + +/* + * Read COUNT 32-bit words from port PORT into memory starting at + * SRC. Now works with any alignment in SRC. Performance is important, + * but the interfaces seems to be slow: just using the inlined version + * of the inl() breaks things. + */ +void ioread32_rep(void __iomem *port, void *dst, unsigned long count) +{ + if (unlikely((unsigned long)dst & 0x3)) { + while (count--) { + struct S { int x __attribute__((packed)); }; + ((struct S *)dst)->x = ioread32(port); + dst += 4; + } + } else { + /* Buffer 32-bit aligned. */ + while (count--) { + *(unsigned int *)dst = ioread32(port); + dst += 4; + } + } +} + +void insl(unsigned long port, void *dst, unsigned long count) +{ + ioread32_rep(ioport_map(port, 4), dst, count); +} + +EXPORT_SYMBOL(ioread32_rep); +EXPORT_SYMBOL(insl); + + +/* + * Like insb but in the opposite direction. + * Don't worry as much about doing aligned memory transfers: + * doing byte reads the "slow" way isn't nearly as slow as + * doing byte writes the slow way (no r-m-w cycle). + */ +void iowrite8_rep(void __iomem *port, const void *xsrc, unsigned long count) +{ + const unsigned char *src = xsrc; + while (count--) + iowrite8(*src++, port); +} + +void outsb(unsigned long port, const void *src, unsigned long count) +{ + iowrite8_rep(ioport_map(port, 1), src, count); +} + +EXPORT_SYMBOL(iowrite8_rep); +EXPORT_SYMBOL(outsb); + + +/* + * Like insw but in the opposite direction. This is used by the IDE + * driver to write disk sectors. Performance is important, but the + * interfaces seems to be slow: just using the inlined version of the + * outw() breaks things. + */ +void iowrite16_rep(void __iomem *port, const void *src, unsigned long count) +{ + if (unlikely((unsigned long)src & 0x3)) { + if (!count) + return; + BUG_ON((unsigned long)src & 0x1); + iowrite16(*(unsigned short *)src, port); + src += 2; + --count; + } + + while (count >= 2) { + unsigned int w; + count -= 2; + w = *(unsigned int *)src; + src += 4; + iowrite16(w >> 0, port); + iowrite16(w >> 16, port); + } + + if (count) { + iowrite16(*(unsigned short *)src, port); + } +} + +void outsw(unsigned long port, const void *src, unsigned long count) +{ + iowrite16_rep(ioport_map(port, 2), src, count); +} + +EXPORT_SYMBOL(iowrite16_rep); +EXPORT_SYMBOL(outsw); + + +/* + * Like insl but in the opposite direction. This is used by the IDE + * driver to write disk sectors. Works with any alignment in SRC. + * Performance is important, but the interfaces seems to be slow: + * just using the inlined version of the outl() breaks things. + */ +void iowrite32_rep(void __iomem *port, const void *src, unsigned long count) +{ + if (unlikely((unsigned long)src & 0x3)) { + while (count--) { + struct S { int x __attribute__((packed)); }; + iowrite32(((struct S *)src)->x, port); + src += 4; + } + } else { + /* Buffer 32-bit aligned. */ + while (count--) { + iowrite32(*(unsigned int *)src, port); + src += 4; + } + } +} + +void outsl(unsigned long port, const void *src, unsigned long count) +{ + iowrite32_rep(ioport_map(port, 4), src, count); +} + +EXPORT_SYMBOL(iowrite32_rep); +EXPORT_SYMBOL(outsl); + + +/* + * Copy data from IO memory space to "real" memory space. + * This needs to be optimized. + */ +void memcpy_fromio(void *to, const volatile void __iomem *from, long count) +{ + /* Optimize co-aligned transfers. Everything else gets handled + a byte at a time. */ + + if (count >= 8 && ((u64)to & 7) == ((u64)from & 7)) { + count -= 8; + do { + *(u64 *)to = __raw_readq(from); + count -= 8; + to += 8; + from += 8; + } while (count >= 0); + count += 8; + } + + if (count >= 4 && ((u64)to & 3) == ((u64)from & 3)) { + count -= 4; + do { + *(u32 *)to = __raw_readl(from); + count -= 4; + to += 4; + from += 4; + } while (count >= 0); + count += 4; + } + + if (count >= 2 && ((u64)to & 1) == ((u64)from & 1)) { + count -= 2; + do { + *(u16 *)to = __raw_readw(from); + count -= 2; + to += 2; + from += 2; + } while (count >= 0); + count += 2; + } + + while (count > 0) { + *(u8 *) to = __raw_readb(from); + count--; + to++; + from++; + } + mb(); +} + +EXPORT_SYMBOL(memcpy_fromio); + + +/* + * Copy data from "real" memory space to IO memory space. + * This needs to be optimized. + */ +void memcpy_toio(volatile void __iomem *to, const void *from, long count) +{ + /* Optimize co-aligned transfers. Everything else gets handled + a byte at a time. */ + /* FIXME -- align FROM. */ + + if (count >= 8 && ((u64)to & 7) == ((u64)from & 7)) { + count -= 8; + do { + __raw_writeq(*(const u64 *)from, to); + count -= 8; + to += 8; + from += 8; + } while (count >= 0); + count += 8; + } + + if (count >= 4 && ((u64)to & 3) == ((u64)from & 3)) { + count -= 4; + do { + __raw_writel(*(const u32 *)from, to); + count -= 4; + to += 4; + from += 4; + } while (count >= 0); + count += 4; + } + + if (count >= 2 && ((u64)to & 1) == ((u64)from & 1)) { + count -= 2; + do { + __raw_writew(*(const u16 *)from, to); + count -= 2; + to += 2; + from += 2; + } while (count >= 0); + count += 2; + } + + while (count > 0) { + __raw_writeb(*(const u8 *) from, to); + count--; + to++; + from++; + } + mb(); +} + +EXPORT_SYMBOL(memcpy_toio); + + +/* + * "memset" on IO memory space. + */ +void _memset_c_io(volatile void __iomem *to, unsigned long c, long count) +{ + /* Handle any initial odd byte */ + if (count > 0 && ((u64)to & 1)) { + __raw_writeb(c, to); + to++; + count--; + } + + /* Handle any initial odd halfword */ + if (count >= 2 && ((u64)to & 2)) { + __raw_writew(c, to); + to += 2; + count -= 2; + } + + /* Handle any initial odd word */ + if (count >= 4 && ((u64)to & 4)) { + __raw_writel(c, to); + to += 4; + count -= 4; + } + + /* Handle all full-sized quadwords: we're aligned + (or have a small count) */ + count -= 8; + if (count >= 0) { + do { + __raw_writeq(c, to); + to += 8; + count -= 8; + } while (count >= 0); + } + count += 8; + + /* The tail is word-aligned if we still have count >= 4 */ + if (count >= 4) { + __raw_writel(c, to); + to += 4; + count -= 4; + } + + /* The tail is half-word aligned if we have count >= 2 */ + if (count >= 2) { + __raw_writew(c, to); + to += 2; + count -= 2; + } + + /* And finally, one last byte.. */ + if (count) { + __raw_writeb(c, to); + } + mb(); +} + +EXPORT_SYMBOL(_memset_c_io); + +/* A version of memcpy used by the vga console routines to move data around + arbitrarily between screen and main memory. */ + +void +scr_memcpyw(u16 *d, const u16 *s, unsigned int count) +{ + const u16 __iomem *ios = (const u16 __iomem *) s; + u16 __iomem *iod = (u16 __iomem *) d; + int s_isio = __is_ioaddr(s); + int d_isio = __is_ioaddr(d); + + if (s_isio) { + if (d_isio) { + /* FIXME: Should handle unaligned ops and + operation widening. */ + + count /= 2; + while (count--) { + u16 tmp = __raw_readw(ios++); + __raw_writew(tmp, iod++); + } + } + else + memcpy_fromio(d, ios, count); + } else { + if (d_isio) + memcpy_toio(iod, s, count); + else + memcpy(d, s, count); + } +} + +EXPORT_SYMBOL(scr_memcpyw); + +void __iomem *ioport_map(unsigned long port, unsigned int size) +{ + return IO_CONCAT(__IO_PREFIX,ioportmap) (port); +} + +void ioport_unmap(void __iomem *addr) +{ +} + +EXPORT_SYMBOL(ioport_map); +EXPORT_SYMBOL(ioport_unmap); diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c new file mode 100644 index 00000000..2872accd --- /dev/null +++ b/arch/alpha/kernel/irq.c @@ -0,0 +1,130 @@ +/* + * linux/arch/alpha/kernel/irq.c + * + * Copyright (C) 1995 Linus Torvalds + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/kernel_stat.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/ptrace.h> +#include <linux/interrupt.h> +#include <linux/random.h> +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/profile.h> +#include <linux/bitops.h> + +#include <asm/io.h> +#include <asm/uaccess.h> + +volatile unsigned long irq_err_count; +DEFINE_PER_CPU(unsigned long, irq_pmi_count); + +void ack_bad_irq(unsigned int irq) +{ + irq_err_count++; + printk(KERN_CRIT "Unexpected IRQ trap at vector %u\n", irq); +} + +#ifdef CONFIG_SMP +static char irq_user_affinity[NR_IRQS]; + +int irq_select_affinity(unsigned int irq) +{ + struct irq_data *data = irq_get_irq_data(irq); + struct irq_chip *chip; + static int last_cpu; + int cpu = last_cpu + 1; + + if (!data) + return 1; + chip = irq_data_get_irq_chip(data); + + if (!chip->irq_set_affinity || irq_user_affinity[irq]) + return 1; + + while (!cpu_possible(cpu) || + !cpumask_test_cpu(cpu, irq_default_affinity)) + cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0); + last_cpu = cpu; + + cpumask_copy(data->affinity, cpumask_of(cpu)); + chip->irq_set_affinity(data, cpumask_of(cpu), false); + return 0; +} +#endif /* CONFIG_SMP */ + +int arch_show_interrupts(struct seq_file *p, int prec) +{ + int j; + +#ifdef CONFIG_SMP + seq_puts(p, "IPI: "); + for_each_online_cpu(j) + seq_printf(p, "%10lu ", cpu_data[j].ipi_count); + seq_putc(p, '\n'); +#endif + seq_puts(p, "PMI: "); + for_each_online_cpu(j) + seq_printf(p, "%10lu ", per_cpu(irq_pmi_count, j)); + seq_puts(p, " Performance Monitoring\n"); + seq_printf(p, "ERR: %10lu\n", irq_err_count); + return 0; +} + +/* + * handle_irq handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ + +#define MAX_ILLEGAL_IRQS 16 + +void +handle_irq(int irq) +{ + /* + * We ack quickly, we don't want the irq controller + * thinking we're snobs just because some other CPU has + * disabled global interrupts (we have already done the + * INT_ACK cycles, it's too late to try to pretend to the + * controller that we aren't taking the interrupt). + * + * 0 return value means that this irq is already being + * handled by some other CPU. (or is disabled) + */ + static unsigned int illegal_count=0; + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc || ((unsigned) irq > ACTUAL_NR_IRQS && + illegal_count < MAX_ILLEGAL_IRQS)) { + irq_err_count++; + illegal_count++; + printk(KERN_CRIT "device_interrupt: invalid interrupt %d\n", + irq); + return; + } + + /* + * From here we must proceed with IPL_MAX. Note that we do not + * explicitly enable interrupts afterwards - some MILO PALcode + * (namely LX164 one) seems to have severe problems with RTI + * at IPL 0. + */ + local_irq_disable(); + irq_enter(); + generic_handle_irq_desc(irq, desc); + irq_exit(); +} diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c new file mode 100644 index 00000000..772ddfdb --- /dev/null +++ b/arch/alpha/kernel/irq_alpha.c @@ -0,0 +1,251 @@ +/* + * Alpha specific irq code. + */ + +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/irq.h> +#include <linux/kernel_stat.h> +#include <linux/module.h> + +#include <asm/machvec.h> +#include <asm/dma.h> +#include <asm/perf_event.h> +#include <asm/mce.h> + +#include "proto.h" +#include "irq_impl.h" + +/* Hack minimum IPL during interrupt processing for broken hardware. */ +#ifdef CONFIG_ALPHA_BROKEN_IRQ_MASK +int __min_ipl; +EXPORT_SYMBOL(__min_ipl); +#endif + +/* + * Performance counter hook. A module can override this to + * do something useful. + */ +static void +dummy_perf(unsigned long vector, struct pt_regs *regs) +{ + irq_err_count++; + printk(KERN_CRIT "Performance counter interrupt!\n"); +} + +void (*perf_irq)(unsigned long, struct pt_regs *) = dummy_perf; +EXPORT_SYMBOL(perf_irq); + +/* + * The main interrupt entry point. + */ + +asmlinkage void +do_entInt(unsigned long type, unsigned long vector, + unsigned long la_ptr, struct pt_regs *regs) +{ + struct pt_regs *old_regs; + switch (type) { + case 0: +#ifdef CONFIG_SMP + handle_ipi(regs); + return; +#else + irq_err_count++; + printk(KERN_CRIT "Interprocessor interrupt? " + "You must be kidding!\n"); +#endif + break; + case 1: + old_regs = set_irq_regs(regs); +#ifdef CONFIG_SMP + { + long cpu; + + local_irq_disable(); + smp_percpu_timer_interrupt(regs); + cpu = smp_processor_id(); + if (cpu != boot_cpuid) { + kstat_incr_irqs_this_cpu(RTC_IRQ, irq_to_desc(RTC_IRQ)); + } else { + handle_irq(RTC_IRQ); + } + } +#else + handle_irq(RTC_IRQ); +#endif + set_irq_regs(old_regs); + return; + case 2: + old_regs = set_irq_regs(regs); + alpha_mv.machine_check(vector, la_ptr); + set_irq_regs(old_regs); + return; + case 3: + old_regs = set_irq_regs(regs); + alpha_mv.device_interrupt(vector); + set_irq_regs(old_regs); + return; + case 4: + perf_irq(la_ptr, regs); + return; + default: + printk(KERN_CRIT "Hardware intr %ld %lx? Huh?\n", + type, vector); + } + printk(KERN_CRIT "PC = %016lx PS=%04lx\n", regs->pc, regs->ps); +} + +void __init +common_init_isa_dma(void) +{ + outb(0, DMA1_RESET_REG); + outb(0, DMA2_RESET_REG); + outb(0, DMA1_CLR_MASK_REG); + outb(0, DMA2_CLR_MASK_REG); +} + +void __init +init_IRQ(void) +{ + /* Just in case the platform init_irq() causes interrupts/mchecks + (as is the case with RAWHIDE, at least). */ + wrent(entInt, 0); + + alpha_mv.init_irq(); +} + +/* + * machine error checks + */ +#define MCHK_K_TPERR 0x0080 +#define MCHK_K_TCPERR 0x0082 +#define MCHK_K_HERR 0x0084 +#define MCHK_K_ECC_C 0x0086 +#define MCHK_K_ECC_NC 0x0088 +#define MCHK_K_OS_BUGCHECK 0x008A +#define MCHK_K_PAL_BUGCHECK 0x0090 + +#ifndef CONFIG_SMP +struct mcheck_info __mcheck_info; +#endif + +void +process_mcheck_info(unsigned long vector, unsigned long la_ptr, + const char *machine, int expected) +{ + struct el_common *mchk_header; + const char *reason; + + /* + * See if the machine check is due to a badaddr() and if so, + * ignore it. + */ + +#ifdef CONFIG_VERBOSE_MCHECK + if (alpha_verbose_mcheck > 1) { + printk(KERN_CRIT "%s machine check %s\n", machine, + expected ? "expected." : "NOT expected!!!"); + } +#endif + + if (expected) { + int cpu = smp_processor_id(); + mcheck_expected(cpu) = 0; + mcheck_taken(cpu) = 1; + return; + } + + mchk_header = (struct el_common *)la_ptr; + + printk(KERN_CRIT "%s machine check: vector=0x%lx pc=0x%lx code=0x%x\n", + machine, vector, get_irq_regs()->pc, mchk_header->code); + + switch (mchk_header->code) { + /* Machine check reasons. Defined according to PALcode sources. */ + case 0x80: reason = "tag parity error"; break; + case 0x82: reason = "tag control parity error"; break; + case 0x84: reason = "generic hard error"; break; + case 0x86: reason = "correctable ECC error"; break; + case 0x88: reason = "uncorrectable ECC error"; break; + case 0x8A: reason = "OS-specific PAL bugcheck"; break; + case 0x90: reason = "callsys in kernel mode"; break; + case 0x96: reason = "i-cache read retryable error"; break; + case 0x98: reason = "processor detected hard error"; break; + + /* System specific (these are for Alcor, at least): */ + case 0x202: reason = "system detected hard error"; break; + case 0x203: reason = "system detected uncorrectable ECC error"; break; + case 0x204: reason = "SIO SERR occurred on PCI bus"; break; + case 0x205: reason = "parity error detected by core logic"; break; + case 0x206: reason = "SIO IOCHK occurred on ISA bus"; break; + case 0x207: reason = "non-existent memory error"; break; + case 0x208: reason = "MCHK_K_DCSR"; break; + case 0x209: reason = "PCI SERR detected"; break; + case 0x20b: reason = "PCI data parity error detected"; break; + case 0x20d: reason = "PCI address parity error detected"; break; + case 0x20f: reason = "PCI master abort error"; break; + case 0x211: reason = "PCI target abort error"; break; + case 0x213: reason = "scatter/gather PTE invalid error"; break; + case 0x215: reason = "flash ROM write error"; break; + case 0x217: reason = "IOA timeout detected"; break; + case 0x219: reason = "IOCHK#, EISA add-in board parity or other catastrophic error"; break; + case 0x21b: reason = "EISA fail-safe timer timeout"; break; + case 0x21d: reason = "EISA bus time-out"; break; + case 0x21f: reason = "EISA software generated NMI"; break; + case 0x221: reason = "unexpected ev5 IRQ[3] interrupt"; break; + default: reason = "unknown"; break; + } + + printk(KERN_CRIT "machine check type: %s%s\n", + reason, mchk_header->retry ? " (retryable)" : ""); + + dik_show_regs(get_irq_regs(), NULL); + +#ifdef CONFIG_VERBOSE_MCHECK + if (alpha_verbose_mcheck > 1) { + /* Dump the logout area to give all info. */ + unsigned long *ptr = (unsigned long *)la_ptr; + long i; + for (i = 0; i < mchk_header->size / sizeof(long); i += 2) { + printk(KERN_CRIT " +%8lx %016lx %016lx\n", + i*sizeof(long), ptr[i], ptr[i+1]); + } + } +#endif /* CONFIG_VERBOSE_MCHECK */ +} + +/* + * The special RTC interrupt type. The interrupt itself was + * processed by PALcode, and comes in via entInt vector 1. + */ + +struct irqaction timer_irqaction = { + .handler = timer_interrupt, + .flags = IRQF_DISABLED, + .name = "timer", +}; + +void __init +init_rtc_irq(void) +{ + irq_set_chip_and_handler_name(RTC_IRQ, &dummy_irq_chip, + handle_simple_irq, "RTC"); + setup_irq(RTC_IRQ, &timer_irqaction); +} + +/* Dummy irqactions. */ +struct irqaction isa_cascade_irqaction = { + .handler = no_action, + .name = "isa-cascade" +}; + +struct irqaction timer_cascade_irqaction = { + .handler = no_action, + .name = "timer-cascade" +}; + +struct irqaction halt_switch_irqaction = { + .handler = no_action, + .name = "halt-switch" +}; diff --git a/arch/alpha/kernel/irq_i8259.c b/arch/alpha/kernel/irq_i8259.c new file mode 100644 index 00000000..e1861c77 --- /dev/null +++ b/arch/alpha/kernel/irq_i8259.c @@ -0,0 +1,166 @@ +/* + * linux/arch/alpha/kernel/irq_i8259.c + * + * This is the 'legacy' 8259A Programmable Interrupt Controller, + * present in the majority of PC/AT boxes. + * + * Started hacking from linux-2.3.30pre6/arch/i386/kernel/i8259.c. + */ + +#include <linux/init.h> +#include <linux/cache.h> +#include <linux/sched.h> +#include <linux/irq.h> +#include <linux/interrupt.h> + +#include <asm/io.h> + +#include "proto.h" +#include "irq_impl.h" + + +/* Note mask bit is true for DISABLED irqs. */ +static unsigned int cached_irq_mask = 0xffff; +static DEFINE_SPINLOCK(i8259_irq_lock); + +static inline void +i8259_update_irq_hw(unsigned int irq, unsigned long mask) +{ + int port = 0x21; + if (irq & 8) mask >>= 8; + if (irq & 8) port = 0xA1; + outb(mask, port); +} + +inline void +i8259a_enable_irq(struct irq_data *d) +{ + spin_lock(&i8259_irq_lock); + i8259_update_irq_hw(d->irq, cached_irq_mask &= ~(1 << d->irq)); + spin_unlock(&i8259_irq_lock); +} + +static inline void +__i8259a_disable_irq(unsigned int irq) +{ + i8259_update_irq_hw(irq, cached_irq_mask |= 1 << irq); +} + +void +i8259a_disable_irq(struct irq_data *d) +{ + spin_lock(&i8259_irq_lock); + __i8259a_disable_irq(d->irq); + spin_unlock(&i8259_irq_lock); +} + +void +i8259a_mask_and_ack_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + + spin_lock(&i8259_irq_lock); + __i8259a_disable_irq(irq); + + /* Ack the interrupt making it the lowest priority. */ + if (irq >= 8) { + outb(0xE0 | (irq - 8), 0xa0); /* ack the slave */ + irq = 2; + } + outb(0xE0 | irq, 0x20); /* ack the master */ + spin_unlock(&i8259_irq_lock); +} + +struct irq_chip i8259a_irq_type = { + .name = "XT-PIC", + .irq_unmask = i8259a_enable_irq, + .irq_mask = i8259a_disable_irq, + .irq_mask_ack = i8259a_mask_and_ack_irq, +}; + +void __init +init_i8259a_irqs(void) +{ + static struct irqaction cascade = { + .handler = no_action, + .name = "cascade", + }; + + long i; + + outb(0xff, 0x21); /* mask all of 8259A-1 */ + outb(0xff, 0xA1); /* mask all of 8259A-2 */ + + for (i = 0; i < 16; i++) { + irq_set_chip_and_handler(i, &i8259a_irq_type, handle_level_irq); + } + + setup_irq(2, &cascade); +} + + +#if defined(CONFIG_ALPHA_GENERIC) +# define IACK_SC alpha_mv.iack_sc +#elif defined(CONFIG_ALPHA_APECS) +# define IACK_SC APECS_IACK_SC +#elif defined(CONFIG_ALPHA_LCA) +# define IACK_SC LCA_IACK_SC +#elif defined(CONFIG_ALPHA_CIA) +# define IACK_SC CIA_IACK_SC +#elif defined(CONFIG_ALPHA_PYXIS) +# define IACK_SC PYXIS_IACK_SC +#elif defined(CONFIG_ALPHA_TITAN) +# define IACK_SC TITAN_IACK_SC +#elif defined(CONFIG_ALPHA_TSUNAMI) +# define IACK_SC TSUNAMI_IACK_SC +#elif defined(CONFIG_ALPHA_IRONGATE) +# define IACK_SC IRONGATE_IACK_SC +#endif +/* Note that CONFIG_ALPHA_POLARIS is intentionally left out here, since + sys_rx164 wants to use isa_no_iack_sc_device_interrupt for some reason. */ + +#if defined(IACK_SC) +void +isa_device_interrupt(unsigned long vector) +{ + /* + * Generate a PCI interrupt acknowledge cycle. The PIC will + * respond with the interrupt vector of the highest priority + * interrupt that is pending. The PALcode sets up the + * interrupts vectors such that irq level L generates vector L. + */ + int j = *(vuip) IACK_SC; + j &= 0xff; + handle_irq(j); +} +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || !defined(IACK_SC) +void +isa_no_iack_sc_device_interrupt(unsigned long vector) +{ + unsigned long pic; + + /* + * It seems to me that the probability of two or more *device* + * interrupts occurring at almost exactly the same time is + * pretty low. So why pay the price of checking for + * additional interrupts here if the common case can be + * handled so much easier? + */ + /* + * The first read of gives you *all* interrupting lines. + * Therefore, read the mask register and and out those lines + * not enabled. Note that some documentation has 21 and a1 + * write only. This is not true. + */ + pic = inb(0x20) | (inb(0xA0) << 8); /* read isr */ + pic &= 0xFFFB; /* mask out cascade & hibits */ + + while (pic) { + int j = ffz(~pic); + pic &= pic - 1; + handle_irq(j); + } +} +#endif diff --git a/arch/alpha/kernel/irq_impl.h b/arch/alpha/kernel/irq_impl.h new file mode 100644 index 00000000..d507a234 --- /dev/null +++ b/arch/alpha/kernel/irq_impl.h @@ -0,0 +1,40 @@ +/* + * linux/arch/alpha/kernel/irq_impl.h + * + * Copyright (C) 1995 Linus Torvalds + * Copyright (C) 1998, 2000 Richard Henderson + * + * This file contains declarations and inline functions for interfacing + * with the IRQ handling routines in irq.c. + */ + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/profile.h> + + +#define RTC_IRQ 8 + +extern void isa_device_interrupt(unsigned long); +extern void isa_no_iack_sc_device_interrupt(unsigned long); +extern void srm_device_interrupt(unsigned long); +extern void pyxis_device_interrupt(unsigned long); + +extern struct irqaction timer_irqaction; +extern struct irqaction isa_cascade_irqaction; +extern struct irqaction timer_cascade_irqaction; +extern struct irqaction halt_switch_irqaction; + +extern void init_srm_irqs(long, unsigned long); +extern void init_pyxis_irqs(unsigned long); +extern void init_rtc_irq(void); + +extern void common_init_isa_dma(void); + +extern void i8259a_enable_irq(struct irq_data *d); +extern void i8259a_disable_irq(struct irq_data *d); +extern void i8259a_mask_and_ack_irq(struct irq_data *d); +extern struct irq_chip i8259a_irq_type; +extern void init_i8259a_irqs(void); + +extern void handle_irq(int irq); diff --git a/arch/alpha/kernel/irq_pyxis.c b/arch/alpha/kernel/irq_pyxis.c new file mode 100644 index 00000000..13c97a5b --- /dev/null +++ b/arch/alpha/kernel/irq_pyxis.c @@ -0,0 +1,110 @@ +/* + * linux/arch/alpha/kernel/irq_pyxis.c + * + * Based on code written by David A Rusling (david.rusling@reo.mts.dec.com). + * + * IRQ Code common to all PYXIS core logic chips. + */ + +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/irq.h> + +#include <asm/io.h> +#include <asm/core_cia.h> + +#include "proto.h" +#include "irq_impl.h" + + +/* Note mask bit is true for ENABLED irqs. */ +static unsigned long cached_irq_mask; + +static inline void +pyxis_update_irq_hw(unsigned long mask) +{ + *(vulp)PYXIS_INT_MASK = mask; + mb(); + *(vulp)PYXIS_INT_MASK; +} + +static inline void +pyxis_enable_irq(struct irq_data *d) +{ + pyxis_update_irq_hw(cached_irq_mask |= 1UL << (d->irq - 16)); +} + +static void +pyxis_disable_irq(struct irq_data *d) +{ + pyxis_update_irq_hw(cached_irq_mask &= ~(1UL << (d->irq - 16))); +} + +static void +pyxis_mask_and_ack_irq(struct irq_data *d) +{ + unsigned long bit = 1UL << (d->irq - 16); + unsigned long mask = cached_irq_mask &= ~bit; + + /* Disable the interrupt. */ + *(vulp)PYXIS_INT_MASK = mask; + wmb(); + /* Ack PYXIS PCI interrupt. */ + *(vulp)PYXIS_INT_REQ = bit; + mb(); + /* Re-read to force both writes. */ + *(vulp)PYXIS_INT_MASK; +} + +static struct irq_chip pyxis_irq_type = { + .name = "PYXIS", + .irq_mask_ack = pyxis_mask_and_ack_irq, + .irq_mask = pyxis_disable_irq, + .irq_unmask = pyxis_enable_irq, +}; + +void +pyxis_device_interrupt(unsigned long vector) +{ + unsigned long pld; + unsigned int i; + + /* Read the interrupt summary register of PYXIS */ + pld = *(vulp)PYXIS_INT_REQ; + pld &= cached_irq_mask; + + /* + * Now for every possible bit set, work through them and call + * the appropriate interrupt handler. + */ + while (pld) { + i = ffz(~pld); + pld &= pld - 1; /* clear least bit set */ + if (i == 7) + isa_device_interrupt(vector); + else + handle_irq(16+i); + } +} + +void __init +init_pyxis_irqs(unsigned long ignore_mask) +{ + long i; + + *(vulp)PYXIS_INT_MASK = 0; /* disable all */ + *(vulp)PYXIS_INT_REQ = -1; /* flush all */ + mb(); + + /* Send -INTA pulses to clear any pending interrupts ...*/ + *(vuip) CIA_IACK_SC; + + for (i = 16; i < 48; ++i) { + if ((ignore_mask >> i) & 1) + continue; + irq_set_chip_and_handler(i, &pyxis_irq_type, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + setup_irq(16+7, &isa_cascade_irqaction); +} diff --git a/arch/alpha/kernel/irq_srm.c b/arch/alpha/kernel/irq_srm.c new file mode 100644 index 00000000..a79fa30e --- /dev/null +++ b/arch/alpha/kernel/irq_srm.c @@ -0,0 +1,64 @@ +/* + * Handle interrupts from the SRM, assuming no additional weirdness. + */ + +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/irq.h> + +#include "proto.h" +#include "irq_impl.h" + + +/* + * Is the palcode SMP safe? In other words: can we call cserve_ena/dis + * at the same time in multiple CPUs? To be safe I added a spinlock + * but it can be removed trivially if the palcode is robust against smp. + */ +DEFINE_SPINLOCK(srm_irq_lock); + +static inline void +srm_enable_irq(struct irq_data *d) +{ + spin_lock(&srm_irq_lock); + cserve_ena(d->irq - 16); + spin_unlock(&srm_irq_lock); +} + +static void +srm_disable_irq(struct irq_data *d) +{ + spin_lock(&srm_irq_lock); + cserve_dis(d->irq - 16); + spin_unlock(&srm_irq_lock); +} + +/* Handle interrupts from the SRM, assuming no additional weirdness. */ +static struct irq_chip srm_irq_type = { + .name = "SRM", + .irq_unmask = srm_enable_irq, + .irq_mask = srm_disable_irq, + .irq_mask_ack = srm_disable_irq, +}; + +void __init +init_srm_irqs(long max, unsigned long ignore_mask) +{ + long i; + + if (NR_IRQS <= 16) + return; + for (i = 16; i < max; ++i) { + if (i < 64 && ((ignore_mask >> i) & 1)) + continue; + irq_set_chip_and_handler(i, &srm_irq_type, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } +} + +void +srm_device_interrupt(unsigned long vector) +{ + int irq = (vector - 0x800) >> 4; + handle_irq(irq); +} diff --git a/arch/alpha/kernel/machvec_impl.h b/arch/alpha/kernel/machvec_impl.h new file mode 100644 index 00000000..7fa62488 --- /dev/null +++ b/arch/alpha/kernel/machvec_impl.h @@ -0,0 +1,155 @@ +/* + * linux/arch/alpha/kernel/machvec_impl.h + * + * Copyright (C) 1997, 1998 Richard Henderson + * + * This file has goodies to help simplify instantiation of machine vectors. + */ + +#include <asm/pgalloc.h> + +/* Whee. These systems don't have an HAE: + IRONGATE, MARVEL, POLARIS, TSUNAMI, TITAN, WILDFIRE + Fix things up for the GENERIC kernel by defining the HAE address + to be that of the cache. Now we can read and write it as we like. ;-) */ +#define IRONGATE_HAE_ADDRESS (&alpha_mv.hae_cache) +#define MARVEL_HAE_ADDRESS (&alpha_mv.hae_cache) +#define POLARIS_HAE_ADDRESS (&alpha_mv.hae_cache) +#define TSUNAMI_HAE_ADDRESS (&alpha_mv.hae_cache) +#define TITAN_HAE_ADDRESS (&alpha_mv.hae_cache) +#define WILDFIRE_HAE_ADDRESS (&alpha_mv.hae_cache) + +#ifdef CIA_ONE_HAE_WINDOW +#define CIA_HAE_ADDRESS (&alpha_mv.hae_cache) +#endif +#ifdef MCPCIA_ONE_HAE_WINDOW +#define MCPCIA_HAE_ADDRESS (&alpha_mv.hae_cache) +#endif +#ifdef T2_ONE_HAE_WINDOW +#define T2_HAE_ADDRESS (&alpha_mv.hae_cache) +#endif + +/* Only a few systems don't define IACK_SC, handling all interrupts through + the SRM console. But splitting out that one case from IO() below + seems like such a pain. Define this to get things to compile. */ +#define JENSEN_IACK_SC 1 +#define T2_IACK_SC 1 +#define WILDFIRE_IACK_SC 1 /* FIXME */ + +/* + * Some helpful macros for filling in the blanks. + */ + +#define CAT1(x,y) x##y +#define CAT(x,y) CAT1(x,y) + +#define DO_DEFAULT_RTC \ + .rtc_port = 0x70, \ + .rtc_get_time = common_get_rtc_time, \ + .rtc_set_time = common_set_rtc_time + +#define DO_EV4_MMU \ + .max_asn = EV4_MAX_ASN, \ + .mv_switch_mm = ev4_switch_mm, \ + .mv_activate_mm = ev4_activate_mm, \ + .mv_flush_tlb_current = ev4_flush_tlb_current, \ + .mv_flush_tlb_current_page = ev4_flush_tlb_current_page + +#define DO_EV5_MMU \ + .max_asn = EV5_MAX_ASN, \ + .mv_switch_mm = ev5_switch_mm, \ + .mv_activate_mm = ev5_activate_mm, \ + .mv_flush_tlb_current = ev5_flush_tlb_current, \ + .mv_flush_tlb_current_page = ev5_flush_tlb_current_page + +#define DO_EV6_MMU \ + .max_asn = EV6_MAX_ASN, \ + .mv_switch_mm = ev5_switch_mm, \ + .mv_activate_mm = ev5_activate_mm, \ + .mv_flush_tlb_current = ev5_flush_tlb_current, \ + .mv_flush_tlb_current_page = ev5_flush_tlb_current_page + +#define DO_EV7_MMU \ + .max_asn = EV6_MAX_ASN, \ + .mv_switch_mm = ev5_switch_mm, \ + .mv_activate_mm = ev5_activate_mm, \ + .mv_flush_tlb_current = ev5_flush_tlb_current, \ + .mv_flush_tlb_current_page = ev5_flush_tlb_current_page + +#define IO_LITE(UP,low) \ + .hae_register = (unsigned long *) CAT(UP,_HAE_ADDRESS), \ + .iack_sc = CAT(UP,_IACK_SC), \ + .mv_ioread8 = CAT(low,_ioread8), \ + .mv_ioread16 = CAT(low,_ioread16), \ + .mv_ioread32 = CAT(low,_ioread32), \ + .mv_iowrite8 = CAT(low,_iowrite8), \ + .mv_iowrite16 = CAT(low,_iowrite16), \ + .mv_iowrite32 = CAT(low,_iowrite32), \ + .mv_readb = CAT(low,_readb), \ + .mv_readw = CAT(low,_readw), \ + .mv_readl = CAT(low,_readl), \ + .mv_readq = CAT(low,_readq), \ + .mv_writeb = CAT(low,_writeb), \ + .mv_writew = CAT(low,_writew), \ + .mv_writel = CAT(low,_writel), \ + .mv_writeq = CAT(low,_writeq), \ + .mv_ioportmap = CAT(low,_ioportmap), \ + .mv_ioremap = CAT(low,_ioremap), \ + .mv_iounmap = CAT(low,_iounmap), \ + .mv_is_ioaddr = CAT(low,_is_ioaddr), \ + .mv_is_mmio = CAT(low,_is_mmio) \ + +#define IO(UP,low) \ + IO_LITE(UP,low), \ + .pci_ops = &CAT(low,_pci_ops), \ + .mv_pci_tbi = CAT(low,_pci_tbi) + +#define DO_APECS_IO IO(APECS,apecs) +#define DO_CIA_IO IO(CIA,cia) +#define DO_IRONGATE_IO IO(IRONGATE,irongate) +#define DO_LCA_IO IO(LCA,lca) +#define DO_MARVEL_IO IO(MARVEL,marvel) +#define DO_MCPCIA_IO IO(MCPCIA,mcpcia) +#define DO_POLARIS_IO IO(POLARIS,polaris) +#define DO_T2_IO IO(T2,t2) +#define DO_TSUNAMI_IO IO(TSUNAMI,tsunami) +#define DO_TITAN_IO IO(TITAN,titan) +#define DO_WILDFIRE_IO IO(WILDFIRE,wildfire) + +#define DO_PYXIS_IO IO_LITE(CIA,cia_bwx), \ + .pci_ops = &cia_pci_ops, \ + .mv_pci_tbi = cia_pci_tbi + +/* + * In a GENERIC kernel, we have lots of these vectors floating about, + * all but one of which we want to go away. In a non-GENERIC kernel, + * we want only one, ever. + * + * Accomplish this in the GENERIC kernel by putting all of the vectors + * in the .init.data section where they'll go away. We'll copy the + * one we want to the real alpha_mv vector in setup_arch. + * + * Accomplish this in a non-GENERIC kernel by ifdef'ing out all but + * one of the vectors, which will not reside in .init.data. We then + * alias this one vector to alpha_mv, so no copy is needed. + * + * Upshot: set __initdata to nothing for non-GENERIC kernels. + */ + +#ifdef CONFIG_ALPHA_GENERIC +#define __initmv __initdata +#define ALIAS_MV(x) +#else +#define __initmv __initdata_refok + +/* GCC actually has a syntax for defining aliases, but is under some + delusion that you shouldn't be able to declare it extern somewhere + else beforehand. Fine. We'll do it ourselves. */ +#if 0 +#define ALIAS_MV(system) \ + struct alpha_machine_vector alpha_mv __attribute__((alias(#system "_mv"))); +#else +#define ALIAS_MV(system) \ + asm(".global alpha_mv\nalpha_mv = " #system "_mv"); +#endif +#endif /* GENERIC */ diff --git a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c new file mode 100644 index 00000000..2fd00b70 --- /dev/null +++ b/arch/alpha/kernel/module.c @@ -0,0 +1,282 @@ +/* Kernel module help for Alpha. + Copyright (C) 2002 Richard Henderson. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include <linux/moduleloader.h> +#include <linux/elf.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/slab.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(fmt...) +#endif + +/* Allocate the GOT at the end of the core sections. */ + +struct got_entry { + struct got_entry *next; + Elf64_Sxword r_addend; + int got_offset; +}; + +static inline void +process_reloc_for_got(Elf64_Rela *rela, + struct got_entry *chains, Elf64_Xword *poffset) +{ + unsigned long r_sym = ELF64_R_SYM (rela->r_info); + unsigned long r_type = ELF64_R_TYPE (rela->r_info); + Elf64_Sxword r_addend = rela->r_addend; + struct got_entry *g; + + if (r_type != R_ALPHA_LITERAL) + return; + + for (g = chains + r_sym; g ; g = g->next) + if (g->r_addend == r_addend) { + if (g->got_offset == 0) { + g->got_offset = *poffset; + *poffset += 8; + } + goto found_entry; + } + + g = kmalloc (sizeof (*g), GFP_KERNEL); + g->next = chains[r_sym].next; + g->r_addend = r_addend; + g->got_offset = *poffset; + *poffset += 8; + chains[r_sym].next = g; + + found_entry: + /* Trick: most of the ELF64_R_TYPE field is unused. There are + 42 valid relocation types, and a 32-bit field. Co-opt the + bits above 256 to store the got offset for this reloc. */ + rela->r_info |= g->got_offset << 8; +} + +int +module_frob_arch_sections(Elf64_Ehdr *hdr, Elf64_Shdr *sechdrs, + char *secstrings, struct module *me) +{ + struct got_entry *chains; + Elf64_Rela *rela; + Elf64_Shdr *esechdrs, *symtab, *s, *got; + unsigned long nsyms, nrela, i; + + esechdrs = sechdrs + hdr->e_shnum; + symtab = got = NULL; + + /* Find out how large the symbol table is. Allocate one got_entry + head per symbol. Normally this will be enough, but not always. + We'll chain different offsets for the symbol down each head. */ + for (s = sechdrs; s < esechdrs; ++s) + if (s->sh_type == SHT_SYMTAB) + symtab = s; + else if (!strcmp(".got", secstrings + s->sh_name)) { + got = s; + me->arch.gotsecindex = s - sechdrs; + } + + if (!symtab) { + printk(KERN_ERR "module %s: no symbol table\n", me->name); + return -ENOEXEC; + } + if (!got) { + printk(KERN_ERR "module %s: no got section\n", me->name); + return -ENOEXEC; + } + + nsyms = symtab->sh_size / sizeof(Elf64_Sym); + chains = kcalloc(nsyms, sizeof(struct got_entry), GFP_KERNEL); + if (!chains) { + printk(KERN_ERR + "module %s: no memory for symbol chain buffer\n", + me->name); + return -ENOMEM; + } + + got->sh_size = 0; + got->sh_addralign = 8; + got->sh_type = SHT_NOBITS; + + /* Examine all LITERAL relocations to find out what GOT entries + are required. This sizes the GOT section as well. */ + for (s = sechdrs; s < esechdrs; ++s) + if (s->sh_type == SHT_RELA) { + nrela = s->sh_size / sizeof(Elf64_Rela); + rela = (void *)hdr + s->sh_offset; + for (i = 0; i < nrela; ++i) + process_reloc_for_got(rela+i, chains, + &got->sh_size); + } + + /* Free the memory we allocated. */ + for (i = 0; i < nsyms; ++i) { + struct got_entry *g, *n; + for (g = chains[i].next; g ; g = n) { + n = g->next; + kfree(g); + } + } + kfree(chains); + + return 0; +} + +int +apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relsec, + struct module *me) +{ + Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr; + unsigned long i, n = sechdrs[relsec].sh_size / sizeof(*rela); + Elf64_Sym *symtab, *sym; + void *base, *location; + unsigned long got, gp; + + DEBUGP("Applying relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + + base = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr; + symtab = (Elf64_Sym *)sechdrs[symindex].sh_addr; + + /* The small sections were sorted to the end of the segment. + The following should definitely cover them. */ + gp = (u64)me->module_core + me->core_size - 0x8000; + got = sechdrs[me->arch.gotsecindex].sh_addr; + + for (i = 0; i < n; i++) { + unsigned long r_sym = ELF64_R_SYM (rela[i].r_info); + unsigned long r_type = ELF64_R_TYPE (rela[i].r_info); + unsigned long r_got_offset = r_type >> 8; + unsigned long value, hi, lo; + r_type &= 0xff; + + /* This is where to make the change. */ + location = base + rela[i].r_offset; + + /* This is the symbol it is referring to. Note that all + unresolved symbols have been resolved. */ + sym = symtab + r_sym; + value = sym->st_value + rela[i].r_addend; + + switch (r_type) { + case R_ALPHA_NONE: + break; + case R_ALPHA_REFQUAD: + /* BUG() can produce misaligned relocations. */ + ((u32 *)location)[0] = value; + ((u32 *)location)[1] = value >> 32; + break; + case R_ALPHA_GPREL32: + value -= gp; + if ((int)value != value) + goto reloc_overflow; + *(u32 *)location = value; + break; + case R_ALPHA_LITERAL: + hi = got + r_got_offset; + lo = hi - gp; + if ((short)lo != lo) + goto reloc_overflow; + *(u16 *)location = lo; + *(u64 *)hi = value; + break; + case R_ALPHA_LITUSE: + break; + case R_ALPHA_GPDISP: + value = gp - (u64)location; + lo = (short)value; + hi = (int)(value - lo); + if (hi + lo != value) + goto reloc_overflow; + *(u16 *)location = hi >> 16; + *(u16 *)(location + rela[i].r_addend) = lo; + break; + case R_ALPHA_BRSGP: + /* BRSGP is only allowed to bind to local symbols. + If the section is undef, this means that the + value was resolved from somewhere else. */ + if (sym->st_shndx == SHN_UNDEF) + goto reloc_overflow; + if ((sym->st_other & STO_ALPHA_STD_GPLOAD) == + STO_ALPHA_STD_GPLOAD) + /* Omit the prologue. */ + value += 8; + /* FALLTHRU */ + case R_ALPHA_BRADDR: + value -= (u64)location + 4; + if (value & 3) + goto reloc_overflow; + value = (long)value >> 2; + if (value + (1<<21) >= 1<<22) + goto reloc_overflow; + value &= 0x1fffff; + value |= *(u32 *)location & ~0x1fffff; + *(u32 *)location = value; + break; + case R_ALPHA_HINT: + break; + case R_ALPHA_SREL32: + value -= (u64)location; + if ((int)value != value) + goto reloc_overflow; + *(u32 *)location = value; + break; + case R_ALPHA_SREL64: + value -= (u64)location; + *(u64 *)location = value; + break; + case R_ALPHA_GPRELHIGH: + value = (long)(value - gp + 0x8000) >> 16; + if ((short) value != value) + goto reloc_overflow; + *(u16 *)location = value; + break; + case R_ALPHA_GPRELLOW: + value -= gp; + *(u16 *)location = value; + break; + case R_ALPHA_GPREL16: + value -= gp; + if ((short) value != value) + goto reloc_overflow; + *(u16 *)location = value; + break; + default: + printk(KERN_ERR "module %s: Unknown relocation: %lu\n", + me->name, r_type); + return -ENOEXEC; + reloc_overflow: + if (ELF64_ST_TYPE (sym->st_info) == STT_SECTION) + printk(KERN_ERR + "module %s: Relocation (type %lu) overflow vs section %d\n", + me->name, r_type, sym->st_shndx); + else + printk(KERN_ERR + "module %s: Relocation (type %lu) overflow vs %s\n", + me->name, r_type, strtab + sym->st_name); + return -ENOEXEC; + } + } + + return 0; +} diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c new file mode 100644 index 00000000..49ee3193 --- /dev/null +++ b/arch/alpha/kernel/osf_sys.c @@ -0,0 +1,1240 @@ +/* + * linux/arch/alpha/kernel/osf_sys.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +/* + * This file handles some of the stranger OSF/1 system call interfaces. + * Some of the system calls expect a non-C calling standard, others have + * special parameter blocks.. + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/syscalls.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/utsname.h> +#include <linux/time.h> +#include <linux/timex.h> +#include <linux/major.h> +#include <linux/stat.h> +#include <linux/mman.h> +#include <linux/shm.h> +#include <linux/poll.h> +#include <linux/file.h> +#include <linux/types.h> +#include <linux/ipc.h> +#include <linux/namei.h> +#include <linux/uio.h> +#include <linux/vfs.h> +#include <linux/rcupdate.h> +#include <linux/slab.h> + +#include <asm/fpu.h> +#include <asm/io.h> +#include <asm/uaccess.h> +#include <asm/sysinfo.h> +#include <asm/thread_info.h> +#include <asm/hwrpb.h> +#include <asm/processor.h> + +/* + * Brk needs to return an error. Still support Linux's brk(0) query idiom, + * which OSF programs just shouldn't be doing. We're still not quite + * identical to OSF as we don't return 0 on success, but doing otherwise + * would require changes to libc. Hopefully this is good enough. + */ +SYSCALL_DEFINE1(osf_brk, unsigned long, brk) +{ + unsigned long retval = sys_brk(brk); + if (brk && brk != retval) + retval = -ENOMEM; + return retval; +} + +/* + * This is pure guess-work.. + */ +SYSCALL_DEFINE4(osf_set_program_attributes, unsigned long, text_start, + unsigned long, text_len, unsigned long, bss_start, + unsigned long, bss_len) +{ + struct mm_struct *mm; + + mm = current->mm; + mm->end_code = bss_start + bss_len; + mm->start_brk = bss_start + bss_len; + mm->brk = bss_start + bss_len; +#if 0 + printk("set_program_attributes(%lx %lx %lx %lx)\n", + text_start, text_len, bss_start, bss_len); +#endif + return 0; +} + +/* + * OSF/1 directory handling functions... + * + * The "getdents()" interface is much more sane: the "basep" stuff is + * braindamage (it can't really handle filesystems where the directory + * offset differences aren't the same as "d_reclen"). + */ +#define NAME_OFFSET offsetof (struct osf_dirent, d_name) + +struct osf_dirent { + unsigned int d_ino; + unsigned short d_reclen; + unsigned short d_namlen; + char d_name[1]; +}; + +struct osf_dirent_callback { + struct osf_dirent __user *dirent; + long __user *basep; + unsigned int count; + int error; +}; + +static int +osf_filldir(void *__buf, const char *name, int namlen, loff_t offset, + u64 ino, unsigned int d_type) +{ + struct osf_dirent __user *dirent; + struct osf_dirent_callback *buf = (struct osf_dirent_callback *) __buf; + unsigned int reclen = ALIGN(NAME_OFFSET + namlen + 1, sizeof(u32)); + unsigned int d_ino; + + buf->error = -EINVAL; /* only used if we fail */ + if (reclen > buf->count) + return -EINVAL; + d_ino = ino; + if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { + buf->error = -EOVERFLOW; + return -EOVERFLOW; + } + if (buf->basep) { + if (put_user(offset, buf->basep)) + goto Efault; + buf->basep = NULL; + } + dirent = buf->dirent; + if (put_user(d_ino, &dirent->d_ino) || + put_user(namlen, &dirent->d_namlen) || + put_user(reclen, &dirent->d_reclen) || + copy_to_user(dirent->d_name, name, namlen) || + put_user(0, dirent->d_name + namlen)) + goto Efault; + dirent = (void __user *)dirent + reclen; + buf->dirent = dirent; + buf->count -= reclen; + return 0; +Efault: + buf->error = -EFAULT; + return -EFAULT; +} + +SYSCALL_DEFINE4(osf_getdirentries, unsigned int, fd, + struct osf_dirent __user *, dirent, unsigned int, count, + long __user *, basep) +{ + int error; + struct file *file; + struct osf_dirent_callback buf; + + error = -EBADF; + file = fget(fd); + if (!file) + goto out; + + buf.dirent = dirent; + buf.basep = basep; + buf.count = count; + buf.error = 0; + + error = vfs_readdir(file, osf_filldir, &buf); + if (error >= 0) + error = buf.error; + if (count != buf.count) + error = count - buf.count; + + fput(file); + out: + return error; +} + +#undef NAME_OFFSET + +SYSCALL_DEFINE6(osf_mmap, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, unsigned long, fd, + unsigned long, off) +{ + unsigned long ret = -EINVAL; + +#if 0 + if (flags & (_MAP_HASSEMAPHORE | _MAP_INHERIT | _MAP_UNALIGNED)) + printk("%s: unimplemented OSF mmap flags %04lx\n", + current->comm, flags); +#endif + if ((off + PAGE_ALIGN(len)) < off) + goto out; + if (off & ~PAGE_MASK) + goto out; + ret = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + out: + return ret; +} + + +/* + * The OSF/1 statfs structure is much larger, but this should + * match the beginning, at least. + */ +struct osf_statfs { + short f_type; + short f_flags; + int f_fsize; + int f_bsize; + int f_blocks; + int f_bfree; + int f_bavail; + int f_files; + int f_ffree; + __kernel_fsid_t f_fsid; +}; + +static int +linux_to_osf_statfs(struct kstatfs *linux_stat, struct osf_statfs __user *osf_stat, + unsigned long bufsiz) +{ + struct osf_statfs tmp_stat; + + tmp_stat.f_type = linux_stat->f_type; + tmp_stat.f_flags = 0; /* mount flags */ + tmp_stat.f_fsize = linux_stat->f_frsize; + tmp_stat.f_bsize = linux_stat->f_bsize; + tmp_stat.f_blocks = linux_stat->f_blocks; + tmp_stat.f_bfree = linux_stat->f_bfree; + tmp_stat.f_bavail = linux_stat->f_bavail; + tmp_stat.f_files = linux_stat->f_files; + tmp_stat.f_ffree = linux_stat->f_ffree; + tmp_stat.f_fsid = linux_stat->f_fsid; + if (bufsiz > sizeof(tmp_stat)) + bufsiz = sizeof(tmp_stat); + return copy_to_user(osf_stat, &tmp_stat, bufsiz) ? -EFAULT : 0; +} + +SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname, + struct osf_statfs __user *, buffer, unsigned long, bufsiz) +{ + struct kstatfs linux_stat; + int error = user_statfs(pathname, &linux_stat); + if (!error) + error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz); + return error; +} + +SYSCALL_DEFINE3(osf_fstatfs, unsigned long, fd, + struct osf_statfs __user *, buffer, unsigned long, bufsiz) +{ + struct kstatfs linux_stat; + int error = fd_statfs(fd, &linux_stat); + if (!error) + error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz); + return error; +} + +/* + * Uhh.. OSF/1 mount parameters aren't exactly obvious.. + * + * Although to be frank, neither are the native Linux/i386 ones.. + */ +struct ufs_args { + char __user *devname; + int flags; + uid_t exroot; +}; + +struct cdfs_args { + char __user *devname; + int flags; + uid_t exroot; + + /* This has lots more here, which Linux handles with the option block + but I'm too lazy to do the translation into ASCII. */ +}; + +struct procfs_args { + char __user *devname; + int flags; + uid_t exroot; +}; + +/* + * We can't actually handle ufs yet, so we translate UFS mounts to + * ext2fs mounts. I wouldn't mind a UFS filesystem, but the UFS + * layout is so braindead it's a major headache doing it. + * + * Just how long ago was it written? OTOH our UFS driver may be still + * unhappy with OSF UFS. [CHECKME] + */ +static int +osf_ufs_mount(char *dirname, struct ufs_args __user *args, int flags) +{ + int retval; + struct cdfs_args tmp; + char *devname; + + retval = -EFAULT; + if (copy_from_user(&tmp, args, sizeof(tmp))) + goto out; + devname = getname(tmp.devname); + retval = PTR_ERR(devname); + if (IS_ERR(devname)) + goto out; + retval = do_mount(devname, dirname, "ext2", flags, NULL); + putname(devname); + out: + return retval; +} + +static int +osf_cdfs_mount(char *dirname, struct cdfs_args __user *args, int flags) +{ + int retval; + struct cdfs_args tmp; + char *devname; + + retval = -EFAULT; + if (copy_from_user(&tmp, args, sizeof(tmp))) + goto out; + devname = getname(tmp.devname); + retval = PTR_ERR(devname); + if (IS_ERR(devname)) + goto out; + retval = do_mount(devname, dirname, "iso9660", flags, NULL); + putname(devname); + out: + return retval; +} + +static int +osf_procfs_mount(char *dirname, struct procfs_args __user *args, int flags) +{ + struct procfs_args tmp; + + if (copy_from_user(&tmp, args, sizeof(tmp))) + return -EFAULT; + + return do_mount("", dirname, "proc", flags, NULL); +} + +SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, const char __user *, path, + int, flag, void __user *, data) +{ + int retval; + char *name; + + name = getname(path); + retval = PTR_ERR(name); + if (IS_ERR(name)) + goto out; + switch (typenr) { + case 1: + retval = osf_ufs_mount(name, data, flag); + break; + case 6: + retval = osf_cdfs_mount(name, data, flag); + break; + case 9: + retval = osf_procfs_mount(name, data, flag); + break; + default: + retval = -EINVAL; + printk("osf_mount(%ld, %x)\n", typenr, flag); + } + putname(name); + out: + return retval; +} + +SYSCALL_DEFINE1(osf_utsname, char __user *, name) +{ + int error; + + down_read(&uts_sem); + error = -EFAULT; + if (copy_to_user(name + 0, utsname()->sysname, 32)) + goto out; + if (copy_to_user(name + 32, utsname()->nodename, 32)) + goto out; + if (copy_to_user(name + 64, utsname()->release, 32)) + goto out; + if (copy_to_user(name + 96, utsname()->version, 32)) + goto out; + if (copy_to_user(name + 128, utsname()->machine, 32)) + goto out; + + error = 0; + out: + up_read(&uts_sem); + return error; +} + +SYSCALL_DEFINE0(getpagesize) +{ + return PAGE_SIZE; +} + +SYSCALL_DEFINE0(getdtablesize) +{ + return sysctl_nr_open; +} + +/* + * For compatibility with OSF/1 only. Use utsname(2) instead. + */ +SYSCALL_DEFINE2(osf_getdomainname, char __user *, name, int, namelen) +{ + unsigned len; + int i; + + if (!access_ok(VERIFY_WRITE, name, namelen)) + return -EFAULT; + + len = namelen; + if (len > 32) + len = 32; + + down_read(&uts_sem); + for (i = 0; i < len; ++i) { + __put_user(utsname()->domainname[i], name + i); + if (utsname()->domainname[i] == '\0') + break; + } + up_read(&uts_sem); + + return 0; +} + +/* + * The following stuff should move into a header file should it ever + * be labeled "officially supported." Right now, there is just enough + * support to avoid applications (such as tar) printing error + * messages. The attributes are not really implemented. + */ + +/* + * Values for Property list entry flag + */ +#define PLE_PROPAGATE_ON_COPY 0x1 /* cp(1) will copy entry + by default */ +#define PLE_FLAG_MASK 0x1 /* Valid flag values */ +#define PLE_FLAG_ALL -1 /* All flag value */ + +struct proplistname_args { + unsigned int pl_mask; + unsigned int pl_numnames; + char **pl_names; +}; + +union pl_args { + struct setargs { + char __user *path; + long follow; + long nbytes; + char __user *buf; + } set; + struct fsetargs { + long fd; + long nbytes; + char __user *buf; + } fset; + struct getargs { + char __user *path; + long follow; + struct proplistname_args __user *name_args; + long nbytes; + char __user *buf; + int __user *min_buf_size; + } get; + struct fgetargs { + long fd; + struct proplistname_args __user *name_args; + long nbytes; + char __user *buf; + int __user *min_buf_size; + } fget; + struct delargs { + char __user *path; + long follow; + struct proplistname_args __user *name_args; + } del; + struct fdelargs { + long fd; + struct proplistname_args __user *name_args; + } fdel; +}; + +enum pl_code { + PL_SET = 1, PL_FSET = 2, + PL_GET = 3, PL_FGET = 4, + PL_DEL = 5, PL_FDEL = 6 +}; + +SYSCALL_DEFINE2(osf_proplist_syscall, enum pl_code, code, + union pl_args __user *, args) +{ + long error; + int __user *min_buf_size_ptr; + + switch (code) { + case PL_SET: + if (get_user(error, &args->set.nbytes)) + error = -EFAULT; + break; + case PL_FSET: + if (get_user(error, &args->fset.nbytes)) + error = -EFAULT; + break; + case PL_GET: + error = get_user(min_buf_size_ptr, &args->get.min_buf_size); + if (error) + break; + error = put_user(0, min_buf_size_ptr); + break; + case PL_FGET: + error = get_user(min_buf_size_ptr, &args->fget.min_buf_size); + if (error) + break; + error = put_user(0, min_buf_size_ptr); + break; + case PL_DEL: + case PL_FDEL: + error = 0; + break; + default: + error = -EOPNOTSUPP; + break; + }; + return error; +} + +SYSCALL_DEFINE2(osf_sigstack, struct sigstack __user *, uss, + struct sigstack __user *, uoss) +{ + unsigned long usp = rdusp(); + unsigned long oss_sp = current->sas_ss_sp + current->sas_ss_size; + unsigned long oss_os = on_sig_stack(usp); + int error; + + if (uss) { + void __user *ss_sp; + + error = -EFAULT; + if (get_user(ss_sp, &uss->ss_sp)) + goto out; + + /* If the current stack was set with sigaltstack, don't + swap stacks while we are on it. */ + error = -EPERM; + if (current->sas_ss_sp && on_sig_stack(usp)) + goto out; + + /* Since we don't know the extent of the stack, and we don't + track onstack-ness, but rather calculate it, we must + presume a size. Ho hum this interface is lossy. */ + current->sas_ss_sp = (unsigned long)ss_sp - SIGSTKSZ; + current->sas_ss_size = SIGSTKSZ; + } + + if (uoss) { + error = -EFAULT; + if (! access_ok(VERIFY_WRITE, uoss, sizeof(*uoss)) + || __put_user(oss_sp, &uoss->ss_sp) + || __put_user(oss_os, &uoss->ss_onstack)) + goto out; + } + + error = 0; + out: + return error; +} + +SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count) +{ + const char *sysinfo_table[] = { + utsname()->sysname, + utsname()->nodename, + utsname()->release, + utsname()->version, + utsname()->machine, + "alpha", /* instruction set architecture */ + "dummy", /* hardware serial number */ + "dummy", /* hardware manufacturer */ + "dummy", /* secure RPC domain */ + }; + unsigned long offset; + const char *res; + long len, err = -EINVAL; + + offset = command-1; + if (offset >= ARRAY_SIZE(sysinfo_table)) { + /* Digital UNIX has a few unpublished interfaces here */ + printk("sysinfo(%d)", command); + goto out; + } + + down_read(&uts_sem); + res = sysinfo_table[offset]; + len = strlen(res)+1; + if ((unsigned long)len > (unsigned long)count) + len = count; + if (copy_to_user(buf, res, len)) + err = -EFAULT; + else + err = 0; + up_read(&uts_sem); + out: + return err; +} + +SYSCALL_DEFINE5(osf_getsysinfo, unsigned long, op, void __user *, buffer, + unsigned long, nbytes, int __user *, start, void __user *, arg) +{ + unsigned long w; + struct percpu_struct *cpu; + + switch (op) { + case GSI_IEEE_FP_CONTROL: + /* Return current software fp control & status bits. */ + /* Note that DU doesn't verify available space here. */ + + w = current_thread_info()->ieee_state & IEEE_SW_MASK; + w = swcr_update_status(w, rdfpcr()); + if (put_user(w, (unsigned long __user *) buffer)) + return -EFAULT; + return 0; + + case GSI_IEEE_STATE_AT_SIGNAL: + /* + * Not sure anybody will ever use this weird stuff. These + * ops can be used (under OSF/1) to set the fpcr that should + * be used when a signal handler starts executing. + */ + break; + + case GSI_UACPROC: + if (nbytes < sizeof(unsigned int)) + return -EINVAL; + w = (current_thread_info()->flags >> ALPHA_UAC_SHIFT) & + UAC_BITMASK; + if (put_user(w, (unsigned int __user *)buffer)) + return -EFAULT; + return 1; + + case GSI_PROC_TYPE: + if (nbytes < sizeof(unsigned long)) + return -EINVAL; + cpu = (struct percpu_struct*) + ((char*)hwrpb + hwrpb->processor_offset); + w = cpu->type; + if (put_user(w, (unsigned long __user*)buffer)) + return -EFAULT; + return 1; + + case GSI_GET_HWRPB: + if (nbytes > sizeof(*hwrpb)) + return -EINVAL; + if (copy_to_user(buffer, hwrpb, nbytes) != 0) + return -EFAULT; + return 1; + + default: + break; + } + + return -EOPNOTSUPP; +} + +SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer, + unsigned long, nbytes, int __user *, start, void __user *, arg) +{ + switch (op) { + case SSI_IEEE_FP_CONTROL: { + unsigned long swcr, fpcr; + unsigned int *state; + + /* + * Alpha Architecture Handbook 4.7.7.3: + * To be fully IEEE compiant, we must track the current IEEE + * exception state in software, because spurious bits can be + * set in the trap shadow of a software-complete insn. + */ + + if (get_user(swcr, (unsigned long __user *)buffer)) + return -EFAULT; + state = ¤t_thread_info()->ieee_state; + + /* Update softare trap enable bits. */ + *state = (*state & ~IEEE_SW_MASK) | (swcr & IEEE_SW_MASK); + + /* Update the real fpcr. */ + fpcr = rdfpcr() & FPCR_DYN_MASK; + fpcr |= ieee_swcr_to_fpcr(swcr); + wrfpcr(fpcr); + + return 0; + } + + case SSI_IEEE_RAISE_EXCEPTION: { + unsigned long exc, swcr, fpcr, fex; + unsigned int *state; + + if (get_user(exc, (unsigned long __user *)buffer)) + return -EFAULT; + state = ¤t_thread_info()->ieee_state; + exc &= IEEE_STATUS_MASK; + + /* Update softare trap enable bits. */ + swcr = (*state & IEEE_SW_MASK) | exc; + *state |= exc; + + /* Update the real fpcr. */ + fpcr = rdfpcr(); + fpcr |= ieee_swcr_to_fpcr(swcr); + wrfpcr(fpcr); + + /* If any exceptions set by this call, and are unmasked, + send a signal. Old exceptions are not signaled. */ + fex = (exc >> IEEE_STATUS_TO_EXCSUM_SHIFT) & swcr; + if (fex) { + siginfo_t info; + int si_code = 0; + + if (fex & IEEE_TRAP_ENABLE_DNO) si_code = FPE_FLTUND; + if (fex & IEEE_TRAP_ENABLE_INE) si_code = FPE_FLTRES; + if (fex & IEEE_TRAP_ENABLE_UNF) si_code = FPE_FLTUND; + if (fex & IEEE_TRAP_ENABLE_OVF) si_code = FPE_FLTOVF; + if (fex & IEEE_TRAP_ENABLE_DZE) si_code = FPE_FLTDIV; + if (fex & IEEE_TRAP_ENABLE_INV) si_code = FPE_FLTINV; + + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = NULL; /* FIXME */ + send_sig_info(SIGFPE, &info, current); + } + return 0; + } + + case SSI_IEEE_STATE_AT_SIGNAL: + case SSI_IEEE_IGNORE_STATE_AT_SIGNAL: + /* + * Not sure anybody will ever use this weird stuff. These + * ops can be used (under OSF/1) to set the fpcr that should + * be used when a signal handler starts executing. + */ + break; + + case SSI_NVPAIRS: { + unsigned long v, w, i; + unsigned int old, new; + + for (i = 0; i < nbytes; ++i) { + + if (get_user(v, 2*i + (unsigned int __user *)buffer)) + return -EFAULT; + if (get_user(w, 2*i + 1 + (unsigned int __user *)buffer)) + return -EFAULT; + switch (v) { + case SSIN_UACPROC: + again: + old = current_thread_info()->flags; + new = old & ~(UAC_BITMASK << ALPHA_UAC_SHIFT); + new = new | (w & UAC_BITMASK) << ALPHA_UAC_SHIFT; + if (cmpxchg(¤t_thread_info()->flags, + old, new) != old) + goto again; + break; + + default: + return -EOPNOTSUPP; + } + } + return 0; + } + + default: + break; + } + + return -EOPNOTSUPP; +} + +/* Translations due to the fact that OSF's time_t is an int. Which + affects all sorts of things, like timeval and itimerval. */ + +extern struct timezone sys_tz; + +struct timeval32 +{ + int tv_sec, tv_usec; +}; + +struct itimerval32 +{ + struct timeval32 it_interval; + struct timeval32 it_value; +}; + +static inline long +get_tv32(struct timeval *o, struct timeval32 __user *i) +{ + return (!access_ok(VERIFY_READ, i, sizeof(*i)) || + (__get_user(o->tv_sec, &i->tv_sec) | + __get_user(o->tv_usec, &i->tv_usec))); +} + +static inline long +put_tv32(struct timeval32 __user *o, struct timeval *i) +{ + return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || + (__put_user(i->tv_sec, &o->tv_sec) | + __put_user(i->tv_usec, &o->tv_usec))); +} + +static inline long +get_it32(struct itimerval *o, struct itimerval32 __user *i) +{ + return (!access_ok(VERIFY_READ, i, sizeof(*i)) || + (__get_user(o->it_interval.tv_sec, &i->it_interval.tv_sec) | + __get_user(o->it_interval.tv_usec, &i->it_interval.tv_usec) | + __get_user(o->it_value.tv_sec, &i->it_value.tv_sec) | + __get_user(o->it_value.tv_usec, &i->it_value.tv_usec))); +} + +static inline long +put_it32(struct itimerval32 __user *o, struct itimerval *i) +{ + return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || + (__put_user(i->it_interval.tv_sec, &o->it_interval.tv_sec) | + __put_user(i->it_interval.tv_usec, &o->it_interval.tv_usec) | + __put_user(i->it_value.tv_sec, &o->it_value.tv_sec) | + __put_user(i->it_value.tv_usec, &o->it_value.tv_usec))); +} + +static inline void +jiffies_to_timeval32(unsigned long jiffies, struct timeval32 *value) +{ + value->tv_usec = (jiffies % HZ) * (1000000L / HZ); + value->tv_sec = jiffies / HZ; +} + +SYSCALL_DEFINE2(osf_gettimeofday, struct timeval32 __user *, tv, + struct timezone __user *, tz) +{ + if (tv) { + struct timeval ktv; + do_gettimeofday(&ktv); + if (put_tv32(tv, &ktv)) + return -EFAULT; + } + if (tz) { + if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) + return -EFAULT; + } + return 0; +} + +SYSCALL_DEFINE2(osf_settimeofday, struct timeval32 __user *, tv, + struct timezone __user *, tz) +{ + struct timespec kts; + struct timezone ktz; + + if (tv) { + if (get_tv32((struct timeval *)&kts, tv)) + return -EFAULT; + } + if (tz) { + if (copy_from_user(&ktz, tz, sizeof(*tz))) + return -EFAULT; + } + + kts.tv_nsec *= 1000; + + return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); +} + +SYSCALL_DEFINE2(osf_getitimer, int, which, struct itimerval32 __user *, it) +{ + struct itimerval kit; + int error; + + error = do_getitimer(which, &kit); + if (!error && put_it32(it, &kit)) + error = -EFAULT; + + return error; +} + +SYSCALL_DEFINE3(osf_setitimer, int, which, struct itimerval32 __user *, in, + struct itimerval32 __user *, out) +{ + struct itimerval kin, kout; + int error; + + if (in) { + if (get_it32(&kin, in)) + return -EFAULT; + } else + memset(&kin, 0, sizeof(kin)); + + error = do_setitimer(which, &kin, out ? &kout : NULL); + if (error || !out) + return error; + + if (put_it32(out, &kout)) + return -EFAULT; + + return 0; + +} + +SYSCALL_DEFINE2(osf_utimes, const char __user *, filename, + struct timeval32 __user *, tvs) +{ + struct timespec tv[2]; + + if (tvs) { + struct timeval ktvs[2]; + if (get_tv32(&ktvs[0], &tvs[0]) || + get_tv32(&ktvs[1], &tvs[1])) + return -EFAULT; + + if (ktvs[0].tv_usec < 0 || ktvs[0].tv_usec >= 1000000 || + ktvs[1].tv_usec < 0 || ktvs[1].tv_usec >= 1000000) + return -EINVAL; + + tv[0].tv_sec = ktvs[0].tv_sec; + tv[0].tv_nsec = 1000 * ktvs[0].tv_usec; + tv[1].tv_sec = ktvs[1].tv_sec; + tv[1].tv_nsec = 1000 * ktvs[1].tv_usec; + } + + return do_utimes(AT_FDCWD, filename, tvs ? tv : NULL, 0); +} + +SYSCALL_DEFINE5(osf_select, int, n, fd_set __user *, inp, fd_set __user *, outp, + fd_set __user *, exp, struct timeval32 __user *, tvp) +{ + struct timespec end_time, *to = NULL; + if (tvp) { + time_t sec, usec; + + to = &end_time; + + if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp)) + || __get_user(sec, &tvp->tv_sec) + || __get_user(usec, &tvp->tv_usec)) { + return -EFAULT; + } + + if (sec < 0 || usec < 0) + return -EINVAL; + + if (poll_select_set_timeout(to, sec, usec * NSEC_PER_USEC)) + return -EINVAL; + + } + + /* OSF does not copy back the remaining time. */ + return core_sys_select(n, inp, outp, exp, to); +} + +struct rusage32 { + struct timeval32 ru_utime; /* user time used */ + struct timeval32 ru_stime; /* system time used */ + long ru_maxrss; /* maximum resident set size */ + long ru_ixrss; /* integral shared memory size */ + long ru_idrss; /* integral unshared data size */ + long ru_isrss; /* integral unshared stack size */ + long ru_minflt; /* page reclaims */ + long ru_majflt; /* page faults */ + long ru_nswap; /* swaps */ + long ru_inblock; /* block input operations */ + long ru_oublock; /* block output operations */ + long ru_msgsnd; /* messages sent */ + long ru_msgrcv; /* messages received */ + long ru_nsignals; /* signals received */ + long ru_nvcsw; /* voluntary context switches */ + long ru_nivcsw; /* involuntary " */ +}; + +SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru) +{ + struct rusage32 r; + + if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN) + return -EINVAL; + + memset(&r, 0, sizeof(r)); + switch (who) { + case RUSAGE_SELF: + jiffies_to_timeval32(current->utime, &r.ru_utime); + jiffies_to_timeval32(current->stime, &r.ru_stime); + r.ru_minflt = current->min_flt; + r.ru_majflt = current->maj_flt; + break; + case RUSAGE_CHILDREN: + jiffies_to_timeval32(current->signal->cutime, &r.ru_utime); + jiffies_to_timeval32(current->signal->cstime, &r.ru_stime); + r.ru_minflt = current->signal->cmin_flt; + r.ru_majflt = current->signal->cmaj_flt; + break; + } + + return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; +} + +SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options, + struct rusage32 __user *, ur) +{ + struct rusage r; + long ret, err; + unsigned int status = 0; + mm_segment_t old_fs; + + if (!ur) + return sys_wait4(pid, ustatus, options, NULL); + + old_fs = get_fs(); + + set_fs (KERNEL_DS); + ret = sys_wait4(pid, (unsigned int __user *) &status, options, + (struct rusage __user *) &r); + set_fs (old_fs); + + if (!access_ok(VERIFY_WRITE, ur, sizeof(*ur))) + return -EFAULT; + + err = 0; + err |= put_user(status, ustatus); + err |= __put_user(r.ru_utime.tv_sec, &ur->ru_utime.tv_sec); + err |= __put_user(r.ru_utime.tv_usec, &ur->ru_utime.tv_usec); + err |= __put_user(r.ru_stime.tv_sec, &ur->ru_stime.tv_sec); + err |= __put_user(r.ru_stime.tv_usec, &ur->ru_stime.tv_usec); + err |= __put_user(r.ru_maxrss, &ur->ru_maxrss); + err |= __put_user(r.ru_ixrss, &ur->ru_ixrss); + err |= __put_user(r.ru_idrss, &ur->ru_idrss); + err |= __put_user(r.ru_isrss, &ur->ru_isrss); + err |= __put_user(r.ru_minflt, &ur->ru_minflt); + err |= __put_user(r.ru_majflt, &ur->ru_majflt); + err |= __put_user(r.ru_nswap, &ur->ru_nswap); + err |= __put_user(r.ru_inblock, &ur->ru_inblock); + err |= __put_user(r.ru_oublock, &ur->ru_oublock); + err |= __put_user(r.ru_msgsnd, &ur->ru_msgsnd); + err |= __put_user(r.ru_msgrcv, &ur->ru_msgrcv); + err |= __put_user(r.ru_nsignals, &ur->ru_nsignals); + err |= __put_user(r.ru_nvcsw, &ur->ru_nvcsw); + err |= __put_user(r.ru_nivcsw, &ur->ru_nivcsw); + + return err ? err : ret; +} + +/* + * I don't know what the parameters are: the first one + * seems to be a timeval pointer, and I suspect the second + * one is the time remaining.. Ho humm.. No documentation. + */ +SYSCALL_DEFINE2(osf_usleep_thread, struct timeval32 __user *, sleep, + struct timeval32 __user *, remain) +{ + struct timeval tmp; + unsigned long ticks; + + if (get_tv32(&tmp, sleep)) + goto fault; + + ticks = timeval_to_jiffies(&tmp); + + ticks = schedule_timeout_interruptible(ticks); + + if (remain) { + jiffies_to_timeval(ticks, &tmp); + if (put_tv32(remain, &tmp)) + goto fault; + } + + return 0; + fault: + return -EFAULT; +} + + +struct timex32 { + unsigned int modes; /* mode selector */ + long offset; /* time offset (usec) */ + long freq; /* frequency offset (scaled ppm) */ + long maxerror; /* maximum error (usec) */ + long esterror; /* estimated error (usec) */ + int status; /* clock command/status */ + long constant; /* pll time constant */ + long precision; /* clock precision (usec) (read only) */ + long tolerance; /* clock frequency tolerance (ppm) + * (read only) + */ + struct timeval32 time; /* (read only) */ + long tick; /* (modified) usecs between clock ticks */ + + long ppsfreq; /* pps frequency (scaled ppm) (ro) */ + long jitter; /* pps jitter (us) (ro) */ + int shift; /* interval duration (s) (shift) (ro) */ + long stabil; /* pps stability (scaled ppm) (ro) */ + long jitcnt; /* jitter limit exceeded (ro) */ + long calcnt; /* calibration intervals (ro) */ + long errcnt; /* calibration errors (ro) */ + long stbcnt; /* stability limit exceeded (ro) */ + + int :32; int :32; int :32; int :32; + int :32; int :32; int :32; int :32; + int :32; int :32; int :32; int :32; +}; + +SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p) +{ + struct timex txc; + int ret; + + /* copy relevant bits of struct timex. */ + if (copy_from_user(&txc, txc_p, offsetof(struct timex32, time)) || + copy_from_user(&txc.tick, &txc_p->tick, sizeof(struct timex32) - + offsetof(struct timex32, time))) + return -EFAULT; + + ret = do_adjtimex(&txc); + if (ret < 0) + return ret; + + /* copy back to timex32 */ + if (copy_to_user(txc_p, &txc, offsetof(struct timex32, time)) || + (copy_to_user(&txc_p->tick, &txc.tick, sizeof(struct timex32) - + offsetof(struct timex32, tick))) || + (put_tv32(&txc_p->time, &txc.time))) + return -EFAULT; + + return ret; +} + +/* Get an address range which is currently unmapped. Similar to the + generic version except that we know how to honor ADDR_LIMIT_32BIT. */ + +static unsigned long +arch_get_unmapped_area_1(unsigned long addr, unsigned long len, + unsigned long limit) +{ + struct vm_area_struct *vma = find_vma(current->mm, addr); + + while (1) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (limit - len < addr) + return -ENOMEM; + if (!vma || addr + len <= vma->vm_start) + return addr; + addr = vma->vm_end; + vma = vma->vm_next; + } +} + +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + unsigned long limit; + + /* "32 bit" actually means 31 bit, since pointers sign extend. */ + if (current->personality & ADDR_LIMIT_32BIT) + limit = 0x80000000; + else + limit = TASK_SIZE; + + if (len > limit) + return -ENOMEM; + + if (flags & MAP_FIXED) + return addr; + + /* First, see if the given suggestion fits. + + The OSF/1 loader (/sbin/loader) relies on us returning an + address larger than the requested if one exists, which is + a terribly broken way to program. + + That said, I can see the use in being able to suggest not + merely specific addresses, but regions of memory -- perhaps + this feature should be incorporated into all ports? */ + + if (addr) { + addr = arch_get_unmapped_area_1 (PAGE_ALIGN(addr), len, limit); + if (addr != (unsigned long) -ENOMEM) + return addr; + } + + /* Next, try allocating at TASK_UNMAPPED_BASE. */ + addr = arch_get_unmapped_area_1 (PAGE_ALIGN(TASK_UNMAPPED_BASE), + len, limit); + if (addr != (unsigned long) -ENOMEM) + return addr; + + /* Finally, try allocating in low memory. */ + addr = arch_get_unmapped_area_1 (PAGE_SIZE, len, limit); + + return addr; +} + +#ifdef CONFIG_OSF4_COMPAT + +/* Clear top 32 bits of iov_len in the user's buffer for + compatibility with old versions of OSF/1 where iov_len + was defined as int. */ +static int +osf_fix_iov_len(const struct iovec __user *iov, unsigned long count) +{ + unsigned long i; + + for (i = 0 ; i < count ; i++) { + int __user *iov_len_high = (int __user *)&iov[i].iov_len + 1; + + if (put_user(0, iov_len_high)) + return -EFAULT; + } + return 0; +} + +SYSCALL_DEFINE3(osf_readv, unsigned long, fd, + const struct iovec __user *, vector, unsigned long, count) +{ + if (unlikely(personality(current->personality) == PER_OSF4)) + if (osf_fix_iov_len(vector, count)) + return -EFAULT; + return sys_readv(fd, vector, count); +} + +SYSCALL_DEFINE3(osf_writev, unsigned long, fd, + const struct iovec __user *, vector, unsigned long, count) +{ + if (unlikely(personality(current->personality) == PER_OSF4)) + if (osf_fix_iov_len(vector, count)) + return -EFAULT; + return sys_writev(fd, vector, count); +} + +#endif diff --git a/arch/alpha/kernel/pc873xx.c b/arch/alpha/kernel/pc873xx.c new file mode 100644 index 00000000..27dcbff8 --- /dev/null +++ b/arch/alpha/kernel/pc873xx.c @@ -0,0 +1,88 @@ +#include <linux/ioport.h> +#include <asm/io.h> + +#include "pc873xx.h" + +static unsigned pc873xx_probelist[] = {0x398, 0x26e, 0}; + +static char *pc873xx_names[] = { + "PC87303", "PC87306", "PC87312", "PC87332", "PC87334" +}; + +static unsigned int base, model; + + +unsigned int __init pc873xx_get_base() +{ + return base; +} + +char *__init pc873xx_get_model() +{ + return pc873xx_names[model]; +} + +static unsigned char __init pc873xx_read(unsigned int base, int reg) +{ + outb(reg, base); + return inb(base + 1); +} + +static void __init pc873xx_write(unsigned int base, int reg, unsigned char data) +{ + unsigned long flags; + + local_irq_save(flags); + outb(reg, base); + outb(data, base + 1); + outb(data, base + 1); /* Must be written twice */ + local_irq_restore(flags); +} + +int __init pc873xx_probe(void) +{ + int val, index = 0; + + while ((base = pc873xx_probelist[index++])) { + + if (request_region(base, 2, "Super IO PC873xx") == NULL) + continue; + + val = pc873xx_read(base, REG_SID); + if ((val & 0xf0) == 0x10) { + model = PC87332; + break; + } else if ((val & 0xf8) == 0x70) { + model = PC87306; + break; + } else if ((val & 0xf8) == 0x50) { + model = PC87334; + break; + } else if ((val & 0xf8) == 0x40) { + model = PC87303; + break; + } + + release_region(base, 2); + } + + return (base == 0) ? -1 : 1; +} + +void __init pc873xx_enable_epp19(void) +{ + unsigned char data; + + printk(KERN_INFO "PC873xx enabling EPP v1.9\n"); + data = pc873xx_read(base, REG_PCR); + pc873xx_write(base, REG_PCR, (data & 0xFC) | 0x02); +} + +void __init pc873xx_enable_ide(void) +{ + unsigned char data; + + printk(KERN_INFO "PC873xx enabling IDE interrupt\n"); + data = pc873xx_read(base, REG_FER); + pc873xx_write(base, REG_FER, data | 0x40); +} diff --git a/arch/alpha/kernel/pc873xx.h b/arch/alpha/kernel/pc873xx.h new file mode 100644 index 00000000..25e16956 --- /dev/null +++ b/arch/alpha/kernel/pc873xx.h @@ -0,0 +1,35 @@ + +#ifndef _PC873xx_H_ +#define _PC873xx_H_ + +/* + * Control Register Values + */ +#define REG_FER 0x00 +#define REG_FAR 0x01 +#define REG_PTR 0x02 +#define REG_FCR 0x03 +#define REG_PCR 0x04 +#define REG_KRR 0x05 +#define REG_PMC 0x06 +#define REG_TUP 0x07 +#define REG_SID 0x08 +#define REG_ASC 0x09 +#define REG_IRC 0x0e + +/* + * Model numbers + */ +#define PC87303 0 +#define PC87306 1 +#define PC87312 2 +#define PC87332 3 +#define PC87334 4 + +int pc873xx_probe(void); +unsigned int pc873xx_get_base(void); +char *pc873xx_get_model(void); +void pc873xx_enable_epp19(void); +void pc873xx_enable_ide(void); + +#endif diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c new file mode 100644 index 00000000..df24b76f --- /dev/null +++ b/arch/alpha/kernel/pci-noop.c @@ -0,0 +1,189 @@ +/* + * linux/arch/alpha/kernel/pci-noop.c + * + * Stub PCI interfaces for Jensen-specific kernels. + */ + +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/gfp.h> +#include <linux/capability.h> +#include <linux/mm.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/dma-mapping.h> +#include <linux/scatterlist.h> + +#include "proto.h" + + +/* + * The PCI controller list. + */ + +struct pci_controller *hose_head, **hose_tail = &hose_head; +struct pci_controller *pci_isa_hose; + + +struct pci_controller * __init +alloc_pci_controller(void) +{ + struct pci_controller *hose; + + hose = alloc_bootmem(sizeof(*hose)); + + *hose_tail = hose; + hose_tail = &hose->next; + + return hose; +} + +struct resource * __init +alloc_resource(void) +{ + struct resource *res; + + res = alloc_bootmem(sizeof(*res)); + + return res; +} + +asmlinkage long +sys_pciconfig_iobase(long which, unsigned long bus, unsigned long dfn) +{ + struct pci_controller *hose; + + /* from hose or from bus.devfn */ + if (which & IOBASE_FROM_HOSE) { + for (hose = hose_head; hose; hose = hose->next) + if (hose->index == bus) + break; + if (!hose) + return -ENODEV; + } else { + /* Special hook for ISA access. */ + if (bus == 0 && dfn == 0) + hose = pci_isa_hose; + else + return -ENODEV; + } + + switch (which & ~IOBASE_FROM_HOSE) { + case IOBASE_HOSE: + return hose->index; + case IOBASE_SPARSE_MEM: + return hose->sparse_mem_base; + case IOBASE_DENSE_MEM: + return hose->dense_mem_base; + case IOBASE_SPARSE_IO: + return hose->sparse_io_base; + case IOBASE_DENSE_IO: + return hose->dense_io_base; + case IOBASE_ROOT_BUS: + return hose->bus->number; + } + + return -EOPNOTSUPP; +} + +asmlinkage long +sys_pciconfig_read(unsigned long bus, unsigned long dfn, + unsigned long off, unsigned long len, void *buf) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + else + return -ENODEV; +} + +asmlinkage long +sys_pciconfig_write(unsigned long bus, unsigned long dfn, + unsigned long off, unsigned long len, void *buf) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + else + return -ENODEV; +} + +static void *alpha_noop_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + struct dma_attrs *attrs) +{ + void *ret; + + if (!dev || *dev->dma_mask >= 0xffffffffUL) + gfp &= ~GFP_DMA; + ret = (void *)__get_free_pages(gfp, get_order(size)); + if (ret) { + memset(ret, 0, size); + *dma_handle = virt_to_phys(ret); + } + return ret; +} + +static void alpha_noop_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr, + struct dma_attrs *attrs) +{ + free_pages((unsigned long)cpu_addr, get_order(size)); +} + +static dma_addr_t alpha_noop_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return page_to_pa(page) + offset; +} + +static int alpha_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + int i; + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, i) { + void *va; + + BUG_ON(!sg_page(sg)); + va = sg_virt(sg); + sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va); + sg_dma_len(sg) = sg->length; + } + + return nents; +} + +static int alpha_noop_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return 0; +} + +static int alpha_noop_supported(struct device *dev, u64 mask) +{ + return mask < 0x00ffffffUL ? 0 : 1; +} + +static int alpha_noop_set_mask(struct device *dev, u64 mask) +{ + if (!dev->dma_mask || !dma_supported(dev, mask)) + return -EIO; + + *dev->dma_mask = mask; + return 0; +} + +struct dma_map_ops alpha_noop_ops = { + .alloc = alpha_noop_alloc_coherent, + .free = alpha_noop_free_coherent, + .map_page = alpha_noop_map_page, + .map_sg = alpha_noop_map_sg, + .mapping_error = alpha_noop_mapping_error, + .dma_supported = alpha_noop_supported, + .set_dma_mask = alpha_noop_set_mask, +}; + +struct dma_map_ops *dma_ops = &alpha_noop_ops; +EXPORT_SYMBOL(dma_ops); diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c new file mode 100644 index 00000000..53649c7d --- /dev/null +++ b/arch/alpha/kernel/pci-sysfs.c @@ -0,0 +1,369 @@ +/* + * arch/alpha/kernel/pci-sysfs.c + * + * Copyright (C) 2009 Ivan Kokshaysky + * + * Alpha PCI resource files. + * + * Loosely based on generic HAVE_PCI_MMAP implementation in + * drivers/pci/pci-sysfs.c + */ + +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/slab.h> +#include <linux/pci.h> + +static int hose_mmap_page_range(struct pci_controller *hose, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_type, int sparse) +{ + unsigned long base; + + if (mmap_type == pci_mmap_mem) + base = sparse ? hose->sparse_mem_base : hose->dense_mem_base; + else + base = sparse ? hose->sparse_io_base : hose->dense_io_base; + + vma->vm_pgoff += base >> PAGE_SHIFT; + vma->vm_flags |= (VM_IO | VM_RESERVED); + + return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + +static int __pci_mmap_fits(struct pci_dev *pdev, int num, + struct vm_area_struct *vma, int sparse) +{ + unsigned long nr, start, size; + int shift = sparse ? 5 : 0; + + nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + start = vma->vm_pgoff; + size = ((pci_resource_len(pdev, num) - 1) >> (PAGE_SHIFT - shift)) + 1; + + if (start < size && size - start >= nr) + return 1; + WARN(1, "process \"%s\" tried to map%s 0x%08lx-0x%08lx on %s BAR %d " + "(size 0x%08lx)\n", + current->comm, sparse ? " sparse" : "", start, start + nr, + pci_name(pdev), num, size); + return 0; +} + +/** + * pci_mmap_resource - map a PCI resource into user memory space + * @kobj: kobject for mapping + * @attr: struct bin_attribute for the file being mapped + * @vma: struct vm_area_struct passed into the mmap + * @sparse: address space type + * + * Use the bus mapping routines to map a PCI resource into userspace. + */ +static int pci_mmap_resource(struct kobject *kobj, + struct bin_attribute *attr, + struct vm_area_struct *vma, int sparse) +{ + struct pci_dev *pdev = to_pci_dev(container_of(kobj, + struct device, kobj)); + struct resource *res = attr->private; + enum pci_mmap_state mmap_type; + struct pci_bus_region bar; + int i; + + for (i = 0; i < PCI_ROM_RESOURCE; i++) + if (res == &pdev->resource[i]) + break; + if (i >= PCI_ROM_RESOURCE) + return -ENODEV; + + if (!__pci_mmap_fits(pdev, i, vma, sparse)) + return -EINVAL; + + if (iomem_is_exclusive(res->start)) + return -EINVAL; + + pcibios_resource_to_bus(pdev, &bar, res); + vma->vm_pgoff += bar.start >> (PAGE_SHIFT - (sparse ? 5 : 0)); + mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io; + + return hose_mmap_page_range(pdev->sysdata, vma, mmap_type, sparse); +} + +static int pci_mmap_resource_sparse(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + return pci_mmap_resource(kobj, attr, vma, 1); +} + +static int pci_mmap_resource_dense(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + return pci_mmap_resource(kobj, attr, vma, 0); +} + +/** + * pci_remove_resource_files - cleanup resource files + * @dev: dev to cleanup + * + * If we created resource files for @dev, remove them from sysfs and + * free their resources. + */ +void pci_remove_resource_files(struct pci_dev *pdev) +{ + int i; + + for (i = 0; i < PCI_ROM_RESOURCE; i++) { + struct bin_attribute *res_attr; + + res_attr = pdev->res_attr[i]; + if (res_attr) { + sysfs_remove_bin_file(&pdev->dev.kobj, res_attr); + kfree(res_attr); + } + + res_attr = pdev->res_attr_wc[i]; + if (res_attr) { + sysfs_remove_bin_file(&pdev->dev.kobj, res_attr); + kfree(res_attr); + } + } +} + +static int sparse_mem_mmap_fits(struct pci_dev *pdev, int num) +{ + struct pci_bus_region bar; + struct pci_controller *hose = pdev->sysdata; + long dense_offset; + unsigned long sparse_size; + + pcibios_resource_to_bus(pdev, &bar, &pdev->resource[num]); + + /* All core logic chips have 4G sparse address space, except + CIA which has 16G (see xxx_SPARSE_MEM and xxx_DENSE_MEM + definitions in asm/core_xxx.h files). This corresponds + to 128M or 512M of the bus space. */ + dense_offset = (long)(hose->dense_mem_base - hose->sparse_mem_base); + sparse_size = dense_offset >= 0x400000000UL ? 0x20000000 : 0x8000000; + + return bar.end < sparse_size; +} + +static int pci_create_one_attr(struct pci_dev *pdev, int num, char *name, + char *suffix, struct bin_attribute *res_attr, + unsigned long sparse) +{ + size_t size = pci_resource_len(pdev, num); + + sprintf(name, "resource%d%s", num, suffix); + res_attr->mmap = sparse ? pci_mmap_resource_sparse : + pci_mmap_resource_dense; + res_attr->attr.name = name; + res_attr->attr.mode = S_IRUSR | S_IWUSR; + res_attr->size = sparse ? size << 5 : size; + res_attr->private = &pdev->resource[num]; + return sysfs_create_bin_file(&pdev->dev.kobj, res_attr); +} + +static int pci_create_attr(struct pci_dev *pdev, int num) +{ + /* allocate attribute structure, piggyback attribute name */ + int retval, nlen1, nlen2 = 0, res_count = 1; + unsigned long sparse_base, dense_base; + struct bin_attribute *attr; + struct pci_controller *hose = pdev->sysdata; + char *suffix, *attr_name; + + suffix = ""; /* Assume bwx machine, normal resourceN files. */ + nlen1 = 10; + + if (pdev->resource[num].flags & IORESOURCE_MEM) { + sparse_base = hose->sparse_mem_base; + dense_base = hose->dense_mem_base; + if (sparse_base && !sparse_mem_mmap_fits(pdev, num)) { + sparse_base = 0; + suffix = "_dense"; + nlen1 = 16; /* resourceN_dense */ + } + } else { + sparse_base = hose->sparse_io_base; + dense_base = hose->dense_io_base; + } + + if (sparse_base) { + suffix = "_sparse"; + nlen1 = 17; + if (dense_base) { + nlen2 = 16; /* resourceN_dense */ + res_count = 2; + } + } + + attr = kzalloc(sizeof(*attr) * res_count + nlen1 + nlen2, GFP_ATOMIC); + if (!attr) + return -ENOMEM; + + /* Create bwx, sparse or single dense file */ + attr_name = (char *)(attr + res_count); + pdev->res_attr[num] = attr; + retval = pci_create_one_attr(pdev, num, attr_name, suffix, attr, + sparse_base); + if (retval || res_count == 1) + return retval; + + /* Create dense file */ + attr_name += nlen1; + attr++; + pdev->res_attr_wc[num] = attr; + return pci_create_one_attr(pdev, num, attr_name, "_dense", attr, 0); +} + +/** + * pci_create_resource_files - create resource files in sysfs for @dev + * @dev: dev in question + * + * Walk the resources in @dev creating files for each resource available. + */ +int pci_create_resource_files(struct pci_dev *pdev) +{ + int i; + int retval; + + /* Expose the PCI resources from this device as files */ + for (i = 0; i < PCI_ROM_RESOURCE; i++) { + + /* skip empty resources */ + if (!pci_resource_len(pdev, i)) + continue; + + retval = pci_create_attr(pdev, i); + if (retval) { + pci_remove_resource_files(pdev); + return retval; + } + } + return 0; +} + +/* Legacy I/O bus mapping stuff. */ + +static int __legacy_mmap_fits(struct pci_controller *hose, + struct vm_area_struct *vma, + unsigned long res_size, int sparse) +{ + unsigned long nr, start, size; + + nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + start = vma->vm_pgoff; + size = ((res_size - 1) >> PAGE_SHIFT) + 1; + + if (start < size && size - start >= nr) + return 1; + WARN(1, "process \"%s\" tried to map%s 0x%08lx-0x%08lx on hose %d " + "(size 0x%08lx)\n", + current->comm, sparse ? " sparse" : "", start, start + nr, + hose->index, size); + return 0; +} + +static inline int has_sparse(struct pci_controller *hose, + enum pci_mmap_state mmap_type) +{ + unsigned long base; + + base = (mmap_type == pci_mmap_mem) ? hose->sparse_mem_base : + hose->sparse_io_base; + + return base != 0; +} + +int pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma, + enum pci_mmap_state mmap_type) +{ + struct pci_controller *hose = bus->sysdata; + int sparse = has_sparse(hose, mmap_type); + unsigned long res_size; + + res_size = (mmap_type == pci_mmap_mem) ? bus->legacy_mem->size : + bus->legacy_io->size; + if (!__legacy_mmap_fits(hose, vma, res_size, sparse)) + return -EINVAL; + + return hose_mmap_page_range(hose, vma, mmap_type, sparse); +} + +/** + * pci_adjust_legacy_attr - adjustment of legacy file attributes + * @b: bus to create files under + * @mmap_type: I/O port or memory + * + * Adjust file name and size for sparse mappings. + */ +void pci_adjust_legacy_attr(struct pci_bus *bus, enum pci_mmap_state mmap_type) +{ + struct pci_controller *hose = bus->sysdata; + + if (!has_sparse(hose, mmap_type)) + return; + + if (mmap_type == pci_mmap_mem) { + bus->legacy_mem->attr.name = "legacy_mem_sparse"; + bus->legacy_mem->size <<= 5; + } else { + bus->legacy_io->attr.name = "legacy_io_sparse"; + bus->legacy_io->size <<= 5; + } + return; +} + +/* Legacy I/O bus read/write functions */ +int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size) +{ + struct pci_controller *hose = bus->sysdata; + + port += hose->io_space->start; + + switch(size) { + case 1: + *((u8 *)val) = inb(port); + return 1; + case 2: + if (port & 1) + return -EINVAL; + *((u16 *)val) = inw(port); + return 2; + case 4: + if (port & 3) + return -EINVAL; + *((u32 *)val) = inl(port); + return 4; + } + return -EINVAL; +} + +int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size) +{ + struct pci_controller *hose = bus->sysdata; + + port += hose->io_space->start; + + switch(size) { + case 1: + outb(port, val); + return 1; + case 2: + if (port & 1) + return -EINVAL; + outw(port, val); + return 2; + case 4: + if (port & 3) + return -EINVAL; + outl(port, val); + return 4; + } + return -EINVAL; +} diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c new file mode 100644 index 00000000..1a629636 --- /dev/null +++ b/arch/alpha/kernel/pci.c @@ -0,0 +1,459 @@ +/* + * linux/arch/alpha/kernel/pci.c + * + * Extruded from code written by + * Dave Rusling (david.rusling@reo.mts.dec.com) + * David Mosberger (davidm@cs.arizona.edu) + */ + +/* 2.3.x PCI/resources, 1999 Andrea Arcangeli <andrea@suse.de> */ + +/* + * Nov 2000, Ivan Kokshaysky <ink@jurassic.park.msu.ru> + * PCI-PCI bridges cleanup + */ +#include <linux/string.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/kernel.h> +#include <linux/bootmem.h> +#include <linux/module.h> +#include <linux/cache.h> +#include <linux/slab.h> +#include <asm/machvec.h> + +#include "proto.h" +#include "pci_impl.h" + + +/* + * Some string constants used by the various core logics. + */ + +const char *const pci_io_names[] = { + "PCI IO bus 0", "PCI IO bus 1", "PCI IO bus 2", "PCI IO bus 3", + "PCI IO bus 4", "PCI IO bus 5", "PCI IO bus 6", "PCI IO bus 7" +}; + +const char *const pci_mem_names[] = { + "PCI mem bus 0", "PCI mem bus 1", "PCI mem bus 2", "PCI mem bus 3", + "PCI mem bus 4", "PCI mem bus 5", "PCI mem bus 6", "PCI mem bus 7" +}; + +const char pci_hae0_name[] = "HAE0"; + +/* + * If PCI_PROBE_ONLY in pci_flags is set, we don't change any PCI resource + * assignments. + */ + +/* + * The PCI controller list. + */ + +struct pci_controller *hose_head, **hose_tail = &hose_head; +struct pci_controller *pci_isa_hose; + +/* + * Quirks. + */ + +static void __init +quirk_isa_bridge(struct pci_dev *dev) +{ + dev->class = PCI_CLASS_BRIDGE_ISA << 8; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82378, quirk_isa_bridge); + +static void __init +quirk_cypress(struct pci_dev *dev) +{ + /* The Notorious Cy82C693 chip. */ + + /* The generic legacy mode IDE fixup in drivers/pci/probe.c + doesn't work correctly with the Cypress IDE controller as + it has non-standard register layout. Fix that. */ + if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE) { + dev->resource[2].start = dev->resource[3].start = 0; + dev->resource[2].end = dev->resource[3].end = 0; + dev->resource[2].flags = dev->resource[3].flags = 0; + if (PCI_FUNC(dev->devfn) == 2) { + dev->resource[0].start = 0x170; + dev->resource[0].end = 0x177; + dev->resource[1].start = 0x376; + dev->resource[1].end = 0x376; + } + } + + /* The Cypress bridge responds on the PCI bus in the address range + 0xffff0000-0xffffffff (conventional x86 BIOS ROM). There is no + way to turn this off. The bridge also supports several extended + BIOS ranges (disabled after power-up), and some consoles do turn + them on. So if we use a large direct-map window, or a large SG + window, we must avoid the entire 0xfff00000-0xffffffff region. */ + if (dev->class >> 8 == PCI_CLASS_BRIDGE_ISA) { + if (__direct_map_base + __direct_map_size >= 0xfff00000UL) + __direct_map_size = 0xfff00000UL - __direct_map_base; + else { + struct pci_controller *hose = dev->sysdata; + struct pci_iommu_arena *pci = hose->sg_pci; + if (pci && pci->dma_base + pci->size >= 0xfff00000UL) + pci->size = 0xfff00000UL - pci->dma_base; + } + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CONTAQ, PCI_DEVICE_ID_CONTAQ_82C693, quirk_cypress); + +/* Called for each device after PCI setup is done. */ +static void __init +pcibios_fixup_final(struct pci_dev *dev) +{ + unsigned int class = dev->class >> 8; + + if (class == PCI_CLASS_BRIDGE_ISA || class == PCI_CLASS_BRIDGE_EISA) { + dev->dma_mask = MAX_ISA_DMA_ADDRESS - 1; + isa_bridge = dev; + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_final); + +/* Just declaring that the power-of-ten prefixes are actually the + power-of-two ones doesn't make it true :) */ +#define KB 1024 +#define MB (1024*KB) +#define GB (1024*MB) + +resource_size_t +pcibios_align_resource(void *data, const struct resource *res, + resource_size_t size, resource_size_t align) +{ + struct pci_dev *dev = data; + struct pci_controller *hose = dev->sysdata; + unsigned long alignto; + resource_size_t start = res->start; + + if (res->flags & IORESOURCE_IO) { + /* Make sure we start at our min on all hoses */ + if (start - hose->io_space->start < PCIBIOS_MIN_IO) + start = PCIBIOS_MIN_IO + hose->io_space->start; + + /* + * Put everything into 0x00-0xff region modulo 0x400 + */ + if (start & 0x300) + start = (start + 0x3ff) & ~0x3ff; + } + else if (res->flags & IORESOURCE_MEM) { + /* Make sure we start at our min on all hoses */ + if (start - hose->mem_space->start < PCIBIOS_MIN_MEM) + start = PCIBIOS_MIN_MEM + hose->mem_space->start; + + /* + * The following holds at least for the Low Cost + * Alpha implementation of the PCI interface: + * + * In sparse memory address space, the first + * octant (16MB) of every 128MB segment is + * aliased to the very first 16 MB of the + * address space (i.e., it aliases the ISA + * memory address space). Thus, we try to + * avoid allocating PCI devices in that range. + * Can be allocated in 2nd-7th octant only. + * Devices that need more than 112MB of + * address space must be accessed through + * dense memory space only! + */ + + /* Align to multiple of size of minimum base. */ + alignto = max_t(resource_size_t, 0x1000, align); + start = ALIGN(start, alignto); + if (hose->sparse_mem_base && size <= 7 * 16*MB) { + if (((start / (16*MB)) & 0x7) == 0) { + start &= ~(128*MB - 1); + start += 16*MB; + start = ALIGN(start, alignto); + } + if (start/(128*MB) != (start + size - 1)/(128*MB)) { + start &= ~(128*MB - 1); + start += (128 + 16)*MB; + start = ALIGN(start, alignto); + } + } + } + + return start; +} +#undef KB +#undef MB +#undef GB + +static int __init +pcibios_init(void) +{ + if (alpha_mv.init_pci) + alpha_mv.init_pci(); + return 0; +} + +subsys_initcall(pcibios_init); + +char * __devinit +pcibios_setup(char *str) +{ + return str; +} + +#ifdef ALPHA_RESTORE_SRM_SETUP +static struct pdev_srm_saved_conf *srm_saved_configs; + +void __devinit +pdev_save_srm_config(struct pci_dev *dev) +{ + struct pdev_srm_saved_conf *tmp; + static int printed = 0; + + if (!alpha_using_srm || pci_has_flag(PCI_PROBE_ONLY)) + return; + + if (!printed) { + printk(KERN_INFO "pci: enabling save/restore of SRM state\n"); + printed = 1; + } + + tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) { + printk(KERN_ERR "%s: kmalloc() failed!\n", __func__); + return; + } + tmp->next = srm_saved_configs; + tmp->dev = dev; + + pci_save_state(dev); + + srm_saved_configs = tmp; +} + +void +pci_restore_srm_config(void) +{ + struct pdev_srm_saved_conf *tmp; + + /* No need to restore if probed only. */ + if (pci_has_flag(PCI_PROBE_ONLY)) + return; + + /* Restore SRM config. */ + for (tmp = srm_saved_configs; tmp; tmp = tmp->next) { + pci_restore_state(tmp->dev); + } +} +#endif + +void __devinit +pcibios_fixup_bus(struct pci_bus *bus) +{ + struct pci_dev *dev = bus->self; + + if (pci_has_flag(PCI_PROBE_ONLY) && dev && + (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { + pci_read_bridge_bases(bus); + } + + list_for_each_entry(dev, &bus->devices, bus_list) { + pdev_save_srm_config(dev); + } +} + +void __init +pcibios_update_irq(struct pci_dev *dev, int irq) +{ + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); +} + +int +pcibios_enable_device(struct pci_dev *dev, int mask) +{ + return pci_enable_resources(dev, mask); +} + +/* + * If we set up a device for bus mastering, we need to check the latency + * timer as certain firmware forgets to set it properly, as seen + * on SX164 and LX164 with SRM. + */ +void +pcibios_set_master(struct pci_dev *dev) +{ + u8 lat; + pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); + if (lat >= 16) return; + printk("PCI: Setting latency timer of device %s to 64\n", + pci_name(dev)); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, 64); +} + +void __init +pcibios_claim_one_bus(struct pci_bus *b) +{ + struct pci_dev *dev; + struct pci_bus *child_bus; + + list_for_each_entry(dev, &b->devices, bus_list) { + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *r = &dev->resource[i]; + + if (r->parent || !r->start || !r->flags) + continue; + if (pci_has_flag(PCI_PROBE_ONLY) || + (r->flags & IORESOURCE_PCI_FIXED)) + pci_claim_resource(dev, i); + } + } + + list_for_each_entry(child_bus, &b->children, node) + pcibios_claim_one_bus(child_bus); +} + +static void __init +pcibios_claim_console_setup(void) +{ + struct pci_bus *b; + + list_for_each_entry(b, &pci_root_buses, node) + pcibios_claim_one_bus(b); +} + +void __init +common_init_pci(void) +{ + struct pci_controller *hose; + struct list_head resources; + struct pci_bus *bus; + int next_busno; + int need_domain_info = 0; + u32 pci_mem_end; + u32 sg_base; + unsigned long end; + + /* Scan all of the recorded PCI controllers. */ + for (next_busno = 0, hose = hose_head; hose; hose = hose->next) { + sg_base = hose->sg_pci ? hose->sg_pci->dma_base : ~0; + + /* Adjust hose mem_space limit to prevent PCI allocations + in the iommu windows. */ + pci_mem_end = min((u32)__direct_map_base, sg_base) - 1; + end = hose->mem_space->start + pci_mem_end; + if (hose->mem_space->end > end) + hose->mem_space->end = end; + + INIT_LIST_HEAD(&resources); + pci_add_resource_offset(&resources, hose->io_space, + hose->io_space->start); + pci_add_resource_offset(&resources, hose->mem_space, + hose->mem_space->start); + + bus = pci_scan_root_bus(NULL, next_busno, alpha_mv.pci_ops, + hose, &resources); + hose->bus = bus; + hose->need_domain_info = need_domain_info; + next_busno = bus->subordinate + 1; + /* Don't allow 8-bit bus number overflow inside the hose - + reserve some space for bridges. */ + if (next_busno > 224) { + next_busno = 0; + need_domain_info = 1; + } + } + + pcibios_claim_console_setup(); + + pci_assign_unassigned_resources(); + pci_fixup_irqs(alpha_mv.pci_swizzle, alpha_mv.pci_map_irq); +} + + +struct pci_controller * __init +alloc_pci_controller(void) +{ + struct pci_controller *hose; + + hose = alloc_bootmem(sizeof(*hose)); + + *hose_tail = hose; + hose_tail = &hose->next; + + return hose; +} + +struct resource * __init +alloc_resource(void) +{ + struct resource *res; + + res = alloc_bootmem(sizeof(*res)); + + return res; +} + + +/* Provide information on locations of various I/O regions in physical + memory. Do this on a per-card basis so that we choose the right hose. */ + +asmlinkage long +sys_pciconfig_iobase(long which, unsigned long bus, unsigned long dfn) +{ + struct pci_controller *hose; + struct pci_dev *dev; + + /* from hose or from bus.devfn */ + if (which & IOBASE_FROM_HOSE) { + for(hose = hose_head; hose; hose = hose->next) + if (hose->index == bus) break; + if (!hose) return -ENODEV; + } else { + /* Special hook for ISA access. */ + if (bus == 0 && dfn == 0) { + hose = pci_isa_hose; + } else { + dev = pci_get_bus_and_slot(bus, dfn); + if (!dev) + return -ENODEV; + hose = dev->sysdata; + pci_dev_put(dev); + } + } + + switch (which & ~IOBASE_FROM_HOSE) { + case IOBASE_HOSE: + return hose->index; + case IOBASE_SPARSE_MEM: + return hose->sparse_mem_base; + case IOBASE_DENSE_MEM: + return hose->dense_mem_base; + case IOBASE_SPARSE_IO: + return hose->sparse_io_base; + case IOBASE_DENSE_IO: + return hose->dense_io_base; + case IOBASE_ROOT_BUS: + return hose->bus->number; + } + + return -EOPNOTSUPP; +} + +/* Destroy an __iomem token. Not copied from lib/iomap.c. */ + +void pci_iounmap(struct pci_dev *dev, void __iomem * addr) +{ + if (__is_mmio(addr)) + iounmap(addr); +} + +EXPORT_SYMBOL(pci_iounmap); + +/* FIXME: Some boxes have multiple ISA bridges! */ +struct pci_dev *isa_bridge; +EXPORT_SYMBOL(isa_bridge); diff --git a/arch/alpha/kernel/pci_impl.h b/arch/alpha/kernel/pci_impl.h new file mode 100644 index 00000000..2b0ac429 --- /dev/null +++ b/arch/alpha/kernel/pci_impl.h @@ -0,0 +1,201 @@ +/* + * linux/arch/alpha/kernel/pci_impl.h + * + * This file contains declarations and inline functions for interfacing + * with the PCI initialization routines. + */ + +struct pci_dev; +struct pci_controller; +struct pci_iommu_arena; + +/* + * We can't just blindly use 64K for machines with EISA busses; they + * may also have PCI-PCI bridges present, and then we'd configure the + * bridge incorrectly. + * + * Also, we start at 0x8000 or 0x9000, in hopes to get all devices' + * IO space areas allocated *before* 0xC000; this is because certain + * BIOSes (Millennium for one) use PCI Config space "mechanism #2" + * accesses to probe the bus. If a device's registers appear at 0xC000, + * it may see an INx/OUTx at that address during BIOS emulation of the + * VGA BIOS, and some cards, notably Adaptec 2940UW, take mortal offense. + */ + +#define EISA_DEFAULT_IO_BASE 0x9000 /* start above 8th slot */ +#define DEFAULT_IO_BASE 0x8000 /* start at 8th slot */ + +/* + * We try to make the DEFAULT_MEM_BASE addresses *always* have more than + * a single bit set. This is so that devices like the broken Myrinet card + * will always have a PCI memory address that will never match a IDSEL + * address in PCI Config space, which can cause problems with early rev cards. + */ + +/* + * An XL is AVANTI (APECS) family, *but* it has only 27 bits of ISA address + * that get passed through the PCI<->ISA bridge chip. Although this causes + * us to set the PCI->Mem window bases lower than normal, we still allocate + * PCI bus devices' memory addresses *below* the low DMA mapping window, + * and hope they fit below 64Mb (to avoid conflicts), and so that they can + * be accessed via SPARSE space. + * + * We accept the risk that a broken Myrinet card will be put into a true XL + * and thus can more easily run into the problem described below. + */ +#define XL_DEFAULT_MEM_BASE ((16+2)*1024*1024) /* 16M to 64M-1 is avail */ + +/* + * APECS and LCA have only 34 bits for physical addresses, thus limiting PCI + * bus memory addresses for SPARSE access to be less than 128Mb. + */ +#define APECS_AND_LCA_DEFAULT_MEM_BASE ((16+2)*1024*1024) + +/* + * Because MCPCIA and T2 core logic support more bits for + * physical addresses, they should allow an expanded range of SPARSE + * memory addresses. However, we do not use them all, in order to + * avoid the HAE manipulation that would be needed. + */ +#define MCPCIA_DEFAULT_MEM_BASE ((32+2)*1024*1024) +#define T2_DEFAULT_MEM_BASE ((16+1)*1024*1024) + +/* + * Because CIA and PYXIS have more bits for physical addresses, + * they support an expanded range of SPARSE memory addresses. + */ +#define DEFAULT_MEM_BASE ((128+16)*1024*1024) + +/* ??? Experimenting with no HAE for CIA. */ +#define CIA_DEFAULT_MEM_BASE ((32+2)*1024*1024) + +#define IRONGATE_DEFAULT_MEM_BASE ((256*8-16)*1024*1024) + +#define DEFAULT_AGP_APER_SIZE (64*1024*1024) + +/* + * A small note about bridges and interrupts. The DECchip 21050 (and + * later) adheres to the PCI-PCI bridge specification. This says that + * the interrupts on the other side of a bridge are swizzled in the + * following manner: + * + * Dev Interrupt Interrupt + * Pin on Pin on + * Device Connector + * + * 4 A A + * B B + * C C + * D D + * + * 5 A B + * B C + * C D + * D A + * + * 6 A C + * B D + * C A + * D B + * + * 7 A D + * B A + * C B + * D C + * + * Where A = pin 1, B = pin 2 and so on and pin=0 = default = A. + * Thus, each swizzle is ((pin-1) + (device#-4)) % 4 + * + * pci_swizzle_interrupt_pin() swizzles for exactly one bridge. The routine + * pci_common_swizzle() handles multiple bridges. But there are a + * couple boards that do strange things. + */ + + +/* The following macro is used to implement the table-based irq mapping + function for all single-bus Alphas. */ + +#define COMMON_TABLE_LOOKUP \ +({ long _ctl_ = -1; \ + if (slot >= min_idsel && slot <= max_idsel && pin < irqs_per_slot) \ + _ctl_ = irq_tab[slot - min_idsel][pin]; \ + _ctl_; }) + + +/* A PCI IOMMU allocation arena. There are typically two of these + regions per bus. */ +/* ??? The 8400 has a 32-byte pte entry, and the entire table apparently + lives directly on the host bridge (no tlb?). We don't support this + machine, but if we ever did, we'd need to parameterize all this quite + a bit further. Probably with per-bus operation tables. */ + +struct pci_iommu_arena +{ + spinlock_t lock; + struct pci_controller *hose; +#define IOMMU_INVALID_PTE 0x2 /* 32:63 bits MBZ */ +#define IOMMU_RESERVED_PTE 0xface + unsigned long *ptes; + dma_addr_t dma_base; + unsigned int size; + unsigned int next_entry; + unsigned int align_entry; +}; + +#if defined(CONFIG_ALPHA_SRM) && \ + (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA)) +# define NEED_SRM_SAVE_RESTORE +#else +# undef NEED_SRM_SAVE_RESTORE +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(NEED_SRM_SAVE_RESTORE) +# define ALPHA_RESTORE_SRM_SETUP +#else +# undef ALPHA_RESTORE_SRM_SETUP +#endif + +#ifdef ALPHA_RESTORE_SRM_SETUP +/* Store PCI device configuration left by SRM here. */ +struct pdev_srm_saved_conf +{ + struct pdev_srm_saved_conf *next; + struct pci_dev *dev; +}; + +extern void pci_restore_srm_config(void); +#else +#define pdev_save_srm_config(dev) do {} while (0) +#define pci_restore_srm_config() do {} while (0) +#endif + +/* The hose list. */ +extern struct pci_controller *hose_head, **hose_tail; +extern struct pci_controller *pci_isa_hose; + +extern unsigned long alpha_agpgart_size; + +extern void common_init_pci(void); +#define common_swizzle pci_common_swizzle +extern struct pci_controller *alloc_pci_controller(void); +extern struct resource *alloc_resource(void); + +extern struct pci_iommu_arena *iommu_arena_new_node(int, + struct pci_controller *, + dma_addr_t, unsigned long, + unsigned long); +extern struct pci_iommu_arena *iommu_arena_new(struct pci_controller *, + dma_addr_t, unsigned long, + unsigned long); +extern const char *const pci_io_names[]; +extern const char *const pci_mem_names[]; +extern const char pci_hae0_name[]; + +extern unsigned long size_for_memory(unsigned long max); + +extern int iommu_reserve(struct pci_iommu_arena *, long, long); +extern int iommu_release(struct pci_iommu_arena *, long, long); +extern int iommu_bind(struct pci_iommu_arena *, long, long, struct page **); +extern int iommu_unbind(struct pci_iommu_arena *, long, long); + + diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c new file mode 100644 index 00000000..cd634795 --- /dev/null +++ b/arch/alpha/kernel/pci_iommu.c @@ -0,0 +1,969 @@ +/* + * linux/arch/alpha/kernel/pci_iommu.c + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/pci.h> +#include <linux/gfp.h> +#include <linux/bootmem.h> +#include <linux/export.h> +#include <linux/scatterlist.h> +#include <linux/log2.h> +#include <linux/dma-mapping.h> +#include <linux/iommu-helper.h> + +#include <asm/io.h> +#include <asm/hwrpb.h> + +#include "proto.h" +#include "pci_impl.h" + + +#define DEBUG_ALLOC 0 +#if DEBUG_ALLOC > 0 +# define DBGA(args...) printk(KERN_DEBUG args) +#else +# define DBGA(args...) +#endif +#if DEBUG_ALLOC > 1 +# define DBGA2(args...) printk(KERN_DEBUG args) +#else +# define DBGA2(args...) +#endif + +#define DEBUG_NODIRECT 0 + +#define ISA_DMA_MASK 0x00ffffff + +static inline unsigned long +mk_iommu_pte(unsigned long paddr) +{ + return (paddr >> (PAGE_SHIFT-1)) | 1; +} + +/* Return the minimum of MAX or the first power of two larger + than main memory. */ + +unsigned long +size_for_memory(unsigned long max) +{ + unsigned long mem = max_low_pfn << PAGE_SHIFT; + if (mem < max) + max = roundup_pow_of_two(mem); + return max; +} + +struct pci_iommu_arena * __init +iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base, + unsigned long window_size, unsigned long align) +{ + unsigned long mem_size; + struct pci_iommu_arena *arena; + + mem_size = window_size / (PAGE_SIZE / sizeof(unsigned long)); + + /* Note that the TLB lookup logic uses bitwise concatenation, + not addition, so the required arena alignment is based on + the size of the window. Retain the align parameter so that + particular systems can over-align the arena. */ + if (align < mem_size) + align = mem_size; + + +#ifdef CONFIG_DISCONTIGMEM + + arena = alloc_bootmem_node(NODE_DATA(nid), sizeof(*arena)); + if (!NODE_DATA(nid) || !arena) { + printk("%s: couldn't allocate arena from node %d\n" + " falling back to system-wide allocation\n", + __func__, nid); + arena = alloc_bootmem(sizeof(*arena)); + } + + arena->ptes = __alloc_bootmem_node(NODE_DATA(nid), mem_size, align, 0); + if (!NODE_DATA(nid) || !arena->ptes) { + printk("%s: couldn't allocate arena ptes from node %d\n" + " falling back to system-wide allocation\n", + __func__, nid); + arena->ptes = __alloc_bootmem(mem_size, align, 0); + } + +#else /* CONFIG_DISCONTIGMEM */ + + arena = alloc_bootmem(sizeof(*arena)); + arena->ptes = __alloc_bootmem(mem_size, align, 0); + +#endif /* CONFIG_DISCONTIGMEM */ + + spin_lock_init(&arena->lock); + arena->hose = hose; + arena->dma_base = base; + arena->size = window_size; + arena->next_entry = 0; + + /* Align allocations to a multiple of a page size. Not needed + unless there are chip bugs. */ + arena->align_entry = 1; + + return arena; +} + +struct pci_iommu_arena * __init +iommu_arena_new(struct pci_controller *hose, dma_addr_t base, + unsigned long window_size, unsigned long align) +{ + return iommu_arena_new_node(0, hose, base, window_size, align); +} + +/* Must be called with the arena lock held */ +static long +iommu_arena_find_pages(struct device *dev, struct pci_iommu_arena *arena, + long n, long mask) +{ + unsigned long *ptes; + long i, p, nent; + int pass = 0; + unsigned long base; + unsigned long boundary_size; + + base = arena->dma_base >> PAGE_SHIFT; + if (dev) { + boundary_size = dma_get_seg_boundary(dev) + 1; + boundary_size >>= PAGE_SHIFT; + } else { + boundary_size = 1UL << (32 - PAGE_SHIFT); + } + + /* Search forward for the first mask-aligned sequence of N free ptes */ + ptes = arena->ptes; + nent = arena->size >> PAGE_SHIFT; + p = ALIGN(arena->next_entry, mask + 1); + i = 0; + +again: + while (i < n && p+i < nent) { + if (!i && iommu_is_span_boundary(p, n, base, boundary_size)) { + p = ALIGN(p + 1, mask + 1); + goto again; + } + + if (ptes[p+i]) + p = ALIGN(p + i + 1, mask + 1), i = 0; + else + i = i + 1; + } + + if (i < n) { + if (pass < 1) { + /* + * Reached the end. Flush the TLB and restart + * the search from the beginning. + */ + alpha_mv.mv_pci_tbi(arena->hose, 0, -1); + + pass++; + p = 0; + i = 0; + goto again; + } else + return -1; + } + + /* Success. It's the responsibility of the caller to mark them + in use before releasing the lock */ + return p; +} + +static long +iommu_arena_alloc(struct device *dev, struct pci_iommu_arena *arena, long n, + unsigned int align) +{ + unsigned long flags; + unsigned long *ptes; + long i, p, mask; + + spin_lock_irqsave(&arena->lock, flags); + + /* Search for N empty ptes */ + ptes = arena->ptes; + mask = max(align, arena->align_entry) - 1; + p = iommu_arena_find_pages(dev, arena, n, mask); + if (p < 0) { + spin_unlock_irqrestore(&arena->lock, flags); + return -1; + } + + /* Success. Mark them all in use, ie not zero and invalid + for the iommu tlb that could load them from under us. + The chip specific bits will fill this in with something + kosher when we return. */ + for (i = 0; i < n; ++i) + ptes[p+i] = IOMMU_INVALID_PTE; + + arena->next_entry = p + n; + spin_unlock_irqrestore(&arena->lock, flags); + + return p; +} + +static void +iommu_arena_free(struct pci_iommu_arena *arena, long ofs, long n) +{ + unsigned long *p; + long i; + + p = arena->ptes + ofs; + for (i = 0; i < n; ++i) + p[i] = 0; +} + +/* + * True if the machine supports DAC addressing, and DEV can + * make use of it given MASK. + */ +static int pci_dac_dma_supported(struct pci_dev *dev, u64 mask) +{ + dma_addr_t dac_offset = alpha_mv.pci_dac_offset; + int ok = 1; + + /* If this is not set, the machine doesn't support DAC at all. */ + if (dac_offset == 0) + ok = 0; + + /* The device has to be able to address our DAC bit. */ + if ((dac_offset & dev->dma_mask) != dac_offset) + ok = 0; + + /* If both conditions above are met, we are fine. */ + DBGA("pci_dac_dma_supported %s from %p\n", + ok ? "yes" : "no", __builtin_return_address(0)); + + return ok; +} + +/* Map a single buffer of the indicated size for PCI DMA in streaming + mode. The 32-bit PCI bus mastering address to use is returned. + Once the device is given the dma address, the device owns this memory + until either pci_unmap_single or pci_dma_sync_single is performed. */ + +static dma_addr_t +pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size, + int dac_allowed) +{ + struct pci_controller *hose = pdev ? pdev->sysdata : pci_isa_hose; + dma_addr_t max_dma = pdev ? pdev->dma_mask : ISA_DMA_MASK; + struct pci_iommu_arena *arena; + long npages, dma_ofs, i; + unsigned long paddr; + dma_addr_t ret; + unsigned int align = 0; + struct device *dev = pdev ? &pdev->dev : NULL; + + paddr = __pa(cpu_addr); + +#if !DEBUG_NODIRECT + /* First check to see if we can use the direct map window. */ + if (paddr + size + __direct_map_base - 1 <= max_dma + && paddr + size <= __direct_map_size) { + ret = paddr + __direct_map_base; + + DBGA2("pci_map_single: [%p,%zx] -> direct %llx from %p\n", + cpu_addr, size, ret, __builtin_return_address(0)); + + return ret; + } +#endif + + /* Next, use DAC if selected earlier. */ + if (dac_allowed) { + ret = paddr + alpha_mv.pci_dac_offset; + + DBGA2("pci_map_single: [%p,%zx] -> DAC %llx from %p\n", + cpu_addr, size, ret, __builtin_return_address(0)); + + return ret; + } + + /* If the machine doesn't define a pci_tbi routine, we have to + assume it doesn't support sg mapping, and, since we tried to + use direct_map above, it now must be considered an error. */ + if (! alpha_mv.mv_pci_tbi) { + printk_once(KERN_WARNING "pci_map_single: no HW sg\n"); + return 0; + } + + arena = hose->sg_pci; + if (!arena || arena->dma_base + arena->size - 1 > max_dma) + arena = hose->sg_isa; + + npages = iommu_num_pages(paddr, size, PAGE_SIZE); + + /* Force allocation to 64KB boundary for ISA bridges. */ + if (pdev && pdev == isa_bridge) + align = 8; + dma_ofs = iommu_arena_alloc(dev, arena, npages, align); + if (dma_ofs < 0) { + printk(KERN_WARNING "pci_map_single failed: " + "could not allocate dma page tables\n"); + return 0; + } + + paddr &= PAGE_MASK; + for (i = 0; i < npages; ++i, paddr += PAGE_SIZE) + arena->ptes[i + dma_ofs] = mk_iommu_pte(paddr); + + ret = arena->dma_base + dma_ofs * PAGE_SIZE; + ret += (unsigned long)cpu_addr & ~PAGE_MASK; + + DBGA2("pci_map_single: [%p,%zx] np %ld -> sg %llx from %p\n", + cpu_addr, size, npages, ret, __builtin_return_address(0)); + + return ret; +} + +/* Helper for generic DMA-mapping functions. */ +static struct pci_dev *alpha_gendev_to_pci(struct device *dev) +{ + if (dev && dev->bus == &pci_bus_type) + return to_pci_dev(dev); + + /* Assume that non-PCI devices asking for DMA are either ISA or EISA, + BUG() otherwise. */ + BUG_ON(!isa_bridge); + + /* Assume non-busmaster ISA DMA when dma_mask is not set (the ISA + bridge is bus master then). */ + if (!dev || !dev->dma_mask || !*dev->dma_mask) + return isa_bridge; + + /* For EISA bus masters, return isa_bridge (it might have smaller + dma_mask due to wiring limitations). */ + if (*dev->dma_mask >= isa_bridge->dma_mask) + return isa_bridge; + + /* This assumes ISA bus master with dma_mask 0xffffff. */ + return NULL; +} + +static dma_addr_t alpha_pci_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct pci_dev *pdev = alpha_gendev_to_pci(dev); + int dac_allowed; + + if (dir == PCI_DMA_NONE) + BUG(); + + dac_allowed = pdev ? pci_dac_dma_supported(pdev, pdev->dma_mask) : 0; + return pci_map_single_1(pdev, (char *)page_address(page) + offset, + size, dac_allowed); +} + +/* Unmap a single streaming mode DMA translation. The DMA_ADDR and + SIZE must match what was provided for in a previous pci_map_single + call. All other usages are undefined. After this call, reads by + the cpu to the buffer are guaranteed to see whatever the device + wrote there. */ + +static void alpha_pci_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + unsigned long flags; + struct pci_dev *pdev = alpha_gendev_to_pci(dev); + struct pci_controller *hose = pdev ? pdev->sysdata : pci_isa_hose; + struct pci_iommu_arena *arena; + long dma_ofs, npages; + + if (dir == PCI_DMA_NONE) + BUG(); + + if (dma_addr >= __direct_map_base + && dma_addr < __direct_map_base + __direct_map_size) { + /* Nothing to do. */ + + DBGA2("pci_unmap_single: direct [%llx,%zx] from %p\n", + dma_addr, size, __builtin_return_address(0)); + + return; + } + + if (dma_addr > 0xffffffff) { + DBGA2("pci64_unmap_single: DAC [%llx,%zx] from %p\n", + dma_addr, size, __builtin_return_address(0)); + return; + } + + arena = hose->sg_pci; + if (!arena || dma_addr < arena->dma_base) + arena = hose->sg_isa; + + dma_ofs = (dma_addr - arena->dma_base) >> PAGE_SHIFT; + if (dma_ofs * PAGE_SIZE >= arena->size) { + printk(KERN_ERR "Bogus pci_unmap_single: dma_addr %llx " + " base %llx size %x\n", + dma_addr, arena->dma_base, arena->size); + return; + BUG(); + } + + npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); + + spin_lock_irqsave(&arena->lock, flags); + + iommu_arena_free(arena, dma_ofs, npages); + + /* If we're freeing ptes above the `next_entry' pointer (they + may have snuck back into the TLB since the last wrap flush), + we need to flush the TLB before reallocating the latter. */ + if (dma_ofs >= arena->next_entry) + alpha_mv.mv_pci_tbi(hose, dma_addr, dma_addr + size - 1); + + spin_unlock_irqrestore(&arena->lock, flags); + + DBGA2("pci_unmap_single: sg [%llx,%zx] np %ld from %p\n", + dma_addr, size, npages, __builtin_return_address(0)); +} + +/* Allocate and map kernel buffer using consistent mode DMA for PCI + device. Returns non-NULL cpu-view pointer to the buffer if + successful and sets *DMA_ADDRP to the pci side dma address as well, + else DMA_ADDRP is undefined. */ + +static void *alpha_pci_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addrp, gfp_t gfp, + struct dma_attrs *attrs) +{ + struct pci_dev *pdev = alpha_gendev_to_pci(dev); + void *cpu_addr; + long order = get_order(size); + + gfp &= ~GFP_DMA; + +try_again: + cpu_addr = (void *)__get_free_pages(gfp, order); + if (! cpu_addr) { + printk(KERN_INFO "pci_alloc_consistent: " + "get_free_pages failed from %p\n", + __builtin_return_address(0)); + /* ??? Really atomic allocation? Otherwise we could play + with vmalloc and sg if we can't find contiguous memory. */ + return NULL; + } + memset(cpu_addr, 0, size); + + *dma_addrp = pci_map_single_1(pdev, cpu_addr, size, 0); + if (*dma_addrp == 0) { + free_pages((unsigned long)cpu_addr, order); + if (alpha_mv.mv_pci_tbi || (gfp & GFP_DMA)) + return NULL; + /* The address doesn't fit required mask and we + do not have iommu. Try again with GFP_DMA. */ + gfp |= GFP_DMA; + goto try_again; + } + + DBGA2("pci_alloc_consistent: %zx -> [%p,%llx] from %p\n", + size, cpu_addr, *dma_addrp, __builtin_return_address(0)); + + return cpu_addr; +} + +/* Free and unmap a consistent DMA buffer. CPU_ADDR and DMA_ADDR must + be values that were returned from pci_alloc_consistent. SIZE must + be the same as what as passed into pci_alloc_consistent. + References to the memory and mappings associated with CPU_ADDR or + DMA_ADDR past this call are illegal. */ + +static void alpha_pci_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr, + struct dma_attrs *attrs) +{ + struct pci_dev *pdev = alpha_gendev_to_pci(dev); + pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); + free_pages((unsigned long)cpu_addr, get_order(size)); + + DBGA2("pci_free_consistent: [%llx,%zx] from %p\n", + dma_addr, size, __builtin_return_address(0)); +} + +/* Classify the elements of the scatterlist. Write dma_address + of each element with: + 0 : Followers all physically adjacent. + 1 : Followers all virtually adjacent. + -1 : Not leader, physically adjacent to previous. + -2 : Not leader, virtually adjacent to previous. + Write dma_length of each leader with the combined lengths of + the mergable followers. */ + +#define SG_ENT_VIRT_ADDRESS(SG) (sg_virt((SG))) +#define SG_ENT_PHYS_ADDRESS(SG) __pa(SG_ENT_VIRT_ADDRESS(SG)) + +static void +sg_classify(struct device *dev, struct scatterlist *sg, struct scatterlist *end, + int virt_ok) +{ + unsigned long next_paddr; + struct scatterlist *leader; + long leader_flag, leader_length; + unsigned int max_seg_size; + + leader = sg; + leader_flag = 0; + leader_length = leader->length; + next_paddr = SG_ENT_PHYS_ADDRESS(leader) + leader_length; + + /* we will not marge sg without device. */ + max_seg_size = dev ? dma_get_max_seg_size(dev) : 0; + for (++sg; sg < end; ++sg) { + unsigned long addr, len; + addr = SG_ENT_PHYS_ADDRESS(sg); + len = sg->length; + + if (leader_length + len > max_seg_size) + goto new_segment; + + if (next_paddr == addr) { + sg->dma_address = -1; + leader_length += len; + } else if (((next_paddr | addr) & ~PAGE_MASK) == 0 && virt_ok) { + sg->dma_address = -2; + leader_flag = 1; + leader_length += len; + } else { +new_segment: + leader->dma_address = leader_flag; + leader->dma_length = leader_length; + leader = sg; + leader_flag = 0; + leader_length = len; + } + + next_paddr = addr + len; + } + + leader->dma_address = leader_flag; + leader->dma_length = leader_length; +} + +/* Given a scatterlist leader, choose an allocation method and fill + in the blanks. */ + +static int +sg_fill(struct device *dev, struct scatterlist *leader, struct scatterlist *end, + struct scatterlist *out, struct pci_iommu_arena *arena, + dma_addr_t max_dma, int dac_allowed) +{ + unsigned long paddr = SG_ENT_PHYS_ADDRESS(leader); + long size = leader->dma_length; + struct scatterlist *sg; + unsigned long *ptes; + long npages, dma_ofs, i; + +#if !DEBUG_NODIRECT + /* If everything is physically contiguous, and the addresses + fall into the direct-map window, use it. */ + if (leader->dma_address == 0 + && paddr + size + __direct_map_base - 1 <= max_dma + && paddr + size <= __direct_map_size) { + out->dma_address = paddr + __direct_map_base; + out->dma_length = size; + + DBGA(" sg_fill: [%p,%lx] -> direct %llx\n", + __va(paddr), size, out->dma_address); + + return 0; + } +#endif + + /* If physically contiguous and DAC is available, use it. */ + if (leader->dma_address == 0 && dac_allowed) { + out->dma_address = paddr + alpha_mv.pci_dac_offset; + out->dma_length = size; + + DBGA(" sg_fill: [%p,%lx] -> DAC %llx\n", + __va(paddr), size, out->dma_address); + + return 0; + } + + /* Otherwise, we'll use the iommu to make the pages virtually + contiguous. */ + + paddr &= ~PAGE_MASK; + npages = iommu_num_pages(paddr, size, PAGE_SIZE); + dma_ofs = iommu_arena_alloc(dev, arena, npages, 0); + if (dma_ofs < 0) { + /* If we attempted a direct map above but failed, die. */ + if (leader->dma_address == 0) + return -1; + + /* Otherwise, break up the remaining virtually contiguous + hunks into individual direct maps and retry. */ + sg_classify(dev, leader, end, 0); + return sg_fill(dev, leader, end, out, arena, max_dma, dac_allowed); + } + + out->dma_address = arena->dma_base + dma_ofs*PAGE_SIZE + paddr; + out->dma_length = size; + + DBGA(" sg_fill: [%p,%lx] -> sg %llx np %ld\n", + __va(paddr), size, out->dma_address, npages); + + /* All virtually contiguous. We need to find the length of each + physically contiguous subsegment to fill in the ptes. */ + ptes = &arena->ptes[dma_ofs]; + sg = leader; + do { +#if DEBUG_ALLOC > 0 + struct scatterlist *last_sg = sg; +#endif + + size = sg->length; + paddr = SG_ENT_PHYS_ADDRESS(sg); + + while (sg+1 < end && (int) sg[1].dma_address == -1) { + size += sg[1].length; + sg++; + } + + npages = iommu_num_pages(paddr, size, PAGE_SIZE); + + paddr &= PAGE_MASK; + for (i = 0; i < npages; ++i, paddr += PAGE_SIZE) + *ptes++ = mk_iommu_pte(paddr); + +#if DEBUG_ALLOC > 0 + DBGA(" (%ld) [%p,%x] np %ld\n", + last_sg - leader, SG_ENT_VIRT_ADDRESS(last_sg), + last_sg->length, npages); + while (++last_sg <= sg) { + DBGA(" (%ld) [%p,%x] cont\n", + last_sg - leader, SG_ENT_VIRT_ADDRESS(last_sg), + last_sg->length); + } +#endif + } while (++sg < end && (int) sg->dma_address < 0); + + return 1; +} + +static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct pci_dev *pdev = alpha_gendev_to_pci(dev); + struct scatterlist *start, *end, *out; + struct pci_controller *hose; + struct pci_iommu_arena *arena; + dma_addr_t max_dma; + int dac_allowed; + + if (dir == PCI_DMA_NONE) + BUG(); + + dac_allowed = dev ? pci_dac_dma_supported(pdev, pdev->dma_mask) : 0; + + /* Fast path single entry scatterlists. */ + if (nents == 1) { + sg->dma_length = sg->length; + sg->dma_address + = pci_map_single_1(pdev, SG_ENT_VIRT_ADDRESS(sg), + sg->length, dac_allowed); + return sg->dma_address != 0; + } + + start = sg; + end = sg + nents; + + /* First, prepare information about the entries. */ + sg_classify(dev, sg, end, alpha_mv.mv_pci_tbi != 0); + + /* Second, figure out where we're going to map things. */ + if (alpha_mv.mv_pci_tbi) { + hose = pdev ? pdev->sysdata : pci_isa_hose; + max_dma = pdev ? pdev->dma_mask : ISA_DMA_MASK; + arena = hose->sg_pci; + if (!arena || arena->dma_base + arena->size - 1 > max_dma) + arena = hose->sg_isa; + } else { + max_dma = -1; + arena = NULL; + hose = NULL; + } + + /* Third, iterate over the scatterlist leaders and allocate + dma space as needed. */ + for (out = sg; sg < end; ++sg) { + if ((int) sg->dma_address < 0) + continue; + if (sg_fill(dev, sg, end, out, arena, max_dma, dac_allowed) < 0) + goto error; + out++; + } + + /* Mark the end of the list for pci_unmap_sg. */ + if (out < end) + out->dma_length = 0; + + if (out - start == 0) + printk(KERN_WARNING "pci_map_sg failed: no entries?\n"); + DBGA("pci_map_sg: %ld entries\n", out - start); + + return out - start; + + error: + printk(KERN_WARNING "pci_map_sg failed: " + "could not allocate dma page tables\n"); + + /* Some allocation failed while mapping the scatterlist + entries. Unmap them now. */ + if (out > start) + pci_unmap_sg(pdev, start, out - start, dir); + return 0; +} + +/* Unmap a set of streaming mode DMA translations. Again, cpu read + rules concerning calls here are the same as for pci_unmap_single() + above. */ + +static void alpha_pci_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct pci_dev *pdev = alpha_gendev_to_pci(dev); + unsigned long flags; + struct pci_controller *hose; + struct pci_iommu_arena *arena; + struct scatterlist *end; + dma_addr_t max_dma; + dma_addr_t fbeg, fend; + + if (dir == PCI_DMA_NONE) + BUG(); + + if (! alpha_mv.mv_pci_tbi) + return; + + hose = pdev ? pdev->sysdata : pci_isa_hose; + max_dma = pdev ? pdev->dma_mask : ISA_DMA_MASK; + arena = hose->sg_pci; + if (!arena || arena->dma_base + arena->size - 1 > max_dma) + arena = hose->sg_isa; + + fbeg = -1, fend = 0; + + spin_lock_irqsave(&arena->lock, flags); + + for (end = sg + nents; sg < end; ++sg) { + dma_addr_t addr; + size_t size; + long npages, ofs; + dma_addr_t tend; + + addr = sg->dma_address; + size = sg->dma_length; + if (!size) + break; + + if (addr > 0xffffffff) { + /* It's a DAC address -- nothing to do. */ + DBGA(" (%ld) DAC [%llx,%zx]\n", + sg - end + nents, addr, size); + continue; + } + + if (addr >= __direct_map_base + && addr < __direct_map_base + __direct_map_size) { + /* Nothing to do. */ + DBGA(" (%ld) direct [%llx,%zx]\n", + sg - end + nents, addr, size); + continue; + } + + DBGA(" (%ld) sg [%llx,%zx]\n", + sg - end + nents, addr, size); + + npages = iommu_num_pages(addr, size, PAGE_SIZE); + ofs = (addr - arena->dma_base) >> PAGE_SHIFT; + iommu_arena_free(arena, ofs, npages); + + tend = addr + size - 1; + if (fbeg > addr) fbeg = addr; + if (fend < tend) fend = tend; + } + + /* If we're freeing ptes above the `next_entry' pointer (they + may have snuck back into the TLB since the last wrap flush), + we need to flush the TLB before reallocating the latter. */ + if ((fend - arena->dma_base) >> PAGE_SHIFT >= arena->next_entry) + alpha_mv.mv_pci_tbi(hose, fbeg, fend); + + spin_unlock_irqrestore(&arena->lock, flags); + + DBGA("pci_unmap_sg: %ld entries\n", nents - (end - sg)); +} + +/* Return whether the given PCI device DMA address mask can be + supported properly. */ + +static int alpha_pci_supported(struct device *dev, u64 mask) +{ + struct pci_dev *pdev = alpha_gendev_to_pci(dev); + struct pci_controller *hose; + struct pci_iommu_arena *arena; + + /* If there exists a direct map, and the mask fits either + the entire direct mapped space or the total system memory as + shifted by the map base */ + if (__direct_map_size != 0 + && (__direct_map_base + __direct_map_size - 1 <= mask || + __direct_map_base + (max_low_pfn << PAGE_SHIFT) - 1 <= mask)) + return 1; + + /* Check that we have a scatter-gather arena that fits. */ + hose = pdev ? pdev->sysdata : pci_isa_hose; + arena = hose->sg_isa; + if (arena && arena->dma_base + arena->size - 1 <= mask) + return 1; + arena = hose->sg_pci; + if (arena && arena->dma_base + arena->size - 1 <= mask) + return 1; + + /* As last resort try ZONE_DMA. */ + if (!__direct_map_base && MAX_DMA_ADDRESS - IDENT_ADDR - 1 <= mask) + return 1; + + return 0; +} + + +/* + * AGP GART extensions to the IOMMU + */ +int +iommu_reserve(struct pci_iommu_arena *arena, long pg_count, long align_mask) +{ + unsigned long flags; + unsigned long *ptes; + long i, p; + + if (!arena) return -EINVAL; + + spin_lock_irqsave(&arena->lock, flags); + + /* Search for N empty ptes. */ + ptes = arena->ptes; + p = iommu_arena_find_pages(NULL, arena, pg_count, align_mask); + if (p < 0) { + spin_unlock_irqrestore(&arena->lock, flags); + return -1; + } + + /* Success. Mark them all reserved (ie not zero and invalid) + for the iommu tlb that could load them from under us. + They will be filled in with valid bits by _bind() */ + for (i = 0; i < pg_count; ++i) + ptes[p+i] = IOMMU_RESERVED_PTE; + + arena->next_entry = p + pg_count; + spin_unlock_irqrestore(&arena->lock, flags); + + return p; +} + +int +iommu_release(struct pci_iommu_arena *arena, long pg_start, long pg_count) +{ + unsigned long *ptes; + long i; + + if (!arena) return -EINVAL; + + ptes = arena->ptes; + + /* Make sure they're all reserved first... */ + for(i = pg_start; i < pg_start + pg_count; i++) + if (ptes[i] != IOMMU_RESERVED_PTE) + return -EBUSY; + + iommu_arena_free(arena, pg_start, pg_count); + return 0; +} + +int +iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count, + struct page **pages) +{ + unsigned long flags; + unsigned long *ptes; + long i, j; + + if (!arena) return -EINVAL; + + spin_lock_irqsave(&arena->lock, flags); + + ptes = arena->ptes; + + for(j = pg_start; j < pg_start + pg_count; j++) { + if (ptes[j] != IOMMU_RESERVED_PTE) { + spin_unlock_irqrestore(&arena->lock, flags); + return -EBUSY; + } + } + + for(i = 0, j = pg_start; i < pg_count; i++, j++) + ptes[j] = mk_iommu_pte(page_to_phys(pages[i])); + + spin_unlock_irqrestore(&arena->lock, flags); + + return 0; +} + +int +iommu_unbind(struct pci_iommu_arena *arena, long pg_start, long pg_count) +{ + unsigned long *p; + long i; + + if (!arena) return -EINVAL; + + p = arena->ptes + pg_start; + for(i = 0; i < pg_count; i++) + p[i] = IOMMU_RESERVED_PTE; + + return 0; +} + +static int alpha_pci_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == 0; +} + +static int alpha_pci_set_mask(struct device *dev, u64 mask) +{ + if (!dev->dma_mask || + !pci_dma_supported(alpha_gendev_to_pci(dev), mask)) + return -EIO; + + *dev->dma_mask = mask; + return 0; +} + +struct dma_map_ops alpha_pci_ops = { + .alloc = alpha_pci_alloc_coherent, + .free = alpha_pci_free_coherent, + .map_page = alpha_pci_map_page, + .unmap_page = alpha_pci_unmap_page, + .map_sg = alpha_pci_map_sg, + .unmap_sg = alpha_pci_unmap_sg, + .mapping_error = alpha_pci_mapping_error, + .dma_supported = alpha_pci_supported, + .set_dma_mask = alpha_pci_set_mask, +}; + +struct dma_map_ops *dma_ops = &alpha_pci_ops; +EXPORT_SYMBOL(dma_ops); diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c new file mode 100644 index 00000000..0dae252f --- /dev/null +++ b/arch/alpha/kernel/perf_event.c @@ -0,0 +1,893 @@ +/* + * Hardware performance events for the Alpha. + * + * We implement HW counts on the EV67 and subsequent CPUs only. + * + * (C) 2010 Michael J. Cree + * + * Somewhat based on the Sparc code, and to a lesser extent the PowerPC and + * ARM code, which are copyright by their respective authors. + */ + +#include <linux/perf_event.h> +#include <linux/kprobes.h> +#include <linux/kernel.h> +#include <linux/kdebug.h> +#include <linux/mutex.h> +#include <linux/init.h> + +#include <asm/hwrpb.h> +#include <linux/atomic.h> +#include <asm/irq.h> +#include <asm/irq_regs.h> +#include <asm/pal.h> +#include <asm/wrperfmon.h> +#include <asm/hw_irq.h> + + +/* The maximum number of PMCs on any Alpha CPU whatsoever. */ +#define MAX_HWEVENTS 3 +#define PMC_NO_INDEX -1 + +/* For tracking PMCs and the hw events they monitor on each CPU. */ +struct cpu_hw_events { + int enabled; + /* Number of events scheduled; also number entries valid in arrays below. */ + int n_events; + /* Number events added since last hw_perf_disable(). */ + int n_added; + /* Events currently scheduled. */ + struct perf_event *event[MAX_HWEVENTS]; + /* Event type of each scheduled event. */ + unsigned long evtype[MAX_HWEVENTS]; + /* Current index of each scheduled event; if not yet determined + * contains PMC_NO_INDEX. + */ + int current_idx[MAX_HWEVENTS]; + /* The active PMCs' config for easy use with wrperfmon(). */ + unsigned long config; + /* The active counters' indices for easy use with wrperfmon(). */ + unsigned long idx_mask; +}; +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + + + +/* + * A structure to hold the description of the PMCs available on a particular + * type of Alpha CPU. + */ +struct alpha_pmu_t { + /* Mapping of the perf system hw event types to indigenous event types */ + const int *event_map; + /* The number of entries in the event_map */ + int max_events; + /* The number of PMCs on this Alpha */ + int num_pmcs; + /* + * All PMC counters reside in the IBOX register PCTR. This is the + * LSB of the counter. + */ + int pmc_count_shift[MAX_HWEVENTS]; + /* + * The mask that isolates the PMC bits when the LSB of the counter + * is shifted to bit 0. + */ + unsigned long pmc_count_mask[MAX_HWEVENTS]; + /* The maximum period the PMC can count. */ + unsigned long pmc_max_period[MAX_HWEVENTS]; + /* + * The maximum value that may be written to the counter due to + * hardware restrictions is pmc_max_period - pmc_left. + */ + long pmc_left[3]; + /* Subroutine for allocation of PMCs. Enforces constraints. */ + int (*check_constraints)(struct perf_event **, unsigned long *, int); +}; + +/* + * The Alpha CPU PMU description currently in operation. This is set during + * the boot process to the specific CPU of the machine. + */ +static const struct alpha_pmu_t *alpha_pmu; + + +#define HW_OP_UNSUPPORTED -1 + +/* + * The hardware description of the EV67, EV68, EV69, EV7 and EV79 PMUs + * follow. Since they are identical we refer to them collectively as the + * EV67 henceforth. + */ + +/* + * EV67 PMC event types + * + * There is no one-to-one mapping of the possible hw event types to the + * actual codes that are used to program the PMCs hence we introduce our + * own hw event type identifiers. + */ +enum ev67_pmc_event_type { + EV67_CYCLES = 1, + EV67_INSTRUCTIONS, + EV67_BCACHEMISS, + EV67_MBOXREPLAY, + EV67_LAST_ET +}; +#define EV67_NUM_EVENT_TYPES (EV67_LAST_ET-EV67_CYCLES) + + +/* Mapping of the hw event types to the perf tool interface */ +static const int ev67_perfmon_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = EV67_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = EV67_INSTRUCTIONS, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = EV67_BCACHEMISS, +}; + +struct ev67_mapping_t { + int config; + int idx; +}; + +/* + * The mapping used for one event only - these must be in same order as enum + * ev67_pmc_event_type definition. + */ +static const struct ev67_mapping_t ev67_mapping[] = { + {EV67_PCTR_INSTR_CYCLES, 1}, /* EV67_CYCLES, */ + {EV67_PCTR_INSTR_CYCLES, 0}, /* EV67_INSTRUCTIONS */ + {EV67_PCTR_INSTR_BCACHEMISS, 1}, /* EV67_BCACHEMISS */ + {EV67_PCTR_CYCLES_MBOX, 1} /* EV67_MBOXREPLAY */ +}; + + +/* + * Check that a group of events can be simultaneously scheduled on to the + * EV67 PMU. Also allocate counter indices and config. + */ +static int ev67_check_constraints(struct perf_event **event, + unsigned long *evtype, int n_ev) +{ + int idx0; + unsigned long config; + + idx0 = ev67_mapping[evtype[0]-1].idx; + config = ev67_mapping[evtype[0]-1].config; + if (n_ev == 1) + goto success; + + BUG_ON(n_ev != 2); + + if (evtype[0] == EV67_MBOXREPLAY || evtype[1] == EV67_MBOXREPLAY) { + /* MBOX replay traps must be on PMC 1 */ + idx0 = (evtype[0] == EV67_MBOXREPLAY) ? 1 : 0; + /* Only cycles can accompany MBOX replay traps */ + if (evtype[idx0] == EV67_CYCLES) { + config = EV67_PCTR_CYCLES_MBOX; + goto success; + } + } + + if (evtype[0] == EV67_BCACHEMISS || evtype[1] == EV67_BCACHEMISS) { + /* Bcache misses must be on PMC 1 */ + idx0 = (evtype[0] == EV67_BCACHEMISS) ? 1 : 0; + /* Only instructions can accompany Bcache misses */ + if (evtype[idx0] == EV67_INSTRUCTIONS) { + config = EV67_PCTR_INSTR_BCACHEMISS; + goto success; + } + } + + if (evtype[0] == EV67_INSTRUCTIONS || evtype[1] == EV67_INSTRUCTIONS) { + /* Instructions must be on PMC 0 */ + idx0 = (evtype[0] == EV67_INSTRUCTIONS) ? 0 : 1; + /* By this point only cycles can accompany instructions */ + if (evtype[idx0^1] == EV67_CYCLES) { + config = EV67_PCTR_INSTR_CYCLES; + goto success; + } + } + + /* Otherwise, darn it, there is a conflict. */ + return -1; + +success: + event[0]->hw.idx = idx0; + event[0]->hw.config_base = config; + if (n_ev == 2) { + event[1]->hw.idx = idx0 ^ 1; + event[1]->hw.config_base = config; + } + return 0; +} + + +static const struct alpha_pmu_t ev67_pmu = { + .event_map = ev67_perfmon_event_map, + .max_events = ARRAY_SIZE(ev67_perfmon_event_map), + .num_pmcs = 2, + .pmc_count_shift = {EV67_PCTR_0_COUNT_SHIFT, EV67_PCTR_1_COUNT_SHIFT, 0}, + .pmc_count_mask = {EV67_PCTR_0_COUNT_MASK, EV67_PCTR_1_COUNT_MASK, 0}, + .pmc_max_period = {(1UL<<20) - 1, (1UL<<20) - 1, 0}, + .pmc_left = {16, 4, 0}, + .check_constraints = ev67_check_constraints +}; + + + +/* + * Helper routines to ensure that we read/write only the correct PMC bits + * when calling the wrperfmon PALcall. + */ +static inline void alpha_write_pmc(int idx, unsigned long val) +{ + val &= alpha_pmu->pmc_count_mask[idx]; + val <<= alpha_pmu->pmc_count_shift[idx]; + val |= (1<<idx); + wrperfmon(PERFMON_CMD_WRITE, val); +} + +static inline unsigned long alpha_read_pmc(int idx) +{ + unsigned long val; + + val = wrperfmon(PERFMON_CMD_READ, 0); + val >>= alpha_pmu->pmc_count_shift[idx]; + val &= alpha_pmu->pmc_count_mask[idx]; + return val; +} + +/* Set a new period to sample over */ +static int alpha_perf_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + long left = local64_read(&hwc->period_left); + long period = hwc->sample_period; + int ret = 0; + + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + /* + * Hardware restrictions require that the counters must not be + * written with values that are too close to the maximum period. + */ + if (unlikely(left < alpha_pmu->pmc_left[idx])) + left = alpha_pmu->pmc_left[idx]; + + if (left > (long)alpha_pmu->pmc_max_period[idx]) + left = alpha_pmu->pmc_max_period[idx]; + + local64_set(&hwc->prev_count, (unsigned long)(-left)); + + alpha_write_pmc(idx, (unsigned long)(-left)); + + perf_event_update_userpage(event); + + return ret; +} + + +/* + * Calculates the count (the 'delta') since the last time the PMC was read. + * + * As the PMCs' full period can easily be exceeded within the perf system + * sampling period we cannot use any high order bits as a guard bit in the + * PMCs to detect overflow as is done by other architectures. The code here + * calculates the delta on the basis that there is no overflow when ovf is + * zero. The value passed via ovf by the interrupt handler corrects for + * overflow. + * + * This can be racey on rare occasions -- a call to this routine can occur + * with an overflowed counter just before the PMI service routine is called. + * The check for delta negative hopefully always rectifies this situation. + */ +static unsigned long alpha_perf_event_update(struct perf_event *event, + struct hw_perf_event *hwc, int idx, long ovf) +{ + long prev_raw_count, new_raw_count; + long delta; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = alpha_read_pmc(idx); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; + + /* It is possible on very rare occasions that the PMC has overflowed + * but the interrupt is yet to come. Detect and fix this situation. + */ + if (unlikely(delta < 0)) { + delta += alpha_pmu->pmc_max_period[idx] + 1; + } + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); + + return new_raw_count; +} + + +/* + * Collect all HW events into the array event[]. + */ +static int collect_events(struct perf_event *group, int max_count, + struct perf_event *event[], unsigned long *evtype, + int *current_idx) +{ + struct perf_event *pe; + int n = 0; + + if (!is_software_event(group)) { + if (n >= max_count) + return -1; + event[n] = group; + evtype[n] = group->hw.event_base; + current_idx[n++] = PMC_NO_INDEX; + } + list_for_each_entry(pe, &group->sibling_list, group_entry) { + if (!is_software_event(pe) && pe->state != PERF_EVENT_STATE_OFF) { + if (n >= max_count) + return -1; + event[n] = pe; + evtype[n] = pe->hw.event_base; + current_idx[n++] = PMC_NO_INDEX; + } + } + return n; +} + + + +/* + * Check that a group of events can be simultaneously scheduled on to the PMU. + */ +static int alpha_check_constraints(struct perf_event **events, + unsigned long *evtypes, int n_ev) +{ + + /* No HW events is possible from hw_perf_group_sched_in(). */ + if (n_ev == 0) + return 0; + + if (n_ev > alpha_pmu->num_pmcs) + return -1; + + return alpha_pmu->check_constraints(events, evtypes, n_ev); +} + + +/* + * If new events have been scheduled then update cpuc with the new + * configuration. This may involve shifting cycle counts from one PMC to + * another. + */ +static void maybe_change_configuration(struct cpu_hw_events *cpuc) +{ + int j; + + if (cpuc->n_added == 0) + return; + + /* Find counters that are moving to another PMC and update */ + for (j = 0; j < cpuc->n_events; j++) { + struct perf_event *pe = cpuc->event[j]; + + if (cpuc->current_idx[j] != PMC_NO_INDEX && + cpuc->current_idx[j] != pe->hw.idx) { + alpha_perf_event_update(pe, &pe->hw, cpuc->current_idx[j], 0); + cpuc->current_idx[j] = PMC_NO_INDEX; + } + } + + /* Assign to counters all unassigned events. */ + cpuc->idx_mask = 0; + for (j = 0; j < cpuc->n_events; j++) { + struct perf_event *pe = cpuc->event[j]; + struct hw_perf_event *hwc = &pe->hw; + int idx = hwc->idx; + + if (cpuc->current_idx[j] == PMC_NO_INDEX) { + alpha_perf_event_set_period(pe, hwc, idx); + cpuc->current_idx[j] = idx; + } + + if (!(hwc->state & PERF_HES_STOPPED)) + cpuc->idx_mask |= (1<<cpuc->current_idx[j]); + } + cpuc->config = cpuc->event[0]->hw.config_base; +} + + + +/* Schedule perf HW event on to PMU. + * - this function is called from outside this module via the pmu struct + * returned from perf event initialisation. + */ +static int alpha_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int n0; + int ret; + unsigned long irq_flags; + + /* + * The Sparc code has the IRQ disable first followed by the perf + * disable, however this can lead to an overflowed counter with the + * PMI disabled on rare occasions. The alpha_perf_event_update() + * routine should detect this situation by noting a negative delta, + * nevertheless we disable the PMCs first to enable a potential + * final PMI to occur before we disable interrupts. + */ + perf_pmu_disable(event->pmu); + local_irq_save(irq_flags); + + /* Default to error to be returned */ + ret = -EAGAIN; + + /* Insert event on to PMU and if successful modify ret to valid return */ + n0 = cpuc->n_events; + if (n0 < alpha_pmu->num_pmcs) { + cpuc->event[n0] = event; + cpuc->evtype[n0] = event->hw.event_base; + cpuc->current_idx[n0] = PMC_NO_INDEX; + + if (!alpha_check_constraints(cpuc->event, cpuc->evtype, n0+1)) { + cpuc->n_events++; + cpuc->n_added++; + ret = 0; + } + } + + hwc->state = PERF_HES_UPTODATE; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_STOPPED; + + local_irq_restore(irq_flags); + perf_pmu_enable(event->pmu); + + return ret; +} + + + +/* Disable performance monitoring unit + * - this function is called from outside this module via the pmu struct + * returned from perf event initialisation. + */ +static void alpha_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + unsigned long irq_flags; + int j; + + perf_pmu_disable(event->pmu); + local_irq_save(irq_flags); + + for (j = 0; j < cpuc->n_events; j++) { + if (event == cpuc->event[j]) { + int idx = cpuc->current_idx[j]; + + /* Shift remaining entries down into the existing + * slot. + */ + while (++j < cpuc->n_events) { + cpuc->event[j - 1] = cpuc->event[j]; + cpuc->evtype[j - 1] = cpuc->evtype[j]; + cpuc->current_idx[j - 1] = + cpuc->current_idx[j]; + } + + /* Absorb the final count and turn off the event. */ + alpha_perf_event_update(event, hwc, idx, 0); + perf_event_update_userpage(event); + + cpuc->idx_mask &= ~(1UL<<idx); + cpuc->n_events--; + break; + } + } + + local_irq_restore(irq_flags); + perf_pmu_enable(event->pmu); +} + + +static void alpha_pmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + alpha_perf_event_update(event, hwc, hwc->idx, 0); +} + + +static void alpha_pmu_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (!(hwc->state & PERF_HES_STOPPED)) { + cpuc->idx_mask &= ~(1UL<<hwc->idx); + hwc->state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + alpha_perf_event_update(event, hwc, hwc->idx, 0); + hwc->state |= PERF_HES_UPTODATE; + } + + if (cpuc->enabled) + wrperfmon(PERFMON_CMD_DISABLE, (1UL<<hwc->idx)); +} + + +static void alpha_pmu_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + alpha_perf_event_set_period(event, hwc, hwc->idx); + } + + hwc->state = 0; + + cpuc->idx_mask |= 1UL<<hwc->idx; + if (cpuc->enabled) + wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx)); +} + + +/* + * Check that CPU performance counters are supported. + * - currently support EV67 and later CPUs. + * - actually some later revisions of the EV6 have the same PMC model as the + * EV67 but we don't do suffiently deep CPU detection to detect them. + * Bad luck to the very few people who might have one, I guess. + */ +static int supported_cpu(void) +{ + struct percpu_struct *cpu; + unsigned long cputype; + + /* Get cpu type from HW */ + cpu = (struct percpu_struct *)((char *)hwrpb + hwrpb->processor_offset); + cputype = cpu->type & 0xffffffff; + /* Include all of EV67, EV68, EV7, EV79 and EV69 as supported. */ + return (cputype >= EV67_CPU) && (cputype <= EV69_CPU); +} + + + +static void hw_perf_event_destroy(struct perf_event *event) +{ + /* Nothing to be done! */ + return; +} + + + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + struct perf_event *evts[MAX_HWEVENTS]; + unsigned long evtypes[MAX_HWEVENTS]; + int idx_rubbish_bin[MAX_HWEVENTS]; + int ev; + int n; + + /* We only support a limited range of HARDWARE event types with one + * only programmable via a RAW event type. + */ + if (attr->type == PERF_TYPE_HARDWARE) { + if (attr->config >= alpha_pmu->max_events) + return -EINVAL; + ev = alpha_pmu->event_map[attr->config]; + } else if (attr->type == PERF_TYPE_HW_CACHE) { + return -EOPNOTSUPP; + } else if (attr->type == PERF_TYPE_RAW) { + ev = attr->config & 0xff; + } else { + return -EOPNOTSUPP; + } + + if (ev < 0) { + return ev; + } + + /* The EV67 does not support mode exclusion */ + if (attr->exclude_kernel || attr->exclude_user + || attr->exclude_hv || attr->exclude_idle) { + return -EPERM; + } + + /* + * We place the event type in event_base here and leave calculation + * of the codes to programme the PMU for alpha_pmu_enable() because + * it is only then we will know what HW events are actually + * scheduled on to the PMU. At that point the code to programme the + * PMU is put into config_base and the PMC to use is placed into + * idx. We initialise idx (below) to PMC_NO_INDEX to indicate that + * it is yet to be determined. + */ + hwc->event_base = ev; + + /* Collect events in a group together suitable for calling + * alpha_check_constraints() to verify that the group as a whole can + * be scheduled on to the PMU. + */ + n = 0; + if (event->group_leader != event) { + n = collect_events(event->group_leader, + alpha_pmu->num_pmcs - 1, + evts, evtypes, idx_rubbish_bin); + if (n < 0) + return -EINVAL; + } + evtypes[n] = hwc->event_base; + evts[n] = event; + + if (alpha_check_constraints(evts, evtypes, n + 1)) + return -EINVAL; + + /* Indicate that PMU config and idx are yet to be determined. */ + hwc->config_base = 0; + hwc->idx = PMC_NO_INDEX; + + event->destroy = hw_perf_event_destroy; + + /* + * Most architectures reserve the PMU for their use at this point. + * As there is no existing mechanism to arbitrate usage and there + * appears to be no other user of the Alpha PMU we just assume + * that we can just use it, hence a NO-OP here. + * + * Maybe an alpha_reserve_pmu() routine should be implemented but is + * anything else ever going to use it? + */ + + if (!hwc->sample_period) { + hwc->sample_period = alpha_pmu->pmc_max_period[0]; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + } + + return 0; +} + +/* + * Main entry point to initialise a HW performance event. + */ +static int alpha_pmu_event_init(struct perf_event *event) +{ + int err; + + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + + if (!alpha_pmu) + return -ENODEV; + + /* Do the real initialisation work. */ + err = __hw_perf_event_init(event); + + return err; +} + +/* + * Main entry point - enable HW performance counters. + */ +static void alpha_pmu_enable(struct pmu *pmu) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + if (cpuc->n_events > 0) { + /* Update cpuc with information from any new scheduled events. */ + maybe_change_configuration(cpuc); + + /* Start counting the desired events. */ + wrperfmon(PERFMON_CMD_LOGGING_OPTIONS, EV67_PCTR_MODE_AGGREGATE); + wrperfmon(PERFMON_CMD_DESIRED_EVENTS, cpuc->config); + wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); + } +} + + +/* + * Main entry point - disable HW performance counters. + */ + +static void alpha_pmu_disable(struct pmu *pmu) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + cpuc->n_added = 0; + + wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); +} + +static struct pmu pmu = { + .pmu_enable = alpha_pmu_enable, + .pmu_disable = alpha_pmu_disable, + .event_init = alpha_pmu_event_init, + .add = alpha_pmu_add, + .del = alpha_pmu_del, + .start = alpha_pmu_start, + .stop = alpha_pmu_stop, + .read = alpha_pmu_read, +}; + + +/* + * Main entry point - don't know when this is called but it + * obviously dumps debug info. + */ +void perf_event_print_debug(void) +{ + unsigned long flags; + unsigned long pcr; + int pcr0, pcr1; + int cpu; + + if (!supported_cpu()) + return; + + local_irq_save(flags); + + cpu = smp_processor_id(); + + pcr = wrperfmon(PERFMON_CMD_READ, 0); + pcr0 = (pcr >> alpha_pmu->pmc_count_shift[0]) & alpha_pmu->pmc_count_mask[0]; + pcr1 = (pcr >> alpha_pmu->pmc_count_shift[1]) & alpha_pmu->pmc_count_mask[1]; + + pr_info("CPU#%d: PCTR0[%06x] PCTR1[%06x]\n", cpu, pcr0, pcr1); + + local_irq_restore(flags); +} + + +/* + * Performance Monitoring Interrupt Service Routine called when a PMC + * overflows. The PMC that overflowed is passed in la_ptr. + */ +static void alpha_perf_event_irq_handler(unsigned long la_ptr, + struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc; + struct perf_sample_data data; + struct perf_event *event; + struct hw_perf_event *hwc; + int idx, j; + + __get_cpu_var(irq_pmi_count)++; + cpuc = &__get_cpu_var(cpu_hw_events); + + /* Completely counting through the PMC's period to trigger a new PMC + * overflow interrupt while in this interrupt routine is utterly + * disastrous! The EV6 and EV67 counters are sufficiently large to + * prevent this but to be really sure disable the PMCs. + */ + wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); + + /* la_ptr is the counter that overflowed. */ + if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) { + /* This should never occur! */ + irq_err_count++; + pr_warning("PMI: silly index %ld\n", la_ptr); + wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); + return; + } + + idx = la_ptr; + + perf_sample_data_init(&data, 0); + for (j = 0; j < cpuc->n_events; j++) { + if (cpuc->current_idx[j] == idx) + break; + } + + if (unlikely(j == cpuc->n_events)) { + /* This can occur if the event is disabled right on a PMC overflow. */ + wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); + return; + } + + event = cpuc->event[j]; + + if (unlikely(!event)) { + /* This should never occur! */ + irq_err_count++; + pr_warning("PMI: No event at index %d!\n", idx); + wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); + return; + } + + hwc = &event->hw; + alpha_perf_event_update(event, hwc, idx, alpha_pmu->pmc_max_period[idx]+1); + data.period = event->hw.last_period; + + if (alpha_perf_event_set_period(event, hwc, idx)) { + if (perf_event_overflow(event, &data, regs)) { + /* Interrupts coming too quickly; "throttle" the + * counter, i.e., disable it for a little while. + */ + alpha_pmu_stop(event, 0); + } + } + wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); + + return; +} + + + +/* + * Init call to initialise performance events at kernel startup. + */ +int __init init_hw_perf_events(void) +{ + pr_info("Performance events: "); + + if (!supported_cpu()) { + pr_cont("No support for your CPU.\n"); + return 0; + } + + pr_cont("Supported CPU type!\n"); + + /* Override performance counter IRQ vector */ + + perf_irq = alpha_perf_event_irq_handler; + + /* And set up PMU specification */ + alpha_pmu = &ev67_pmu; + + perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); + + return 0; +} +early_initcall(init_hw_perf_events); diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c new file mode 100644 index 00000000..153d3fce --- /dev/null +++ b/arch/alpha/kernel/process.c @@ -0,0 +1,457 @@ +/* + * linux/arch/alpha/kernel/process.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +/* + * This file handles the architecture-dependent parts of process handling. + */ + +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/time.h> +#include <linux/major.h> +#include <linux/stat.h> +#include <linux/vt.h> +#include <linux/mman.h> +#include <linux/elfcore.h> +#include <linux/reboot.h> +#include <linux/tty.h> +#include <linux/console.h> +#include <linux/slab.h> + +#include <asm/reg.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/hwrpb.h> +#include <asm/fpu.h> + +#include "proto.h" +#include "pci_impl.h" + +/* + * Power off function, if any + */ +void (*pm_power_off)(void) = machine_power_off; +EXPORT_SYMBOL(pm_power_off); + +void +cpu_idle(void) +{ + set_thread_flag(TIF_POLLING_NRFLAG); + + while (1) { + /* FIXME -- EV6 and LCA45 know how to power down + the CPU. */ + + while (!need_resched()) + cpu_relax(); + schedule(); + } +} + + +struct halt_info { + int mode; + char *restart_cmd; +}; + +static void +common_shutdown_1(void *generic_ptr) +{ + struct halt_info *how = (struct halt_info *)generic_ptr; + struct percpu_struct *cpup; + unsigned long *pflags, flags; + int cpuid = smp_processor_id(); + + /* No point in taking interrupts anymore. */ + local_irq_disable(); + + cpup = (struct percpu_struct *) + ((unsigned long)hwrpb + hwrpb->processor_offset + + hwrpb->processor_size * cpuid); + pflags = &cpup->flags; + flags = *pflags; + + /* Clear reason to "default"; clear "bootstrap in progress". */ + flags &= ~0x00ff0001UL; + +#ifdef CONFIG_SMP + /* Secondaries halt here. */ + if (cpuid != boot_cpuid) { + flags |= 0x00040000UL; /* "remain halted" */ + *pflags = flags; + set_cpu_present(cpuid, false); + set_cpu_possible(cpuid, false); + halt(); + } +#endif + + if (how->mode == LINUX_REBOOT_CMD_RESTART) { + if (!how->restart_cmd) { + flags |= 0x00020000UL; /* "cold bootstrap" */ + } else { + /* For SRM, we could probably set environment + variables to get this to work. We'd have to + delay this until after srm_paging_stop unless + we ever got srm_fixup working. + + At the moment, SRM will use the last boot device, + but the file and flags will be the defaults, when + doing a "warm" bootstrap. */ + flags |= 0x00030000UL; /* "warm bootstrap" */ + } + } else { + flags |= 0x00040000UL; /* "remain halted" */ + } + *pflags = flags; + +#ifdef CONFIG_SMP + /* Wait for the secondaries to halt. */ + set_cpu_present(boot_cpuid, false); + set_cpu_possible(boot_cpuid, false); + while (cpumask_weight(cpu_present_mask)) + barrier(); +#endif + + /* If booted from SRM, reset some of the original environment. */ + if (alpha_using_srm) { +#ifdef CONFIG_DUMMY_CONSOLE + /* If we've gotten here after SysRq-b, leave interrupt + context before taking over the console. */ + if (in_interrupt()) + irq_exit(); + /* This has the effect of resetting the VGA video origin. */ + take_over_console(&dummy_con, 0, MAX_NR_CONSOLES-1, 1); +#endif + pci_restore_srm_config(); + set_hae(srm_hae); + } + + if (alpha_mv.kill_arch) + alpha_mv.kill_arch(how->mode); + + if (! alpha_using_srm && how->mode != LINUX_REBOOT_CMD_RESTART) { + /* Unfortunately, since MILO doesn't currently understand + the hwrpb bits above, we can't reliably halt the + processor and keep it halted. So just loop. */ + return; + } + + if (alpha_using_srm) + srm_paging_stop(); + + halt(); +} + +static void +common_shutdown(int mode, char *restart_cmd) +{ + struct halt_info args; + args.mode = mode; + args.restart_cmd = restart_cmd; + on_each_cpu(common_shutdown_1, &args, 0); +} + +void +machine_restart(char *restart_cmd) +{ + common_shutdown(LINUX_REBOOT_CMD_RESTART, restart_cmd); +} + + +void +machine_halt(void) +{ + common_shutdown(LINUX_REBOOT_CMD_HALT, NULL); +} + + +void +machine_power_off(void) +{ + common_shutdown(LINUX_REBOOT_CMD_POWER_OFF, NULL); +} + + +/* Used by sysrq-p, among others. I don't believe r9-r15 are ever + saved in the context it's used. */ + +void +show_regs(struct pt_regs *regs) +{ + dik_show_regs(regs, NULL); +} + +/* + * Re-start a thread when doing execve() + */ +void +start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp) +{ + regs->pc = pc; + regs->ps = 8; + wrusp(sp); +} +EXPORT_SYMBOL(start_thread); + +/* + * Free current thread data structures etc.. + */ +void +exit_thread(void) +{ +} + +void +flush_thread(void) +{ + /* Arrange for each exec'ed process to start off with a clean slate + with respect to the FPU. This is all exceptions disabled. */ + current_thread_info()->ieee_state = 0; + wrfpcr(FPCR_DYN_NORMAL | ieee_swcr_to_fpcr(0)); + + /* Clean slate for TLS. */ + current_thread_info()->pcb.unique = 0; +} + +void +release_thread(struct task_struct *dead_task) +{ +} + +/* + * "alpha_clone()".. By the time we get here, the + * non-volatile registers have also been saved on the + * stack. We do some ugly pointer stuff here.. (see + * also copy_thread) + * + * Notice that "fork()" is implemented in terms of clone, + * with parameters (SIGCHLD, 0). + */ +int +alpha_clone(unsigned long clone_flags, unsigned long usp, + int __user *parent_tid, int __user *child_tid, + unsigned long tls_value, struct pt_regs *regs) +{ + if (!usp) + usp = rdusp(); + + return do_fork(clone_flags, usp, regs, 0, parent_tid, child_tid); +} + +int +alpha_vfork(struct pt_regs *regs) +{ + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, rdusp(), + regs, 0, NULL, NULL); +} + +/* + * Copy an alpha thread.. + * + * Note the "stack_offset" stuff: when returning to kernel mode, we need + * to have some extra stack-space for the kernel stack that still exists + * after the "ret_from_fork". When returning to user mode, we only want + * the space needed by the syscall stack frame (ie "struct pt_regs"). + * Use the passed "regs" pointer to determine how much space we need + * for a kernel fork(). + */ + +int +copy_thread(unsigned long clone_flags, unsigned long usp, + unsigned long unused, + struct task_struct * p, struct pt_regs * regs) +{ + extern void ret_from_fork(void); + + struct thread_info *childti = task_thread_info(p); + struct pt_regs * childregs; + struct switch_stack * childstack, *stack; + unsigned long stack_offset, settls; + + stack_offset = PAGE_SIZE - sizeof(struct pt_regs); + if (!(regs->ps & 8)) + stack_offset = (PAGE_SIZE-1) & (unsigned long) regs; + childregs = (struct pt_regs *) + (stack_offset + PAGE_SIZE + task_stack_page(p)); + + *childregs = *regs; + settls = regs->r20; + childregs->r0 = 0; + childregs->r19 = 0; + childregs->r20 = 1; /* OSF/1 has some strange fork() semantics. */ + regs->r20 = 0; + stack = ((struct switch_stack *) regs) - 1; + childstack = ((struct switch_stack *) childregs) - 1; + *childstack = *stack; + childstack->r26 = (unsigned long) ret_from_fork; + childti->pcb.usp = usp; + childti->pcb.ksp = (unsigned long) childstack; + childti->pcb.flags = 1; /* set FEN, clear everything else */ + + /* Set a new TLS for the child thread? Peek back into the + syscall arguments that we saved on syscall entry. Oops, + except we'd have clobbered it with the parent/child set + of r20. Read the saved copy. */ + /* Note: if CLONE_SETTLS is not set, then we must inherit the + value from the parent, which will have been set by the block + copy in dup_task_struct. This is non-intuitive, but is + required for proper operation in the case of a threaded + application calling fork. */ + if (clone_flags & CLONE_SETTLS) + childti->pcb.unique = settls; + + return 0; +} + +/* + * Fill in the user structure for a ELF core dump. + */ +void +dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt, struct thread_info *ti) +{ + /* switch stack follows right below pt_regs: */ + struct switch_stack * sw = ((struct switch_stack *) pt) - 1; + + dest[ 0] = pt->r0; + dest[ 1] = pt->r1; + dest[ 2] = pt->r2; + dest[ 3] = pt->r3; + dest[ 4] = pt->r4; + dest[ 5] = pt->r5; + dest[ 6] = pt->r6; + dest[ 7] = pt->r7; + dest[ 8] = pt->r8; + dest[ 9] = sw->r9; + dest[10] = sw->r10; + dest[11] = sw->r11; + dest[12] = sw->r12; + dest[13] = sw->r13; + dest[14] = sw->r14; + dest[15] = sw->r15; + dest[16] = pt->r16; + dest[17] = pt->r17; + dest[18] = pt->r18; + dest[19] = pt->r19; + dest[20] = pt->r20; + dest[21] = pt->r21; + dest[22] = pt->r22; + dest[23] = pt->r23; + dest[24] = pt->r24; + dest[25] = pt->r25; + dest[26] = pt->r26; + dest[27] = pt->r27; + dest[28] = pt->r28; + dest[29] = pt->gp; + dest[30] = ti == current_thread_info() ? rdusp() : ti->pcb.usp; + dest[31] = pt->pc; + + /* Once upon a time this was the PS value. Which is stupid + since that is always 8 for usermode. Usurped for the more + useful value of the thread's UNIQUE field. */ + dest[32] = ti->pcb.unique; +} +EXPORT_SYMBOL(dump_elf_thread); + +int +dump_elf_task(elf_greg_t *dest, struct task_struct *task) +{ + dump_elf_thread(dest, task_pt_regs(task), task_thread_info(task)); + return 1; +} +EXPORT_SYMBOL(dump_elf_task); + +int +dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task) +{ + struct switch_stack *sw = (struct switch_stack *)task_pt_regs(task) - 1; + memcpy(dest, sw->fp, 32 * 8); + return 1; +} +EXPORT_SYMBOL(dump_elf_task_fp); + +/* + * sys_execve() executes a new program. + */ +asmlinkage int +do_sys_execve(const char __user *ufilename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) +{ + int error; + char *filename; + + filename = getname(ufilename); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + error = do_execve(filename, argv, envp, regs); + putname(filename); +out: + return error; +} + +/* + * Return saved PC of a blocked thread. This assumes the frame + * pointer is the 6th saved long on the kernel stack and that the + * saved return address is the first long in the frame. This all + * holds provided the thread blocked through a call to schedule() ($15 + * is the frame pointer in schedule() and $15 is saved at offset 48 by + * entry.S:do_switch_stack). + * + * Under heavy swap load I've seen this lose in an ugly way. So do + * some extra sanity checking on the ranges we expect these pointers + * to be in so that we can fail gracefully. This is just for ps after + * all. -- r~ + */ + +unsigned long +thread_saved_pc(struct task_struct *t) +{ + unsigned long base = (unsigned long)task_stack_page(t); + unsigned long fp, sp = task_thread_info(t)->pcb.ksp; + + if (sp > base && sp+6*8 < base + 16*1024) { + fp = ((unsigned long*)sp)[6]; + if (fp > sp && fp < base + 16*1024) + return *(unsigned long *)fp; + } + + return 0; +} + +unsigned long +get_wchan(struct task_struct *p) +{ + unsigned long schedule_frame; + unsigned long pc; + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + /* + * This one depends on the frame size of schedule(). Do a + * "disass schedule" in gdb to find the frame size. Also, the + * code assumes that sleep_on() follows immediately after + * interruptible_sleep_on() and that add_timer() follows + * immediately after interruptible_sleep(). Ugly, isn't it? + * Maybe adding a wchan field to task_struct would be better, + * after all... + */ + + pc = thread_saved_pc(p); + if (in_sched_functions(pc)) { + schedule_frame = ((unsigned long *)task_thread_info(p)->pcb.ksp)[6]; + return ((unsigned long *)schedule_frame)[12]; + } + return pc; +} diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h new file mode 100644 index 00000000..d3e52d3f --- /dev/null +++ b/arch/alpha/kernel/proto.h @@ -0,0 +1,224 @@ +#include <linux/interrupt.h> +#include <linux/io.h> + +#include <asm/pgtable.h> + +/* Prototypes of functions used across modules here in this directory. */ + +#define vucp volatile unsigned char * +#define vusp volatile unsigned short * +#define vip volatile int * +#define vuip volatile unsigned int * +#define vulp volatile unsigned long * + +struct pt_regs; +struct task_struct; +struct pci_dev; +struct pci_controller; + +/* core_apecs.c */ +extern struct pci_ops apecs_pci_ops; +extern void apecs_init_arch(void); +extern void apecs_pci_clr_err(void); +extern void apecs_machine_check(unsigned long vector, unsigned long la_ptr); +extern void apecs_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); + +/* core_cia.c */ +extern struct pci_ops cia_pci_ops; +extern void cia_init_pci(void); +extern void cia_init_arch(void); +extern void pyxis_init_arch(void); +extern void cia_kill_arch(int); +extern void cia_machine_check(unsigned long vector, unsigned long la_ptr); +extern void cia_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); + +/* core_irongate.c */ +extern struct pci_ops irongate_pci_ops; +extern int irongate_pci_clr_err(void); +extern void irongate_init_arch(void); +#define irongate_pci_tbi ((void *)0) + +/* core_lca.c */ +extern struct pci_ops lca_pci_ops; +extern void lca_init_arch(void); +extern void lca_machine_check(unsigned long vector, unsigned long la_ptr); +extern void lca_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); + +/* core_marvel.c */ +extern struct pci_ops marvel_pci_ops; +extern void marvel_init_arch(void); +extern void marvel_kill_arch(int); +extern void marvel_machine_check(unsigned long, unsigned long); +extern void marvel_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); +extern int marvel_pa_to_nid(unsigned long); +extern int marvel_cpuid_to_nid(int); +extern unsigned long marvel_node_mem_start(int); +extern unsigned long marvel_node_mem_size(int); +extern struct _alpha_agp_info *marvel_agp_info(void); +struct io7 *marvel_find_io7(int pe); +struct io7 *marvel_next_io7(struct io7 *prev); +void io7_clear_errors(struct io7 *io7); + +/* core_mcpcia.c */ +extern struct pci_ops mcpcia_pci_ops; +extern void mcpcia_init_arch(void); +extern void mcpcia_init_hoses(void); +extern void mcpcia_machine_check(unsigned long vector, unsigned long la_ptr); +extern void mcpcia_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); + +/* core_polaris.c */ +extern struct pci_ops polaris_pci_ops; +extern int polaris_read_config_dword(struct pci_dev *, int, u32 *); +extern int polaris_write_config_dword(struct pci_dev *, int, u32); +extern void polaris_init_arch(void); +extern void polaris_machine_check(unsigned long vector, unsigned long la_ptr); +#define polaris_pci_tbi ((void *)0) + +/* core_t2.c */ +extern struct pci_ops t2_pci_ops; +extern void t2_init_arch(void); +extern void t2_kill_arch(int); +extern void t2_machine_check(unsigned long vector, unsigned long la_ptr); +extern void t2_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); + +/* core_titan.c */ +extern struct pci_ops titan_pci_ops; +extern void titan_init_arch(void); +extern void titan_kill_arch(int); +extern void titan_machine_check(unsigned long, unsigned long); +extern void titan_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); +extern struct _alpha_agp_info *titan_agp_info(void); + +/* core_tsunami.c */ +extern struct pci_ops tsunami_pci_ops; +extern void tsunami_init_arch(void); +extern void tsunami_kill_arch(int); +extern void tsunami_machine_check(unsigned long vector, unsigned long la_ptr); +extern void tsunami_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); + +/* core_wildfire.c */ +extern struct pci_ops wildfire_pci_ops; +extern void wildfire_init_arch(void); +extern void wildfire_kill_arch(int); +extern void wildfire_machine_check(unsigned long vector, unsigned long la_ptr); +extern void wildfire_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); +extern int wildfire_pa_to_nid(unsigned long); +extern int wildfire_cpuid_to_nid(int); +extern unsigned long wildfire_node_mem_start(int); +extern unsigned long wildfire_node_mem_size(int); + +/* console.c */ +#ifdef CONFIG_VGA_HOSE +extern void find_console_vga_hose(void); +extern void locate_and_init_vga(void *(*)(void *, void *)); +#else +static inline void find_console_vga_hose(void) { } +static inline void locate_and_init_vga(void *(*sel_func)(void *, void *)) { } +#endif + +/* setup.c */ +extern unsigned long srm_hae; +extern int boot_cpuid; +#ifdef CONFIG_VERBOSE_MCHECK +extern unsigned long alpha_verbose_mcheck; +#endif + +/* srmcons.c */ +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SRM) +extern void register_srm_console(void); +extern void unregister_srm_console(void); +#else +#define register_srm_console() +#define unregister_srm_console() +#endif + +/* smp.c */ +extern void setup_smp(void); +extern void handle_ipi(struct pt_regs *); +extern void smp_percpu_timer_interrupt(struct pt_regs *); + +/* bios32.c */ +/* extern void reset_for_srm(void); */ + +/* time.c */ +extern irqreturn_t timer_interrupt(int irq, void *dev); +extern void common_init_rtc(void); +extern unsigned long est_cycle_freq; +extern unsigned int common_get_rtc_time(struct rtc_time *time); +extern int common_set_rtc_time(struct rtc_time *time); + +/* smc37c93x.c */ +extern void SMC93x_Init(void); + +/* smc37c669.c */ +extern void SMC669_Init(int); + +/* es1888.c */ +extern void es1888_init(void); + +/* ../lib/fpreg.c */ +extern void alpha_write_fp_reg (unsigned long reg, unsigned long val); +extern unsigned long alpha_read_fp_reg (unsigned long reg); + +/* head.S */ +extern void wrmces(unsigned long mces); +extern void cserve_ena(unsigned long); +extern void cserve_dis(unsigned long); +extern void __smp_callin(unsigned long); + +/* entry.S */ +extern void entArith(void); +extern void entIF(void); +extern void entInt(void); +extern void entMM(void); +extern void entSys(void); +extern void entUna(void); +extern void entDbg(void); + +/* ptrace.c */ +extern int ptrace_set_bpt (struct task_struct *child); +extern int ptrace_cancel_bpt (struct task_struct *child); + +/* traps.c */ +extern void dik_show_regs(struct pt_regs *regs, unsigned long *r9_15); +extern void die_if_kernel(char *, struct pt_regs *, long, unsigned long *); + +/* sys_titan.c */ +extern void titan_dispatch_irqs(u64); + +/* ../mm/init.c */ +extern void switch_to_system_map(void); +extern void srm_paging_stop(void); + +static inline int +__alpha_remap_area_pages(unsigned long address, unsigned long phys_addr, + unsigned long size, unsigned long flags) +{ + pgprot_t prot; + + prot = __pgprot(_PAGE_VALID | _PAGE_ASM | _PAGE_KRE + | _PAGE_KWE | flags); + return ioremap_page_range(address, address + size, phys_addr, prot); +} + +/* irq.c */ + +#ifdef CONFIG_SMP +#define mcheck_expected(cpu) (cpu_data[cpu].mcheck_expected) +#define mcheck_taken(cpu) (cpu_data[cpu].mcheck_taken) +#define mcheck_extra(cpu) (cpu_data[cpu].mcheck_extra) +#else +extern struct mcheck_info +{ + unsigned char expected __attribute__((aligned(8))); + unsigned char taken; + unsigned char extra; +} __mcheck_info; + +#define mcheck_expected(cpu) (*((void)(cpu), &__mcheck_info.expected)) +#define mcheck_taken(cpu) (*((void)(cpu), &__mcheck_info.taken)) +#define mcheck_extra(cpu) (*((void)(cpu), &__mcheck_info.extra)) +#endif + +extern void process_mcheck_info(unsigned long vector, unsigned long la_ptr, + const char *machine, int expected); diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c new file mode 100644 index 00000000..54616f49 --- /dev/null +++ b/arch/alpha/kernel/ptrace.c @@ -0,0 +1,336 @@ +/* ptrace.c */ +/* By Ross Biro 1/23/92 */ +/* edited by Linus Torvalds */ +/* mangled further by Bob Manson (manson@santafe.edu) */ +/* more mutilation by David Mosberger (davidm@azstarnet.com) */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/security.h> +#include <linux/signal.h> + +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/fpu.h> + +#include "proto.h" + +#define DEBUG DBG_MEM +#undef DEBUG + +#ifdef DEBUG +enum { + DBG_MEM = (1<<0), + DBG_BPT = (1<<1), + DBG_MEM_ALL = (1<<2) +}; +#define DBG(fac,args) {if ((fac) & DEBUG) printk args;} +#else +#define DBG(fac,args) +#endif + +#define BREAKINST 0x00000080 /* call_pal bpt */ + +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +/* + * Processes always block with the following stack-layout: + * + * +================================+ <---- task + 2*PAGE_SIZE + * | PALcode saved frame (ps, pc, | ^ + * | gp, a0, a1, a2) | | + * +================================+ | struct pt_regs + * | | | + * | frame generated by SAVE_ALL | | + * | | v + * +================================+ + * | | ^ + * | frame saved by do_switch_stack | | struct switch_stack + * | | v + * +================================+ + */ + +/* + * The following table maps a register index into the stack offset at + * which the register is saved. Register indices are 0-31 for integer + * regs, 32-63 for fp regs, and 64 for the pc. Notice that sp and + * zero have no stack-slot and need to be treated specially (see + * get_reg/put_reg below). + */ +enum { + REG_R0 = 0, REG_F0 = 32, REG_FPCR = 63, REG_PC = 64 +}; + +#define PT_REG(reg) \ + (PAGE_SIZE*2 - sizeof(struct pt_regs) + offsetof(struct pt_regs, reg)) + +#define SW_REG(reg) \ + (PAGE_SIZE*2 - sizeof(struct pt_regs) - sizeof(struct switch_stack) \ + + offsetof(struct switch_stack, reg)) + +static int regoff[] = { + PT_REG( r0), PT_REG( r1), PT_REG( r2), PT_REG( r3), + PT_REG( r4), PT_REG( r5), PT_REG( r6), PT_REG( r7), + PT_REG( r8), SW_REG( r9), SW_REG( r10), SW_REG( r11), + SW_REG( r12), SW_REG( r13), SW_REG( r14), SW_REG( r15), + PT_REG( r16), PT_REG( r17), PT_REG( r18), PT_REG( r19), + PT_REG( r20), PT_REG( r21), PT_REG( r22), PT_REG( r23), + PT_REG( r24), PT_REG( r25), PT_REG( r26), PT_REG( r27), + PT_REG( r28), PT_REG( gp), -1, -1, + SW_REG(fp[ 0]), SW_REG(fp[ 1]), SW_REG(fp[ 2]), SW_REG(fp[ 3]), + SW_REG(fp[ 4]), SW_REG(fp[ 5]), SW_REG(fp[ 6]), SW_REG(fp[ 7]), + SW_REG(fp[ 8]), SW_REG(fp[ 9]), SW_REG(fp[10]), SW_REG(fp[11]), + SW_REG(fp[12]), SW_REG(fp[13]), SW_REG(fp[14]), SW_REG(fp[15]), + SW_REG(fp[16]), SW_REG(fp[17]), SW_REG(fp[18]), SW_REG(fp[19]), + SW_REG(fp[20]), SW_REG(fp[21]), SW_REG(fp[22]), SW_REG(fp[23]), + SW_REG(fp[24]), SW_REG(fp[25]), SW_REG(fp[26]), SW_REG(fp[27]), + SW_REG(fp[28]), SW_REG(fp[29]), SW_REG(fp[30]), SW_REG(fp[31]), + PT_REG( pc) +}; + +static unsigned long zero; + +/* + * Get address of register REGNO in task TASK. + */ +static unsigned long * +get_reg_addr(struct task_struct * task, unsigned long regno) +{ + unsigned long *addr; + + if (regno == 30) { + addr = &task_thread_info(task)->pcb.usp; + } else if (regno == 65) { + addr = &task_thread_info(task)->pcb.unique; + } else if (regno == 31 || regno > 65) { + zero = 0; + addr = &zero; + } else { + addr = task_stack_page(task) + regoff[regno]; + } + return addr; +} + +/* + * Get contents of register REGNO in task TASK. + */ +static unsigned long +get_reg(struct task_struct * task, unsigned long regno) +{ + /* Special hack for fpcr -- combine hardware and software bits. */ + if (regno == 63) { + unsigned long fpcr = *get_reg_addr(task, regno); + unsigned long swcr + = task_thread_info(task)->ieee_state & IEEE_SW_MASK; + swcr = swcr_update_status(swcr, fpcr); + return fpcr | swcr; + } + return *get_reg_addr(task, regno); +} + +/* + * Write contents of register REGNO in task TASK. + */ +static int +put_reg(struct task_struct *task, unsigned long regno, unsigned long data) +{ + if (regno == 63) { + task_thread_info(task)->ieee_state + = ((task_thread_info(task)->ieee_state & ~IEEE_SW_MASK) + | (data & IEEE_SW_MASK)); + data = (data & FPCR_DYN_MASK) | ieee_swcr_to_fpcr(data); + } + *get_reg_addr(task, regno) = data; + return 0; +} + +static inline int +read_int(struct task_struct *task, unsigned long addr, int * data) +{ + int copied = access_process_vm(task, addr, data, sizeof(int), 0); + return (copied == sizeof(int)) ? 0 : -EIO; +} + +static inline int +write_int(struct task_struct *task, unsigned long addr, int data) +{ + int copied = access_process_vm(task, addr, &data, sizeof(int), 1); + return (copied == sizeof(int)) ? 0 : -EIO; +} + +/* + * Set breakpoint. + */ +int +ptrace_set_bpt(struct task_struct * child) +{ + int displ, i, res, reg_b, nsaved = 0; + unsigned int insn, op_code; + unsigned long pc; + + pc = get_reg(child, REG_PC); + res = read_int(child, pc, (int *) &insn); + if (res < 0) + return res; + + op_code = insn >> 26; + if (op_code >= 0x30) { + /* + * It's a branch: instead of trying to figure out + * whether the branch will be taken or not, we'll put + * a breakpoint at either location. This is simpler, + * more reliable, and probably not a whole lot slower + * than the alternative approach of emulating the + * branch (emulation can be tricky for fp branches). + */ + displ = ((s32)(insn << 11)) >> 9; + task_thread_info(child)->bpt_addr[nsaved++] = pc + 4; + if (displ) /* guard against unoptimized code */ + task_thread_info(child)->bpt_addr[nsaved++] + = pc + 4 + displ; + DBG(DBG_BPT, ("execing branch\n")); + } else if (op_code == 0x1a) { + reg_b = (insn >> 16) & 0x1f; + task_thread_info(child)->bpt_addr[nsaved++] = get_reg(child, reg_b); + DBG(DBG_BPT, ("execing jump\n")); + } else { + task_thread_info(child)->bpt_addr[nsaved++] = pc + 4; + DBG(DBG_BPT, ("execing normal insn\n")); + } + + /* install breakpoints: */ + for (i = 0; i < nsaved; ++i) { + res = read_int(child, task_thread_info(child)->bpt_addr[i], + (int *) &insn); + if (res < 0) + return res; + task_thread_info(child)->bpt_insn[i] = insn; + DBG(DBG_BPT, (" -> next_pc=%lx\n", + task_thread_info(child)->bpt_addr[i])); + res = write_int(child, task_thread_info(child)->bpt_addr[i], + BREAKINST); + if (res < 0) + return res; + } + task_thread_info(child)->bpt_nsaved = nsaved; + return 0; +} + +/* + * Ensure no single-step breakpoint is pending. Returns non-zero + * value if child was being single-stepped. + */ +int +ptrace_cancel_bpt(struct task_struct * child) +{ + int i, nsaved = task_thread_info(child)->bpt_nsaved; + + task_thread_info(child)->bpt_nsaved = 0; + + if (nsaved > 2) { + printk("ptrace_cancel_bpt: bogus nsaved: %d!\n", nsaved); + nsaved = 2; + } + + for (i = 0; i < nsaved; ++i) { + write_int(child, task_thread_info(child)->bpt_addr[i], + task_thread_info(child)->bpt_insn[i]); + } + return (nsaved != 0); +} + +void user_enable_single_step(struct task_struct *child) +{ + /* Mark single stepping. */ + task_thread_info(child)->bpt_nsaved = -1; +} + +void user_disable_single_step(struct task_struct *child) +{ + ptrace_cancel_bpt(child); +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure the single step bit is not set. + */ +void ptrace_disable(struct task_struct *child) +{ + user_disable_single_step(child); +} + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + unsigned long tmp; + size_t copied; + long ret; + + switch (request) { + /* When I and D space are separate, these will need to be fixed. */ + case PTRACE_PEEKTEXT: /* read word at location addr. */ + case PTRACE_PEEKDATA: + copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); + ret = -EIO; + if (copied != sizeof(tmp)) + break; + + force_successful_syscall_return(); + ret = tmp; + break; + + /* Read register number ADDR. */ + case PTRACE_PEEKUSR: + force_successful_syscall_return(); + ret = get_reg(child, addr); + DBG(DBG_MEM, ("peek $%lu->%#lx\n", addr, ret)); + break; + + /* When I and D space are separate, this will have to be fixed. */ + case PTRACE_POKETEXT: /* write the word at location addr. */ + case PTRACE_POKEDATA: + ret = generic_ptrace_pokedata(child, addr, data); + break; + + case PTRACE_POKEUSR: /* write the specified register */ + DBG(DBG_MEM, ("poke $%lu<-%#lx\n", addr, data)); + ret = put_reg(child, addr, data); + break; + default: + ret = ptrace_request(child, request, addr, data); + break; + } + return ret; +} + +asmlinkage void +syscall_trace(void) +{ + if (!test_thread_flag(TIF_SYSCALL_TRACE)) + return; + if (!(current->ptrace & PT_PTRACED)) + return; + /* The 0x80 provides a way for the tracing parent to distinguish + between a syscall stop and SIGTRAP delivery */ + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) + ? 0x80 : 0)); + + /* + * This isn't the same as continuing with a signal, but it will do + * for normal use. strace only continues with a signal if the + * stopping signal is not SIGTRAP. -brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } +} diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c new file mode 100644 index 00000000..9e3107cc --- /dev/null +++ b/arch/alpha/kernel/setup.c @@ -0,0 +1,1514 @@ +/* + * linux/arch/alpha/kernel/setup.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +/* 2.3.x bootmem, 1999 Andrea Arcangeli <andrea@suse.de> */ + +/* + * Bootup setup stuff. + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/screen_info.h> +#include <linux/delay.h> +#include <linux/mc146818rtc.h> +#include <linux/console.h> +#include <linux/cpu.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/string.h> +#include <linux/ioport.h> +#include <linux/platform_device.h> +#include <linux/bootmem.h> +#include <linux/pci.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/initrd.h> +#include <linux/eisa.h> +#include <linux/pfn.h> +#ifdef CONFIG_MAGIC_SYSRQ +#include <linux/sysrq.h> +#include <linux/reboot.h> +#endif +#include <linux/notifier.h> +#include <asm/setup.h> +#include <asm/io.h> +#include <linux/log2.h> +#include <linux/export.h> + +extern struct atomic_notifier_head panic_notifier_list; +static int alpha_panic_event(struct notifier_block *, unsigned long, void *); +static struct notifier_block alpha_panic_block = { + alpha_panic_event, + NULL, + INT_MAX /* try to do it first */ +}; + +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/hwrpb.h> +#include <asm/dma.h> +#include <asm/mmu_context.h> +#include <asm/console.h> + +#include "proto.h" +#include "pci_impl.h" + + +struct hwrpb_struct *hwrpb; +EXPORT_SYMBOL(hwrpb); +unsigned long srm_hae; + +int alpha_l1i_cacheshape; +int alpha_l1d_cacheshape; +int alpha_l2_cacheshape; +int alpha_l3_cacheshape; + +#ifdef CONFIG_VERBOSE_MCHECK +/* 0=minimum, 1=verbose, 2=all */ +/* These can be overridden via the command line, ie "verbose_mcheck=2") */ +unsigned long alpha_verbose_mcheck = CONFIG_VERBOSE_MCHECK_ON; +#endif + +#ifdef CONFIG_NUMA +struct cpumask node_to_cpumask_map[MAX_NUMNODES] __read_mostly; +EXPORT_SYMBOL(node_to_cpumask_map); +#endif + +/* Which processor we booted from. */ +int boot_cpuid; + +/* + * Using SRM callbacks for initial console output. This works from + * setup_arch() time through the end of time_init(), as those places + * are under our (Alpha) control. + + * "srmcons" specified in the boot command arguments allows us to + * see kernel messages during the period of time before the true + * console device is "registered" during console_init(). + * As of this version (2.5.59), console_init() will call + * disable_early_printk() as the last action before initializing + * the console drivers. That's the last possible time srmcons can be + * unregistered without interfering with console behavior. + * + * By default, OFF; set it with a bootcommand arg of "srmcons" or + * "console=srm". The meaning of these two args is: + * "srmcons" - early callback prints + * "console=srm" - full callback based console, including early prints + */ +int srmcons_output = 0; + +/* Enforce a memory size limit; useful for testing. By default, none. */ +unsigned long mem_size_limit = 0; + +/* Set AGP GART window size (0 means disabled). */ +unsigned long alpha_agpgart_size = DEFAULT_AGP_APER_SIZE; + +#ifdef CONFIG_ALPHA_GENERIC +struct alpha_machine_vector alpha_mv; +int alpha_using_srm; +EXPORT_SYMBOL(alpha_using_srm); +#endif + +static struct alpha_machine_vector *get_sysvec(unsigned long, unsigned long, + unsigned long); +static struct alpha_machine_vector *get_sysvec_byname(const char *); +static void get_sysnames(unsigned long, unsigned long, unsigned long, + char **, char **); +static void determine_cpu_caches (unsigned int); + +static char __initdata command_line[COMMAND_LINE_SIZE]; + +/* + * The format of "screen_info" is strange, and due to early + * i386-setup code. This is just enough to make the console + * code think we're on a VGA color display. + */ + +struct screen_info screen_info = { + .orig_x = 0, + .orig_y = 25, + .orig_video_cols = 80, + .orig_video_lines = 25, + .orig_video_isVGA = 1, + .orig_video_points = 16 +}; + +EXPORT_SYMBOL(screen_info); + +/* + * The direct map I/O window, if any. This should be the same + * for all busses, since it's used by virt_to_bus. + */ + +unsigned long __direct_map_base; +unsigned long __direct_map_size; +EXPORT_SYMBOL(__direct_map_base); +EXPORT_SYMBOL(__direct_map_size); + +/* + * Declare all of the machine vectors. + */ + +/* GCC 2.7.2 (on alpha at least) is lame. It does not support either + __attribute__((weak)) or #pragma weak. Bypass it and talk directly + to the assembler. */ + +#define WEAK(X) \ + extern struct alpha_machine_vector X; \ + asm(".weak "#X) + +WEAK(alcor_mv); +WEAK(alphabook1_mv); +WEAK(avanti_mv); +WEAK(cabriolet_mv); +WEAK(clipper_mv); +WEAK(dp264_mv); +WEAK(eb164_mv); +WEAK(eb64p_mv); +WEAK(eb66_mv); +WEAK(eb66p_mv); +WEAK(eiger_mv); +WEAK(jensen_mv); +WEAK(lx164_mv); +WEAK(lynx_mv); +WEAK(marvel_ev7_mv); +WEAK(miata_mv); +WEAK(mikasa_mv); +WEAK(mikasa_primo_mv); +WEAK(monet_mv); +WEAK(nautilus_mv); +WEAK(noname_mv); +WEAK(noritake_mv); +WEAK(noritake_primo_mv); +WEAK(p2k_mv); +WEAK(pc164_mv); +WEAK(privateer_mv); +WEAK(rawhide_mv); +WEAK(ruffian_mv); +WEAK(rx164_mv); +WEAK(sable_mv); +WEAK(sable_gamma_mv); +WEAK(shark_mv); +WEAK(sx164_mv); +WEAK(takara_mv); +WEAK(titan_mv); +WEAK(webbrick_mv); +WEAK(wildfire_mv); +WEAK(xl_mv); +WEAK(xlt_mv); + +#undef WEAK + +/* + * I/O resources inherited from PeeCees. Except for perhaps the + * turbochannel alphas, everyone has these on some sort of SuperIO chip. + * + * ??? If this becomes less standard, move the struct out into the + * machine vector. + */ + +static void __init +reserve_std_resources(void) +{ + static struct resource standard_io_resources[] = { + { .name = "rtc", .start = -1, .end = -1 }, + { .name = "dma1", .start = 0x00, .end = 0x1f }, + { .name = "pic1", .start = 0x20, .end = 0x3f }, + { .name = "timer", .start = 0x40, .end = 0x5f }, + { .name = "keyboard", .start = 0x60, .end = 0x6f }, + { .name = "dma page reg", .start = 0x80, .end = 0x8f }, + { .name = "pic2", .start = 0xa0, .end = 0xbf }, + { .name = "dma2", .start = 0xc0, .end = 0xdf }, + }; + + struct resource *io = &ioport_resource; + size_t i; + + if (hose_head) { + struct pci_controller *hose; + for (hose = hose_head; hose; hose = hose->next) + if (hose->index == 0) { + io = hose->io_space; + break; + } + } + + /* Fix up for the Jensen's queer RTC placement. */ + standard_io_resources[0].start = RTC_PORT(0); + standard_io_resources[0].end = RTC_PORT(0) + 0x10; + + for (i = 0; i < ARRAY_SIZE(standard_io_resources); ++i) + request_resource(io, standard_io_resources+i); +} + +#define PFN_MAX PFN_DOWN(0x80000000) +#define for_each_mem_cluster(memdesc, _cluster, i) \ + for ((_cluster) = (memdesc)->cluster, (i) = 0; \ + (i) < (memdesc)->numclusters; (i)++, (_cluster)++) + +static unsigned long __init +get_mem_size_limit(char *s) +{ + unsigned long end = 0; + char *from = s; + + end = simple_strtoul(from, &from, 0); + if ( *from == 'K' || *from == 'k' ) { + end = end << 10; + from++; + } else if ( *from == 'M' || *from == 'm' ) { + end = end << 20; + from++; + } else if ( *from == 'G' || *from == 'g' ) { + end = end << 30; + from++; + } + return end >> PAGE_SHIFT; /* Return the PFN of the limit. */ +} + +#ifdef CONFIG_BLK_DEV_INITRD +void * __init +move_initrd(unsigned long mem_limit) +{ + void *start; + unsigned long size; + + size = initrd_end - initrd_start; + start = __alloc_bootmem(PAGE_ALIGN(size), PAGE_SIZE, 0); + if (!start || __pa(start) + size > mem_limit) { + initrd_start = initrd_end = 0; + return NULL; + } + memmove(start, (void *)initrd_start, size); + initrd_start = (unsigned long)start; + initrd_end = initrd_start + size; + printk("initrd moved to %p\n", start); + return start; +} +#endif + +#ifndef CONFIG_DISCONTIGMEM +static void __init +setup_memory(void *kernel_end) +{ + struct memclust_struct * cluster; + struct memdesc_struct * memdesc; + unsigned long start_kernel_pfn, end_kernel_pfn; + unsigned long bootmap_size, bootmap_pages, bootmap_start; + unsigned long start, end; + unsigned long i; + + /* Find free clusters, and init and free the bootmem accordingly. */ + memdesc = (struct memdesc_struct *) + (hwrpb->mddt_offset + (unsigned long) hwrpb); + + for_each_mem_cluster(memdesc, cluster, i) { + printk("memcluster %lu, usage %01lx, start %8lu, end %8lu\n", + i, cluster->usage, cluster->start_pfn, + cluster->start_pfn + cluster->numpages); + + /* Bit 0 is console/PALcode reserved. Bit 1 is + non-volatile memory -- we might want to mark + this for later. */ + if (cluster->usage & 3) + continue; + + end = cluster->start_pfn + cluster->numpages; + if (end > max_low_pfn) + max_low_pfn = end; + } + + /* + * Except for the NUMA systems (wildfire, marvel) all of the + * Alpha systems we run on support 32GB of memory or less. + * Since the NUMA systems introduce large holes in memory addressing, + * we can get into a situation where there is not enough contiguous + * memory for the memory map. + * + * Limit memory to the first 32GB to limit the NUMA systems to + * memory on their first node (wildfire) or 2 (marvel) to avoid + * not being able to produce the memory map. In order to access + * all of the memory on the NUMA systems, build with discontiguous + * memory support. + * + * If the user specified a memory limit, let that memory limit stand. + */ + if (!mem_size_limit) + mem_size_limit = (32ul * 1024 * 1024 * 1024) >> PAGE_SHIFT; + + if (mem_size_limit && max_low_pfn >= mem_size_limit) + { + printk("setup: forcing memory size to %ldK (from %ldK).\n", + mem_size_limit << (PAGE_SHIFT - 10), + max_low_pfn << (PAGE_SHIFT - 10)); + max_low_pfn = mem_size_limit; + } + + /* Find the bounds of kernel memory. */ + start_kernel_pfn = PFN_DOWN(KERNEL_START_PHYS); + end_kernel_pfn = PFN_UP(virt_to_phys(kernel_end)); + bootmap_start = -1; + + try_again: + if (max_low_pfn <= end_kernel_pfn) + panic("not enough memory to boot"); + + /* We need to know how many physically contiguous pages + we'll need for the bootmap. */ + bootmap_pages = bootmem_bootmap_pages(max_low_pfn); + + /* Now find a good region where to allocate the bootmap. */ + for_each_mem_cluster(memdesc, cluster, i) { + if (cluster->usage & 3) + continue; + + start = cluster->start_pfn; + end = start + cluster->numpages; + if (start >= max_low_pfn) + continue; + if (end > max_low_pfn) + end = max_low_pfn; + if (start < start_kernel_pfn) { + if (end > end_kernel_pfn + && end - end_kernel_pfn >= bootmap_pages) { + bootmap_start = end_kernel_pfn; + break; + } else if (end > start_kernel_pfn) + end = start_kernel_pfn; + } else if (start < end_kernel_pfn) + start = end_kernel_pfn; + if (end - start >= bootmap_pages) { + bootmap_start = start; + break; + } + } + + if (bootmap_start == ~0UL) { + max_low_pfn >>= 1; + goto try_again; + } + + /* Allocate the bootmap and mark the whole MM as reserved. */ + bootmap_size = init_bootmem(bootmap_start, max_low_pfn); + + /* Mark the free regions. */ + for_each_mem_cluster(memdesc, cluster, i) { + if (cluster->usage & 3) + continue; + + start = cluster->start_pfn; + end = cluster->start_pfn + cluster->numpages; + if (start >= max_low_pfn) + continue; + if (end > max_low_pfn) + end = max_low_pfn; + if (start < start_kernel_pfn) { + if (end > end_kernel_pfn) { + free_bootmem(PFN_PHYS(start), + (PFN_PHYS(start_kernel_pfn) + - PFN_PHYS(start))); + printk("freeing pages %ld:%ld\n", + start, start_kernel_pfn); + start = end_kernel_pfn; + } else if (end > start_kernel_pfn) + end = start_kernel_pfn; + } else if (start < end_kernel_pfn) + start = end_kernel_pfn; + if (start >= end) + continue; + + free_bootmem(PFN_PHYS(start), PFN_PHYS(end) - PFN_PHYS(start)); + printk("freeing pages %ld:%ld\n", start, end); + } + + /* Reserve the bootmap memory. */ + reserve_bootmem(PFN_PHYS(bootmap_start), bootmap_size, + BOOTMEM_DEFAULT); + printk("reserving pages %ld:%ld\n", bootmap_start, bootmap_start+PFN_UP(bootmap_size)); + +#ifdef CONFIG_BLK_DEV_INITRD + initrd_start = INITRD_START; + if (initrd_start) { + initrd_end = initrd_start+INITRD_SIZE; + printk("Initial ramdisk at: 0x%p (%lu bytes)\n", + (void *) initrd_start, INITRD_SIZE); + + if ((void *)initrd_end > phys_to_virt(PFN_PHYS(max_low_pfn))) { + if (!move_initrd(PFN_PHYS(max_low_pfn))) + printk("initrd extends beyond end of memory " + "(0x%08lx > 0x%p)\ndisabling initrd\n", + initrd_end, + phys_to_virt(PFN_PHYS(max_low_pfn))); + } else { + reserve_bootmem(virt_to_phys((void *)initrd_start), + INITRD_SIZE, BOOTMEM_DEFAULT); + } + } +#endif /* CONFIG_BLK_DEV_INITRD */ +} +#else +extern void setup_memory(void *); +#endif /* !CONFIG_DISCONTIGMEM */ + +int __init +page_is_ram(unsigned long pfn) +{ + struct memclust_struct * cluster; + struct memdesc_struct * memdesc; + unsigned long i; + + memdesc = (struct memdesc_struct *) + (hwrpb->mddt_offset + (unsigned long) hwrpb); + for_each_mem_cluster(memdesc, cluster, i) + { + if (pfn >= cluster->start_pfn && + pfn < cluster->start_pfn + cluster->numpages) { + return (cluster->usage & 3) ? 0 : 1; + } + } + + return 0; +} + +static int __init +register_cpus(void) +{ + int i; + + for_each_possible_cpu(i) { + struct cpu *p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + register_cpu(p, i); + } + return 0; +} + +arch_initcall(register_cpus); + +void __init +setup_arch(char **cmdline_p) +{ + extern char _end[]; + + struct alpha_machine_vector *vec = NULL; + struct percpu_struct *cpu; + char *type_name, *var_name, *p; + void *kernel_end = _end; /* end of kernel */ + char *args = command_line; + + hwrpb = (struct hwrpb_struct*) __va(INIT_HWRPB->phys_addr); + boot_cpuid = hard_smp_processor_id(); + + /* + * Pre-process the system type to make sure it will be valid. + * + * This may restore real CABRIO and EB66+ family names, ie + * EB64+ and EB66. + * + * Oh, and "white box" AS800 (aka DIGITAL Server 3000 series) + * and AS1200 (DIGITAL Server 5000 series) have the type as + * the negative of the real one. + */ + if ((long)hwrpb->sys_type < 0) { + hwrpb->sys_type = -((long)hwrpb->sys_type); + hwrpb_update_checksum(hwrpb); + } + + /* Register a call for panic conditions. */ + atomic_notifier_chain_register(&panic_notifier_list, + &alpha_panic_block); + +#ifdef CONFIG_ALPHA_GENERIC + /* Assume that we've booted from SRM if we haven't booted from MILO. + Detect the later by looking for "MILO" in the system serial nr. */ + alpha_using_srm = strncmp((const char *)hwrpb->ssn, "MILO", 4) != 0; +#endif + + /* If we are using SRM, we want to allow callbacks + as early as possible, so do this NOW, and then + they should work immediately thereafter. + */ + kernel_end = callback_init(kernel_end); + + /* + * Locate the command line. + */ + /* Hack for Jensen... since we're restricted to 8 or 16 chars for + boot flags depending on the boot mode, we need some shorthand. + This should do for installation. */ + if (strcmp(COMMAND_LINE, "INSTALL") == 0) { + strlcpy(command_line, "root=/dev/fd0 load_ramdisk=1", sizeof command_line); + } else { + strlcpy(command_line, COMMAND_LINE, sizeof command_line); + } + strcpy(boot_command_line, command_line); + *cmdline_p = command_line; + + /* + * Process command-line arguments. + */ + while ((p = strsep(&args, " \t")) != NULL) { + if (!*p) continue; + if (strncmp(p, "alpha_mv=", 9) == 0) { + vec = get_sysvec_byname(p+9); + continue; + } + if (strncmp(p, "cycle=", 6) == 0) { + est_cycle_freq = simple_strtol(p+6, NULL, 0); + continue; + } + if (strncmp(p, "mem=", 4) == 0) { + mem_size_limit = get_mem_size_limit(p+4); + continue; + } + if (strncmp(p, "srmcons", 7) == 0) { + srmcons_output |= 1; + continue; + } + if (strncmp(p, "console=srm", 11) == 0) { + srmcons_output |= 2; + continue; + } + if (strncmp(p, "gartsize=", 9) == 0) { + alpha_agpgart_size = + get_mem_size_limit(p+9) << PAGE_SHIFT; + continue; + } +#ifdef CONFIG_VERBOSE_MCHECK + if (strncmp(p, "verbose_mcheck=", 15) == 0) { + alpha_verbose_mcheck = simple_strtol(p+15, NULL, 0); + continue; + } +#endif + } + + /* Replace the command line, now that we've killed it with strsep. */ + strcpy(command_line, boot_command_line); + + /* If we want SRM console printk echoing early, do it now. */ + if (alpha_using_srm && srmcons_output) { + register_srm_console(); + + /* + * If "console=srm" was specified, clear the srmcons_output + * flag now so that time.c won't unregister_srm_console + */ + if (srmcons_output & 2) + srmcons_output = 0; + } + +#ifdef CONFIG_MAGIC_SYSRQ + /* If we're using SRM, make sysrq-b halt back to the prom, + not auto-reboot. */ + if (alpha_using_srm) { + struct sysrq_key_op *op = __sysrq_get_key_op('b'); + op->handler = (void *) machine_halt; + } +#endif + + /* + * Identify and reconfigure for the current system. + */ + cpu = (struct percpu_struct*)((char*)hwrpb + hwrpb->processor_offset); + + get_sysnames(hwrpb->sys_type, hwrpb->sys_variation, + cpu->type, &type_name, &var_name); + if (*var_name == '0') + var_name = ""; + + if (!vec) { + vec = get_sysvec(hwrpb->sys_type, hwrpb->sys_variation, + cpu->type); + } + + if (!vec) { + panic("Unsupported system type: %s%s%s (%ld %ld)\n", + type_name, (*var_name ? " variation " : ""), var_name, + hwrpb->sys_type, hwrpb->sys_variation); + } + if (vec != &alpha_mv) { + alpha_mv = *vec; + } + + printk("Booting " +#ifdef CONFIG_ALPHA_GENERIC + "GENERIC " +#endif + "on %s%s%s using machine vector %s from %s\n", + type_name, (*var_name ? " variation " : ""), + var_name, alpha_mv.vector_name, + (alpha_using_srm ? "SRM" : "MILO")); + + printk("Major Options: " +#ifdef CONFIG_SMP + "SMP " +#endif +#ifdef CONFIG_ALPHA_EV56 + "EV56 " +#endif +#ifdef CONFIG_ALPHA_EV67 + "EV67 " +#endif +#ifdef CONFIG_ALPHA_LEGACY_START_ADDRESS + "LEGACY_START " +#endif +#ifdef CONFIG_VERBOSE_MCHECK + "VERBOSE_MCHECK " +#endif + +#ifdef CONFIG_DISCONTIGMEM + "DISCONTIGMEM " +#ifdef CONFIG_NUMA + "NUMA " +#endif +#endif + +#ifdef CONFIG_DEBUG_SPINLOCK + "DEBUG_SPINLOCK " +#endif +#ifdef CONFIG_MAGIC_SYSRQ + "MAGIC_SYSRQ " +#endif + "\n"); + + printk("Command line: %s\n", command_line); + + /* + * Sync up the HAE. + * Save the SRM's current value for restoration. + */ + srm_hae = *alpha_mv.hae_register; + __set_hae(alpha_mv.hae_cache); + + /* Reset enable correctable error reports. */ + wrmces(0x7); + + /* Find our memory. */ + setup_memory(kernel_end); + + /* First guess at cpu cache sizes. Do this before init_arch. */ + determine_cpu_caches(cpu->type); + + /* Initialize the machine. Usually has to do with setting up + DMA windows and the like. */ + if (alpha_mv.init_arch) + alpha_mv.init_arch(); + + /* Reserve standard resources. */ + reserve_std_resources(); + + /* + * Give us a default console. TGA users will see nothing until + * chr_dev_init is called, rather late in the boot sequence. + */ + +#ifdef CONFIG_VT +#if defined(CONFIG_VGA_CONSOLE) + conswitchp = &vga_con; +#elif defined(CONFIG_DUMMY_CONSOLE) + conswitchp = &dummy_con; +#endif +#endif + + /* Default root filesystem to sda2. */ + ROOT_DEV = Root_SDA2; + +#ifdef CONFIG_EISA + /* FIXME: only set this when we actually have EISA in this box? */ + EISA_bus = 1; +#endif + + /* + * Check ASN in HWRPB for validity, report if bad. + * FIXME: how was this failing? Should we trust it instead, + * and copy the value into alpha_mv.max_asn? + */ + + if (hwrpb->max_asn != MAX_ASN) { + printk("Max ASN from HWRPB is bad (0x%lx)\n", hwrpb->max_asn); + } + + /* + * Identify the flock of penguins. + */ + +#ifdef CONFIG_SMP + setup_smp(); +#endif + paging_init(); +} + +static char sys_unknown[] = "Unknown"; +static char systype_names[][16] = { + "0", + "ADU", "Cobra", "Ruby", "Flamingo", "Mannequin", "Jensen", + "Pelican", "Morgan", "Sable", "Medulla", "Noname", + "Turbolaser", "Avanti", "Mustang", "Alcor", "Tradewind", + "Mikasa", "EB64", "EB66", "EB64+", "AlphaBook1", + "Rawhide", "K2", "Lynx", "XL", "EB164", "Noritake", + "Cortex", "29", "Miata", "XXM", "Takara", "Yukon", + "Tsunami", "Wildfire", "CUSCO", "Eiger", "Titan", "Marvel" +}; + +static char unofficial_names[][8] = {"100", "Ruffian"}; + +static char api_names[][16] = {"200", "Nautilus"}; + +static char eb164_names[][8] = {"EB164", "PC164", "LX164", "SX164", "RX164"}; +static int eb164_indices[] = {0,0,0,1,1,1,1,1,2,2,2,2,3,3,3,3,4}; + +static char alcor_names[][16] = {"Alcor", "Maverick", "Bret"}; +static int alcor_indices[] = {0,0,0,1,1,1,0,0,0,0,0,0,2,2,2,2,2,2}; + +static char eb64p_names[][16] = {"EB64+", "Cabriolet", "AlphaPCI64"}; +static int eb64p_indices[] = {0,0,1,2}; + +static char eb66_names[][8] = {"EB66", "EB66+"}; +static int eb66_indices[] = {0,0,1}; + +static char marvel_names[][16] = { + "Marvel/EV7" +}; +static int marvel_indices[] = { 0 }; + +static char rawhide_names[][16] = { + "Dodge", "Wrangler", "Durango", "Tincup", "DaVinci" +}; +static int rawhide_indices[] = {0,0,0,1,1,2,2,3,3,4,4}; + +static char titan_names[][16] = { + "DEFAULT", "Privateer", "Falcon", "Granite" +}; +static int titan_indices[] = {0,1,2,2,3}; + +static char tsunami_names[][16] = { + "0", "DP264", "Warhol", "Windjammer", "Monet", "Clipper", + "Goldrush", "Webbrick", "Catamaran", "Brisbane", "Melbourne", + "Flying Clipper", "Shark" +}; +static int tsunami_indices[] = {0,1,2,3,4,5,6,7,8,9,10,11,12}; + +static struct alpha_machine_vector * __init +get_sysvec(unsigned long type, unsigned long variation, unsigned long cpu) +{ + static struct alpha_machine_vector *systype_vecs[] __initdata = + { + NULL, /* 0 */ + NULL, /* ADU */ + NULL, /* Cobra */ + NULL, /* Ruby */ + NULL, /* Flamingo */ + NULL, /* Mannequin */ + &jensen_mv, + NULL, /* Pelican */ + NULL, /* Morgan */ + NULL, /* Sable -- see below. */ + NULL, /* Medulla */ + &noname_mv, + NULL, /* Turbolaser */ + &avanti_mv, + NULL, /* Mustang */ + NULL, /* Alcor, Bret, Maverick. HWRPB inaccurate? */ + NULL, /* Tradewind */ + NULL, /* Mikasa -- see below. */ + NULL, /* EB64 */ + NULL, /* EB66 -- see variation. */ + NULL, /* EB64+ -- see variation. */ + &alphabook1_mv, + &rawhide_mv, + NULL, /* K2 */ + &lynx_mv, /* Lynx */ + &xl_mv, + NULL, /* EB164 -- see variation. */ + NULL, /* Noritake -- see below. */ + NULL, /* Cortex */ + NULL, /* 29 */ + &miata_mv, + NULL, /* XXM */ + &takara_mv, + NULL, /* Yukon */ + NULL, /* Tsunami -- see variation. */ + &wildfire_mv, /* Wildfire */ + NULL, /* CUSCO */ + &eiger_mv, /* Eiger */ + NULL, /* Titan */ + NULL, /* Marvel */ + }; + + static struct alpha_machine_vector *unofficial_vecs[] __initdata = + { + NULL, /* 100 */ + &ruffian_mv, + }; + + static struct alpha_machine_vector *api_vecs[] __initdata = + { + NULL, /* 200 */ + &nautilus_mv, + }; + + static struct alpha_machine_vector *alcor_vecs[] __initdata = + { + &alcor_mv, &xlt_mv, &xlt_mv + }; + + static struct alpha_machine_vector *eb164_vecs[] __initdata = + { + &eb164_mv, &pc164_mv, &lx164_mv, &sx164_mv, &rx164_mv + }; + + static struct alpha_machine_vector *eb64p_vecs[] __initdata = + { + &eb64p_mv, + &cabriolet_mv, + &cabriolet_mv /* AlphaPCI64 */ + }; + + static struct alpha_machine_vector *eb66_vecs[] __initdata = + { + &eb66_mv, + &eb66p_mv + }; + + static struct alpha_machine_vector *marvel_vecs[] __initdata = + { + &marvel_ev7_mv, + }; + + static struct alpha_machine_vector *titan_vecs[] __initdata = + { + &titan_mv, /* default */ + &privateer_mv, /* privateer */ + &titan_mv, /* falcon */ + &privateer_mv, /* granite */ + }; + + static struct alpha_machine_vector *tsunami_vecs[] __initdata = + { + NULL, + &dp264_mv, /* dp264 */ + &dp264_mv, /* warhol */ + &dp264_mv, /* windjammer */ + &monet_mv, /* monet */ + &clipper_mv, /* clipper */ + &dp264_mv, /* goldrush */ + &webbrick_mv, /* webbrick */ + &dp264_mv, /* catamaran */ + NULL, /* brisbane? */ + NULL, /* melbourne? */ + NULL, /* flying clipper? */ + &shark_mv, /* shark */ + }; + + /* ??? Do we need to distinguish between Rawhides? */ + + struct alpha_machine_vector *vec; + + /* Search the system tables first... */ + vec = NULL; + if (type < ARRAY_SIZE(systype_vecs)) { + vec = systype_vecs[type]; + } else if ((type > ST_API_BIAS) && + (type - ST_API_BIAS) < ARRAY_SIZE(api_vecs)) { + vec = api_vecs[type - ST_API_BIAS]; + } else if ((type > ST_UNOFFICIAL_BIAS) && + (type - ST_UNOFFICIAL_BIAS) < ARRAY_SIZE(unofficial_vecs)) { + vec = unofficial_vecs[type - ST_UNOFFICIAL_BIAS]; + } + + /* If we've not found one, try for a variation. */ + + if (!vec) { + /* Member ID is a bit-field. */ + unsigned long member = (variation >> 10) & 0x3f; + + cpu &= 0xffffffff; /* make it usable */ + + switch (type) { + case ST_DEC_ALCOR: + if (member < ARRAY_SIZE(alcor_indices)) + vec = alcor_vecs[alcor_indices[member]]; + break; + case ST_DEC_EB164: + if (member < ARRAY_SIZE(eb164_indices)) + vec = eb164_vecs[eb164_indices[member]]; + /* PC164 may show as EB164 variation with EV56 CPU, + but, since no true EB164 had anything but EV5... */ + if (vec == &eb164_mv && cpu == EV56_CPU) + vec = &pc164_mv; + break; + case ST_DEC_EB64P: + if (member < ARRAY_SIZE(eb64p_indices)) + vec = eb64p_vecs[eb64p_indices[member]]; + break; + case ST_DEC_EB66: + if (member < ARRAY_SIZE(eb66_indices)) + vec = eb66_vecs[eb66_indices[member]]; + break; + case ST_DEC_MARVEL: + if (member < ARRAY_SIZE(marvel_indices)) + vec = marvel_vecs[marvel_indices[member]]; + break; + case ST_DEC_TITAN: + vec = titan_vecs[0]; /* default */ + if (member < ARRAY_SIZE(titan_indices)) + vec = titan_vecs[titan_indices[member]]; + break; + case ST_DEC_TSUNAMI: + if (member < ARRAY_SIZE(tsunami_indices)) + vec = tsunami_vecs[tsunami_indices[member]]; + break; + case ST_DEC_1000: + if (cpu == EV5_CPU || cpu == EV56_CPU) + vec = &mikasa_primo_mv; + else + vec = &mikasa_mv; + break; + case ST_DEC_NORITAKE: + if (cpu == EV5_CPU || cpu == EV56_CPU) + vec = &noritake_primo_mv; + else + vec = &noritake_mv; + break; + case ST_DEC_2100_A500: + if (cpu == EV5_CPU || cpu == EV56_CPU) + vec = &sable_gamma_mv; + else + vec = &sable_mv; + break; + } + } + return vec; +} + +static struct alpha_machine_vector * __init +get_sysvec_byname(const char *name) +{ + static struct alpha_machine_vector *all_vecs[] __initdata = + { + &alcor_mv, + &alphabook1_mv, + &avanti_mv, + &cabriolet_mv, + &clipper_mv, + &dp264_mv, + &eb164_mv, + &eb64p_mv, + &eb66_mv, + &eb66p_mv, + &eiger_mv, + &jensen_mv, + &lx164_mv, + &lynx_mv, + &miata_mv, + &mikasa_mv, + &mikasa_primo_mv, + &monet_mv, + &nautilus_mv, + &noname_mv, + &noritake_mv, + &noritake_primo_mv, + &p2k_mv, + &pc164_mv, + &privateer_mv, + &rawhide_mv, + &ruffian_mv, + &rx164_mv, + &sable_mv, + &sable_gamma_mv, + &shark_mv, + &sx164_mv, + &takara_mv, + &webbrick_mv, + &wildfire_mv, + &xl_mv, + &xlt_mv + }; + + size_t i; + + for (i = 0; i < ARRAY_SIZE(all_vecs); ++i) { + struct alpha_machine_vector *mv = all_vecs[i]; + if (strcasecmp(mv->vector_name, name) == 0) + return mv; + } + return NULL; +} + +static void +get_sysnames(unsigned long type, unsigned long variation, unsigned long cpu, + char **type_name, char **variation_name) +{ + unsigned long member; + + /* If not in the tables, make it UNKNOWN, + else set type name to family */ + if (type < ARRAY_SIZE(systype_names)) { + *type_name = systype_names[type]; + } else if ((type > ST_API_BIAS) && + (type - ST_API_BIAS) < ARRAY_SIZE(api_names)) { + *type_name = api_names[type - ST_API_BIAS]; + } else if ((type > ST_UNOFFICIAL_BIAS) && + (type - ST_UNOFFICIAL_BIAS) < ARRAY_SIZE(unofficial_names)) { + *type_name = unofficial_names[type - ST_UNOFFICIAL_BIAS]; + } else { + *type_name = sys_unknown; + *variation_name = sys_unknown; + return; + } + + /* Set variation to "0"; if variation is zero, done. */ + *variation_name = systype_names[0]; + if (variation == 0) { + return; + } + + member = (variation >> 10) & 0x3f; /* member ID is a bit-field */ + + cpu &= 0xffffffff; /* make it usable */ + + switch (type) { /* select by family */ + default: /* default to variation "0" for now */ + break; + case ST_DEC_EB164: + if (member < ARRAY_SIZE(eb164_indices)) + *variation_name = eb164_names[eb164_indices[member]]; + /* PC164 may show as EB164 variation, but with EV56 CPU, + so, since no true EB164 had anything but EV5... */ + if (eb164_indices[member] == 0 && cpu == EV56_CPU) + *variation_name = eb164_names[1]; /* make it PC164 */ + break; + case ST_DEC_ALCOR: + if (member < ARRAY_SIZE(alcor_indices)) + *variation_name = alcor_names[alcor_indices[member]]; + break; + case ST_DEC_EB64P: + if (member < ARRAY_SIZE(eb64p_indices)) + *variation_name = eb64p_names[eb64p_indices[member]]; + break; + case ST_DEC_EB66: + if (member < ARRAY_SIZE(eb66_indices)) + *variation_name = eb66_names[eb66_indices[member]]; + break; + case ST_DEC_MARVEL: + if (member < ARRAY_SIZE(marvel_indices)) + *variation_name = marvel_names[marvel_indices[member]]; + break; + case ST_DEC_RAWHIDE: + if (member < ARRAY_SIZE(rawhide_indices)) + *variation_name = rawhide_names[rawhide_indices[member]]; + break; + case ST_DEC_TITAN: + *variation_name = titan_names[0]; /* default */ + if (member < ARRAY_SIZE(titan_indices)) + *variation_name = titan_names[titan_indices[member]]; + break; + case ST_DEC_TSUNAMI: + if (member < ARRAY_SIZE(tsunami_indices)) + *variation_name = tsunami_names[tsunami_indices[member]]; + break; + } +} + +/* + * A change was made to the HWRPB via an ECO and the following code + * tracks a part of the ECO. In HWRPB versions less than 5, the ECO + * was not implemented in the console firmware. If it's revision 5 or + * greater we can get the name of the platform as an ASCII string from + * the HWRPB. That's what this function does. It checks the revision + * level and if the string is in the HWRPB it returns the address of + * the string--a pointer to the name of the platform. + * + * Returns: + * - Pointer to a ASCII string if it's in the HWRPB + * - Pointer to a blank string if the data is not in the HWRPB. + */ + +static char * +platform_string(void) +{ + struct dsr_struct *dsr; + static char unk_system_string[] = "N/A"; + + /* Go to the console for the string pointer. + * If the rpb_vers is not 5 or greater the rpb + * is old and does not have this data in it. + */ + if (hwrpb->revision < 5) + return (unk_system_string); + else { + /* The Dynamic System Recognition struct + * has the system platform name starting + * after the character count of the string. + */ + dsr = ((struct dsr_struct *) + ((char *)hwrpb + hwrpb->dsr_offset)); + return ((char *)dsr + (dsr->sysname_off + + sizeof(long))); + } +} + +static int +get_nr_processors(struct percpu_struct *cpubase, unsigned long num) +{ + struct percpu_struct *cpu; + unsigned long i; + int count = 0; + + for (i = 0; i < num; i++) { + cpu = (struct percpu_struct *) + ((char *)cpubase + i*hwrpb->processor_size); + if ((cpu->flags & 0x1cc) == 0x1cc) + count++; + } + return count; +} + +static void +show_cache_size (struct seq_file *f, const char *which, int shape) +{ + if (shape == -1) + seq_printf (f, "%s\t\t: n/a\n", which); + else if (shape == 0) + seq_printf (f, "%s\t\t: unknown\n", which); + else + seq_printf (f, "%s\t\t: %dK, %d-way, %db line\n", + which, shape >> 10, shape & 15, + 1 << ((shape >> 4) & 15)); +} + +static int +show_cpuinfo(struct seq_file *f, void *slot) +{ + extern struct unaligned_stat { + unsigned long count, va, pc; + } unaligned[2]; + + static char cpu_names[][8] = { + "EV3", "EV4", "Simulate", "LCA4", "EV5", "EV45", "EV56", + "EV6", "PCA56", "PCA57", "EV67", "EV68CB", "EV68AL", + "EV68CX", "EV7", "EV79", "EV69" + }; + + struct percpu_struct *cpu = slot; + unsigned int cpu_index; + char *cpu_name; + char *systype_name; + char *sysvariation_name; + int nr_processors; + + cpu_index = (unsigned) (cpu->type - 1); + cpu_name = "Unknown"; + if (cpu_index < ARRAY_SIZE(cpu_names)) + cpu_name = cpu_names[cpu_index]; + + get_sysnames(hwrpb->sys_type, hwrpb->sys_variation, + cpu->type, &systype_name, &sysvariation_name); + + nr_processors = get_nr_processors(cpu, hwrpb->nr_processors); + + seq_printf(f, "cpu\t\t\t: Alpha\n" + "cpu model\t\t: %s\n" + "cpu variation\t\t: %ld\n" + "cpu revision\t\t: %ld\n" + "cpu serial number\t: %s\n" + "system type\t\t: %s\n" + "system variation\t: %s\n" + "system revision\t\t: %ld\n" + "system serial number\t: %s\n" + "cycle frequency [Hz]\t: %lu %s\n" + "timer frequency [Hz]\t: %lu.%02lu\n" + "page size [bytes]\t: %ld\n" + "phys. address bits\t: %ld\n" + "max. addr. space #\t: %ld\n" + "BogoMIPS\t\t: %lu.%02lu\n" + "kernel unaligned acc\t: %ld (pc=%lx,va=%lx)\n" + "user unaligned acc\t: %ld (pc=%lx,va=%lx)\n" + "platform string\t\t: %s\n" + "cpus detected\t\t: %d\n", + cpu_name, cpu->variation, cpu->revision, + (char*)cpu->serial_no, + systype_name, sysvariation_name, hwrpb->sys_revision, + (char*)hwrpb->ssn, + est_cycle_freq ? : hwrpb->cycle_freq, + est_cycle_freq ? "est." : "", + hwrpb->intr_freq / 4096, + (100 * hwrpb->intr_freq / 4096) % 100, + hwrpb->pagesize, + hwrpb->pa_bits, + hwrpb->max_asn, + loops_per_jiffy / (500000/HZ), + (loops_per_jiffy / (5000/HZ)) % 100, + unaligned[0].count, unaligned[0].pc, unaligned[0].va, + unaligned[1].count, unaligned[1].pc, unaligned[1].va, + platform_string(), nr_processors); + +#ifdef CONFIG_SMP + seq_printf(f, "cpus active\t\t: %u\n" + "cpu active mask\t\t: %016lx\n", + num_online_cpus(), cpumask_bits(cpu_possible_mask)[0]); +#endif + + show_cache_size (f, "L1 Icache", alpha_l1i_cacheshape); + show_cache_size (f, "L1 Dcache", alpha_l1d_cacheshape); + show_cache_size (f, "L2 cache", alpha_l2_cacheshape); + show_cache_size (f, "L3 cache", alpha_l3_cacheshape); + + return 0; +} + +static int __init +read_mem_block(int *addr, int stride, int size) +{ + long nloads = size / stride, cnt, tmp; + + __asm__ __volatile__( + " rpcc %0\n" + "1: ldl %3,0(%2)\n" + " subq %1,1,%1\n" + /* Next two XORs introduce an explicit data dependency between + consecutive loads in the loop, which will give us true load + latency. */ + " xor %3,%2,%2\n" + " xor %3,%2,%2\n" + " addq %2,%4,%2\n" + " bne %1,1b\n" + " rpcc %3\n" + " subl %3,%0,%0\n" + : "=&r" (cnt), "=&r" (nloads), "=&r" (addr), "=&r" (tmp) + : "r" (stride), "1" (nloads), "2" (addr)); + + return cnt / (size / stride); +} + +#define CSHAPE(totalsize, linesize, assoc) \ + ((totalsize & ~0xff) | (linesize << 4) | assoc) + +/* ??? EV5 supports up to 64M, but did the systems with more than + 16M of BCACHE ever exist? */ +#define MAX_BCACHE_SIZE 16*1024*1024 + +/* Note that the offchip caches are direct mapped on all Alphas. */ +static int __init +external_cache_probe(int minsize, int width) +{ + int cycles, prev_cycles = 1000000; + int stride = 1 << width; + long size = minsize, maxsize = MAX_BCACHE_SIZE * 2; + + if (maxsize > (max_low_pfn + 1) << PAGE_SHIFT) + maxsize = 1 << (ilog2(max_low_pfn + 1) + PAGE_SHIFT); + + /* Get the first block cached. */ + read_mem_block(__va(0), stride, size); + + while (size < maxsize) { + /* Get an average load latency in cycles. */ + cycles = read_mem_block(__va(0), stride, size); + if (cycles > prev_cycles * 2) { + /* Fine, we exceed the cache. */ + printk("%ldK Bcache detected; load hit latency %d " + "cycles, load miss latency %d cycles\n", + size >> 11, prev_cycles, cycles); + return CSHAPE(size >> 1, width, 1); + } + /* Try to get the next block cached. */ + read_mem_block(__va(size), stride, size); + prev_cycles = cycles; + size <<= 1; + } + return -1; /* No BCACHE found. */ +} + +static void __init +determine_cpu_caches (unsigned int cpu_type) +{ + int L1I, L1D, L2, L3; + + switch (cpu_type) { + case EV4_CPU: + case EV45_CPU: + { + if (cpu_type == EV4_CPU) + L1I = CSHAPE(8*1024, 5, 1); + else + L1I = CSHAPE(16*1024, 5, 1); + L1D = L1I; + L3 = -1; + + /* BIU_CTL is a write-only Abox register. PALcode has a + shadow copy, and may be available from some versions + of the CSERVE PALcall. If we can get it, then + + unsigned long biu_ctl, size; + size = 128*1024 * (1 << ((biu_ctl >> 28) & 7)); + L2 = CSHAPE (size, 5, 1); + + Unfortunately, we can't rely on that. + */ + L2 = external_cache_probe(128*1024, 5); + break; + } + + case LCA4_CPU: + { + unsigned long car, size; + + L1I = L1D = CSHAPE(8*1024, 5, 1); + L3 = -1; + + car = *(vuip) phys_to_virt (0x120000078UL); + size = 64*1024 * (1 << ((car >> 5) & 7)); + /* No typo -- 8 byte cacheline size. Whodathunk. */ + L2 = (car & 1 ? CSHAPE (size, 3, 1) : -1); + break; + } + + case EV5_CPU: + case EV56_CPU: + { + unsigned long sc_ctl, width; + + L1I = L1D = CSHAPE(8*1024, 5, 1); + + /* Check the line size of the Scache. */ + sc_ctl = *(vulp) phys_to_virt (0xfffff000a8UL); + width = sc_ctl & 0x1000 ? 6 : 5; + L2 = CSHAPE (96*1024, width, 3); + + /* BC_CONTROL and BC_CONFIG are write-only IPRs. PALcode + has a shadow copy, and may be available from some versions + of the CSERVE PALcall. If we can get it, then + + unsigned long bc_control, bc_config, size; + size = 1024*1024 * (1 << ((bc_config & 7) - 1)); + L3 = (bc_control & 1 ? CSHAPE (size, width, 1) : -1); + + Unfortunately, we can't rely on that. + */ + L3 = external_cache_probe(1024*1024, width); + break; + } + + case PCA56_CPU: + case PCA57_CPU: + { + if (cpu_type == PCA56_CPU) { + L1I = CSHAPE(16*1024, 6, 1); + L1D = CSHAPE(8*1024, 5, 1); + } else { + L1I = CSHAPE(32*1024, 6, 2); + L1D = CSHAPE(16*1024, 5, 1); + } + L3 = -1; + +#if 0 + unsigned long cbox_config, size; + + cbox_config = *(vulp) phys_to_virt (0xfffff00008UL); + size = 512*1024 * (1 << ((cbox_config >> 12) & 3)); + + L2 = ((cbox_config >> 31) & 1 ? CSHAPE (size, 6, 1) : -1); +#else + L2 = external_cache_probe(512*1024, 6); +#endif + break; + } + + case EV6_CPU: + case EV67_CPU: + case EV68CB_CPU: + case EV68AL_CPU: + case EV68CX_CPU: + case EV69_CPU: + L1I = L1D = CSHAPE(64*1024, 6, 2); + L2 = external_cache_probe(1024*1024, 6); + L3 = -1; + break; + + case EV7_CPU: + case EV79_CPU: + L1I = L1D = CSHAPE(64*1024, 6, 2); + L2 = CSHAPE(7*1024*1024/4, 6, 7); + L3 = -1; + break; + + default: + /* Nothing known about this cpu type. */ + L1I = L1D = L2 = L3 = 0; + break; + } + + alpha_l1i_cacheshape = L1I; + alpha_l1d_cacheshape = L1D; + alpha_l2_cacheshape = L2; + alpha_l3_cacheshape = L3; +} + +/* + * We show only CPU #0 info. + */ +static void * +c_start(struct seq_file *f, loff_t *pos) +{ + return *pos ? NULL : (char *)hwrpb + hwrpb->processor_offset; +} + +static void * +c_next(struct seq_file *f, void *v, loff_t *pos) +{ + return NULL; +} + +static void +c_stop(struct seq_file *f, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + + +static int +alpha_panic_event(struct notifier_block *this, unsigned long event, void *ptr) +{ +#if 1 + /* FIXME FIXME FIXME */ + /* If we are using SRM and serial console, just hard halt here. */ + if (alpha_using_srm && srmcons_output) + __halt(); +#endif + return NOTIFY_DONE; +} + +static __init int add_pcspkr(void) +{ + struct platform_device *pd; + int ret; + + pd = platform_device_alloc("pcspkr", -1); + if (!pd) + return -ENOMEM; + + ret = platform_device_add(pd); + if (ret) + platform_device_put(pd); + + return ret; +} +device_initcall(add_pcspkr); diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c new file mode 100644 index 00000000..35f2ef44 --- /dev/null +++ b/arch/alpha/kernel/signal.c @@ -0,0 +1,622 @@ +/* + * linux/arch/alpha/kernel/signal.c + * + * Copyright (C) 1995 Linus Torvalds + * + * 1997-11-02 Modified for POSIX.1b signals by Richard Henderson + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/ptrace.h> +#include <linux/unistd.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/tty.h> +#include <linux/binfmts.h> +#include <linux/bitops.h> +#include <linux/syscalls.h> +#include <linux/tracehook.h> + +#include <asm/uaccess.h> +#include <asm/sigcontext.h> +#include <asm/ucontext.h> + +#include "proto.h" + + +#define DEBUG_SIG 0 + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +asmlinkage void ret_from_sys_call(void); +static void do_signal(struct pt_regs *, struct switch_stack *, + unsigned long, unsigned long); + + +/* + * The OSF/1 sigprocmask calling sequence is different from the + * C sigprocmask() sequence.. + */ +SYSCALL_DEFINE2(osf_sigprocmask, int, how, unsigned long, newmask) +{ + sigset_t oldmask; + sigset_t mask; + unsigned long res; + + siginitset(&mask, newmask & _BLOCKABLE); + res = sigprocmask(how, &mask, &oldmask); + if (!res) { + force_successful_syscall_return(); + res = oldmask.sig[0]; + } + return res; +} + +SYSCALL_DEFINE3(osf_sigaction, int, sig, + const struct osf_sigaction __user *, act, + struct osf_sigaction __user *, oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + old_sigset_t mask; + if (!access_ok(VERIFY_READ, act, sizeof(*act)) || + __get_user(new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_flags, &act->sa_flags) || + __get_user(mask, &act->sa_mask)) + return -EFAULT; + siginitset(&new_ka.sa.sa_mask, mask); + new_ka.ka_restorer = NULL; + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) + return -EFAULT; + } + + return ret; +} + +SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act, + struct sigaction __user *, oact, + size_t, sigsetsize, void __user *, restorer) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + + if (act) { + new_ka.ka_restorer = restorer; + if (copy_from_user(&new_ka.sa, act, sizeof(*act))) + return -EFAULT; + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (copy_to_user(oact, &old_ka.sa, sizeof(*oact))) + return -EFAULT; + } + + return ret; +} + +/* + * Atomically swap in the new signal mask, and wait for a signal. + */ +SYSCALL_DEFINE1(sigsuspend, old_sigset_t, mask) +{ + sigset_t blocked; + + current->saved_sigmask = current->blocked; + + mask &= _BLOCKABLE; + siginitset(&blocked, mask); + set_current_blocked(&blocked); + + current->state = TASK_INTERRUPTIBLE; + schedule(); + set_thread_flag(TIF_RESTORE_SIGMASK); + return -ERESTARTNOHAND; +} + +asmlinkage int +sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss) +{ + return do_sigaltstack(uss, uoss, rdusp()); +} + +/* + * Do a signal return; undo the signal stack. + */ + +#if _NSIG_WORDS > 1 +# error "Non SA_SIGINFO frame needs rearranging" +#endif + +struct sigframe +{ + struct sigcontext sc; + unsigned int retcode[3]; +}; + +struct rt_sigframe +{ + struct siginfo info; + struct ucontext uc; + unsigned int retcode[3]; +}; + +/* If this changes, userland unwinders that Know Things about our signal + frame will break. Do not undertake lightly. It also implies an ABI + change wrt the size of siginfo_t, which may cause some pain. */ +extern char compile_time_assert + [offsetof(struct rt_sigframe, uc.uc_mcontext) == 176 ? 1 : -1]; + +#define INSN_MOV_R30_R16 0x47fe0410 +#define INSN_LDI_R0 0x201f0000 +#define INSN_CALLSYS 0x00000083 + +static long +restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, + struct switch_stack *sw) +{ + unsigned long usp; + long i, err = __get_user(regs->pc, &sc->sc_pc); + + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + sw->r26 = (unsigned long) ret_from_sys_call; + + err |= __get_user(regs->r0, sc->sc_regs+0); + err |= __get_user(regs->r1, sc->sc_regs+1); + err |= __get_user(regs->r2, sc->sc_regs+2); + err |= __get_user(regs->r3, sc->sc_regs+3); + err |= __get_user(regs->r4, sc->sc_regs+4); + err |= __get_user(regs->r5, sc->sc_regs+5); + err |= __get_user(regs->r6, sc->sc_regs+6); + err |= __get_user(regs->r7, sc->sc_regs+7); + err |= __get_user(regs->r8, sc->sc_regs+8); + err |= __get_user(sw->r9, sc->sc_regs+9); + err |= __get_user(sw->r10, sc->sc_regs+10); + err |= __get_user(sw->r11, sc->sc_regs+11); + err |= __get_user(sw->r12, sc->sc_regs+12); + err |= __get_user(sw->r13, sc->sc_regs+13); + err |= __get_user(sw->r14, sc->sc_regs+14); + err |= __get_user(sw->r15, sc->sc_regs+15); + err |= __get_user(regs->r16, sc->sc_regs+16); + err |= __get_user(regs->r17, sc->sc_regs+17); + err |= __get_user(regs->r18, sc->sc_regs+18); + err |= __get_user(regs->r19, sc->sc_regs+19); + err |= __get_user(regs->r20, sc->sc_regs+20); + err |= __get_user(regs->r21, sc->sc_regs+21); + err |= __get_user(regs->r22, sc->sc_regs+22); + err |= __get_user(regs->r23, sc->sc_regs+23); + err |= __get_user(regs->r24, sc->sc_regs+24); + err |= __get_user(regs->r25, sc->sc_regs+25); + err |= __get_user(regs->r26, sc->sc_regs+26); + err |= __get_user(regs->r27, sc->sc_regs+27); + err |= __get_user(regs->r28, sc->sc_regs+28); + err |= __get_user(regs->gp, sc->sc_regs+29); + err |= __get_user(usp, sc->sc_regs+30); + wrusp(usp); + + for (i = 0; i < 31; i++) + err |= __get_user(sw->fp[i], sc->sc_fpregs+i); + err |= __get_user(sw->fp[31], &sc->sc_fpcr); + + return err; +} + +/* Note that this syscall is also used by setcontext(3) to install + a given sigcontext. This because it's impossible to set *all* + registers and transfer control from userland. */ + +asmlinkage void +do_sigreturn(struct sigcontext __user *sc, struct pt_regs *regs, + struct switch_stack *sw) +{ + sigset_t set; + + /* Verify that it's a good sigcontext before using it */ + if (!access_ok(VERIFY_READ, sc, sizeof(*sc))) + goto give_sigsegv; + if (__get_user(set.sig[0], &sc->sc_mask)) + goto give_sigsegv; + + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + + if (restore_sigcontext(sc, regs, sw)) + goto give_sigsegv; + + /* Send SIGTRAP if we're single-stepping: */ + if (ptrace_cancel_bpt (current)) { + siginfo_t info; + + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + info.si_addr = (void __user *) regs->pc; + info.si_trapno = 0; + send_sig_info(SIGTRAP, &info, current); + } + return; + +give_sigsegv: + force_sig(SIGSEGV, current); +} + +asmlinkage void +do_rt_sigreturn(struct rt_sigframe __user *frame, struct pt_regs *regs, + struct switch_stack *sw) +{ + sigset_t set; + + /* Verify that it's a good ucontext_t before using it */ + if (!access_ok(VERIFY_READ, &frame->uc, sizeof(frame->uc))) + goto give_sigsegv; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto give_sigsegv; + + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + + if (restore_sigcontext(&frame->uc.uc_mcontext, regs, sw)) + goto give_sigsegv; + + /* Send SIGTRAP if we're single-stepping: */ + if (ptrace_cancel_bpt (current)) { + siginfo_t info; + + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + info.si_addr = (void __user *) regs->pc; + info.si_trapno = 0; + send_sig_info(SIGTRAP, &info, current); + } + return; + +give_sigsegv: + force_sig(SIGSEGV, current); +} + + +/* + * Set up a signal frame. + */ + +static inline void __user * +get_sigframe(struct k_sigaction *ka, unsigned long sp, size_t frame_size) +{ + if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && ! sas_ss_flags(sp)) + sp = current->sas_ss_sp + current->sas_ss_size; + + return (void __user *)((sp - frame_size) & -32ul); +} + +static long +setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, + struct switch_stack *sw, unsigned long mask, unsigned long sp) +{ + long i, err = 0; + + err |= __put_user(on_sig_stack((unsigned long)sc), &sc->sc_onstack); + err |= __put_user(mask, &sc->sc_mask); + err |= __put_user(regs->pc, &sc->sc_pc); + err |= __put_user(8, &sc->sc_ps); + + err |= __put_user(regs->r0 , sc->sc_regs+0); + err |= __put_user(regs->r1 , sc->sc_regs+1); + err |= __put_user(regs->r2 , sc->sc_regs+2); + err |= __put_user(regs->r3 , sc->sc_regs+3); + err |= __put_user(regs->r4 , sc->sc_regs+4); + err |= __put_user(regs->r5 , sc->sc_regs+5); + err |= __put_user(regs->r6 , sc->sc_regs+6); + err |= __put_user(regs->r7 , sc->sc_regs+7); + err |= __put_user(regs->r8 , sc->sc_regs+8); + err |= __put_user(sw->r9 , sc->sc_regs+9); + err |= __put_user(sw->r10 , sc->sc_regs+10); + err |= __put_user(sw->r11 , sc->sc_regs+11); + err |= __put_user(sw->r12 , sc->sc_regs+12); + err |= __put_user(sw->r13 , sc->sc_regs+13); + err |= __put_user(sw->r14 , sc->sc_regs+14); + err |= __put_user(sw->r15 , sc->sc_regs+15); + err |= __put_user(regs->r16, sc->sc_regs+16); + err |= __put_user(regs->r17, sc->sc_regs+17); + err |= __put_user(regs->r18, sc->sc_regs+18); + err |= __put_user(regs->r19, sc->sc_regs+19); + err |= __put_user(regs->r20, sc->sc_regs+20); + err |= __put_user(regs->r21, sc->sc_regs+21); + err |= __put_user(regs->r22, sc->sc_regs+22); + err |= __put_user(regs->r23, sc->sc_regs+23); + err |= __put_user(regs->r24, sc->sc_regs+24); + err |= __put_user(regs->r25, sc->sc_regs+25); + err |= __put_user(regs->r26, sc->sc_regs+26); + err |= __put_user(regs->r27, sc->sc_regs+27); + err |= __put_user(regs->r28, sc->sc_regs+28); + err |= __put_user(regs->gp , sc->sc_regs+29); + err |= __put_user(sp, sc->sc_regs+30); + err |= __put_user(0, sc->sc_regs+31); + + for (i = 0; i < 31; i++) + err |= __put_user(sw->fp[i], sc->sc_fpregs+i); + err |= __put_user(0, sc->sc_fpregs+31); + err |= __put_user(sw->fp[31], &sc->sc_fpcr); + + err |= __put_user(regs->trap_a0, &sc->sc_traparg_a0); + err |= __put_user(regs->trap_a1, &sc->sc_traparg_a1); + err |= __put_user(regs->trap_a2, &sc->sc_traparg_a2); + + return err; +} + +static int +setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, + struct pt_regs *regs, struct switch_stack * sw) +{ + unsigned long oldsp, r26, err = 0; + struct sigframe __user *frame; + + oldsp = rdusp(); + frame = get_sigframe(ka, oldsp, sizeof(*frame)); + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + err |= setup_sigcontext(&frame->sc, regs, sw, set->sig[0], oldsp); + if (err) + goto give_sigsegv; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + if (ka->ka_restorer) { + r26 = (unsigned long) ka->ka_restorer; + } else { + err |= __put_user(INSN_MOV_R30_R16, frame->retcode+0); + err |= __put_user(INSN_LDI_R0+__NR_sigreturn, frame->retcode+1); + err |= __put_user(INSN_CALLSYS, frame->retcode+2); + imb(); + r26 = (unsigned long) frame->retcode; + } + + /* Check that everything was written properly. */ + if (err) + goto give_sigsegv; + + /* "Return" to the handler */ + regs->r26 = r26; + regs->r27 = regs->pc = (unsigned long) ka->sa.sa_handler; + regs->r16 = sig; /* a0: signal number */ + regs->r17 = 0; /* a1: exception code */ + regs->r18 = (unsigned long) &frame->sc; /* a2: sigcontext pointer */ + wrusp((unsigned long) frame); + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->pc, regs->r26); +#endif + + return 0; + +give_sigsegv: + force_sigsegv(sig, current); + return -EFAULT; +} + +static int +setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs, struct switch_stack * sw) +{ + unsigned long oldsp, r26, err = 0; + struct rt_sigframe __user *frame; + + oldsp = rdusp(); + frame = get_sigframe(ka, oldsp, sizeof(*frame)); + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + err |= copy_siginfo_to_user(&frame->info, info); + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(set->sig[0], &frame->uc.uc_osf_sigmask); + err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(oldsp), &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, sw, + set->sig[0], oldsp); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + if (ka->ka_restorer) { + r26 = (unsigned long) ka->ka_restorer; + } else { + err |= __put_user(INSN_MOV_R30_R16, frame->retcode+0); + err |= __put_user(INSN_LDI_R0+__NR_rt_sigreturn, + frame->retcode+1); + err |= __put_user(INSN_CALLSYS, frame->retcode+2); + imb(); + r26 = (unsigned long) frame->retcode; + } + + if (err) + goto give_sigsegv; + + /* "Return" to the handler */ + regs->r26 = r26; + regs->r27 = regs->pc = (unsigned long) ka->sa.sa_handler; + regs->r16 = sig; /* a0: signal number */ + regs->r17 = (unsigned long) &frame->info; /* a1: siginfo pointer */ + regs->r18 = (unsigned long) &frame->uc; /* a2: ucontext pointer */ + wrusp((unsigned long) frame); + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->pc, regs->r26); +#endif + + return 0; + +give_sigsegv: + force_sigsegv(sig, current); + return -EFAULT; +} + + +/* + * OK, we're invoking a handler. + */ +static inline int +handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *oldset, struct pt_regs * regs, struct switch_stack *sw) +{ + int ret; + + if (ka->sa.sa_flags & SA_SIGINFO) + ret = setup_rt_frame(sig, ka, info, oldset, regs, sw); + else + ret = setup_frame(sig, ka, oldset, regs, sw); + + if (ret == 0) + block_sigmask(ka, sig); + + return ret; +} + +static inline void +syscall_restart(unsigned long r0, unsigned long r19, + struct pt_regs *regs, struct k_sigaction *ka) +{ + switch (regs->r0) { + case ERESTARTSYS: + if (!(ka->sa.sa_flags & SA_RESTART)) { + case ERESTARTNOHAND: + regs->r0 = EINTR; + break; + } + /* fallthrough */ + case ERESTARTNOINTR: + regs->r0 = r0; /* reset v0 and a3 and replay syscall */ + regs->r19 = r19; + regs->pc -= 4; + break; + case ERESTART_RESTARTBLOCK: + regs->r0 = EINTR; + break; + } +} + + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + * + * Note that we go through the signals twice: once to check the signals that + * the kernel can handle, and then we build all the user-level signal handling + * stack-frames in one go after that. + * + * "r0" and "r19" are the registers we need to restore for system call + * restart. "r0" is also used as an indicator whether we can restart at + * all (if we get here from anything but a syscall return, it will be 0) + */ +static void +do_signal(struct pt_regs * regs, struct switch_stack * sw, + unsigned long r0, unsigned long r19) +{ + siginfo_t info; + int signr; + unsigned long single_stepping = ptrace_cancel_bpt(current); + struct k_sigaction ka; + sigset_t *oldset; + + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + oldset = ¤t->saved_sigmask; + else + oldset = ¤t->blocked; + + /* This lets the debugger run, ... */ + signr = get_signal_to_deliver(&info, &ka, regs, NULL); + + /* ... so re-check the single stepping. */ + single_stepping |= ptrace_cancel_bpt(current); + + if (signr > 0) { + /* Whee! Actually deliver the signal. */ + if (r0) + syscall_restart(r0, r19, regs, &ka); + if (handle_signal(signr, &ka, &info, oldset, regs, sw) == 0) { + /* A signal was successfully delivered, and the + saved sigmask was stored on the signal frame, + and will be restored by sigreturn. So we can + simply clear the restore sigmask flag. */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + clear_thread_flag(TIF_RESTORE_SIGMASK); + } + if (single_stepping) + ptrace_set_bpt(current); /* re-set bpt */ + return; + } + + if (r0) { + switch (regs->r0) { + case ERESTARTNOHAND: + case ERESTARTSYS: + case ERESTARTNOINTR: + /* Reset v0 and a3 and replay syscall. */ + regs->r0 = r0; + regs->r19 = r19; + regs->pc -= 4; + break; + case ERESTART_RESTARTBLOCK: + /* Force v0 to the restart syscall and reply. */ + regs->r0 = __NR_restart_syscall; + regs->pc -= 4; + break; + } + } + + /* If there's no signal to deliver, we just restore the saved mask. */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) { + clear_thread_flag(TIF_RESTORE_SIGMASK); + sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); + } + + if (single_stepping) + ptrace_set_bpt(current); /* re-set breakpoint */ +} + +void +do_notify_resume(struct pt_regs *regs, struct switch_stack *sw, + unsigned long thread_info_flags, + unsigned long r0, unsigned long r19) +{ + if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) + do_signal(regs, sw, r0, r19); + + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); + } +} diff --git a/arch/alpha/kernel/smc37c669.c b/arch/alpha/kernel/smc37c669.c new file mode 100644 index 00000000..0435921d --- /dev/null +++ b/arch/alpha/kernel/smc37c669.c @@ -0,0 +1,2553 @@ +/* + * SMC 37C669 initialization code + */ +#include <linux/kernel.h> + +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/spinlock.h> + +#include <asm/hwrpb.h> +#include <asm/io.h> +#include <asm/segment.h> + +#if 0 +# define DBG_DEVS(args) printk args +#else +# define DBG_DEVS(args) +#endif + +#define KB 1024 +#define MB (1024*KB) +#define GB (1024*MB) + +#define SMC_DEBUG 0 + +/* File: smcc669_def.h + * + * Copyright (C) 1997 by + * Digital Equipment Corporation, Maynard, Massachusetts. + * All rights reserved. + * + * This software is furnished under a license and may be used and copied + * only in accordance of the terms of such license and with the + * inclusion of the above copyright notice. This software or any other + * copies thereof may not be provided or otherwise made available to any + * other person. No title to and ownership of the software is hereby + * transferred. + * + * The information in this software is subject to change without notice + * and should not be construed as a commitment by Digital Equipment + * Corporation. + * + * Digital assumes no responsibility for the use or reliability of its + * software on equipment which is not supplied by Digital. + * + * + * Abstract: + * + * This file contains header definitions for the SMC37c669 + * Super I/O controller. + * + * Author: + * + * Eric Rasmussen + * + * Modification History: + * + * er 28-Jan-1997 Initial Entry + */ + +#ifndef __SMC37c669_H +#define __SMC37c669_H + +/* +** Macros for handling device IRQs +** +** The mask acts as a flag used in mapping actual ISA IRQs (0 - 15) +** to device IRQs (A - H). +*/ +#define SMC37c669_DEVICE_IRQ_MASK 0x80000000 +#define SMC37c669_DEVICE_IRQ( __i ) \ + ((SMC37c669_DEVICE_IRQ_MASK) | (__i)) +#define SMC37c669_IS_DEVICE_IRQ(__i) \ + (((__i) & (SMC37c669_DEVICE_IRQ_MASK)) == (SMC37c669_DEVICE_IRQ_MASK)) +#define SMC37c669_RAW_DEVICE_IRQ(__i) \ + ((__i) & ~(SMC37c669_DEVICE_IRQ_MASK)) + +/* +** Macros for handling device DRQs +** +** The mask acts as a flag used in mapping actual ISA DMA +** channels to device DMA channels (A - C). +*/ +#define SMC37c669_DEVICE_DRQ_MASK 0x80000000 +#define SMC37c669_DEVICE_DRQ(__d) \ + ((SMC37c669_DEVICE_DRQ_MASK) | (__d)) +#define SMC37c669_IS_DEVICE_DRQ(__d) \ + (((__d) & (SMC37c669_DEVICE_DRQ_MASK)) == (SMC37c669_DEVICE_DRQ_MASK)) +#define SMC37c669_RAW_DEVICE_DRQ(__d) \ + ((__d) & ~(SMC37c669_DEVICE_DRQ_MASK)) + +#define SMC37c669_DEVICE_ID 0x3 + +/* +** SMC37c669 Device Function Definitions +*/ +#define SERIAL_0 0 +#define SERIAL_1 1 +#define PARALLEL_0 2 +#define FLOPPY_0 3 +#define IDE_0 4 +#define NUM_FUNCS 5 + +/* +** Default Device Function Mappings +*/ +#define COM1_BASE 0x3F8 +#define COM1_IRQ 4 +#define COM2_BASE 0x2F8 +#define COM2_IRQ 3 +#define PARP_BASE 0x3BC +#define PARP_IRQ 7 +#define PARP_DRQ 3 +#define FDC_BASE 0x3F0 +#define FDC_IRQ 6 +#define FDC_DRQ 2 + +/* +** Configuration On/Off Key Definitions +*/ +#define SMC37c669_CONFIG_ON_KEY 0x55 +#define SMC37c669_CONFIG_OFF_KEY 0xAA + +/* +** SMC 37c669 Device IRQs +*/ +#define SMC37c669_DEVICE_IRQ_A ( SMC37c669_DEVICE_IRQ( 0x01 ) ) +#define SMC37c669_DEVICE_IRQ_B ( SMC37c669_DEVICE_IRQ( 0x02 ) ) +#define SMC37c669_DEVICE_IRQ_C ( SMC37c669_DEVICE_IRQ( 0x03 ) ) +#define SMC37c669_DEVICE_IRQ_D ( SMC37c669_DEVICE_IRQ( 0x04 ) ) +#define SMC37c669_DEVICE_IRQ_E ( SMC37c669_DEVICE_IRQ( 0x05 ) ) +#define SMC37c669_DEVICE_IRQ_F ( SMC37c669_DEVICE_IRQ( 0x06 ) ) +/* SMC37c669_DEVICE_IRQ_G *** RESERVED ***/ +#define SMC37c669_DEVICE_IRQ_H ( SMC37c669_DEVICE_IRQ( 0x08 ) ) + +/* +** SMC 37c669 Device DMA Channel Definitions +*/ +#define SMC37c669_DEVICE_DRQ_A ( SMC37c669_DEVICE_DRQ( 0x01 ) ) +#define SMC37c669_DEVICE_DRQ_B ( SMC37c669_DEVICE_DRQ( 0x02 ) ) +#define SMC37c669_DEVICE_DRQ_C ( SMC37c669_DEVICE_DRQ( 0x03 ) ) + +/* +** Configuration Register Index Definitions +*/ +#define SMC37c669_CR00_INDEX 0x00 +#define SMC37c669_CR01_INDEX 0x01 +#define SMC37c669_CR02_INDEX 0x02 +#define SMC37c669_CR03_INDEX 0x03 +#define SMC37c669_CR04_INDEX 0x04 +#define SMC37c669_CR05_INDEX 0x05 +#define SMC37c669_CR06_INDEX 0x06 +#define SMC37c669_CR07_INDEX 0x07 +#define SMC37c669_CR08_INDEX 0x08 +#define SMC37c669_CR09_INDEX 0x09 +#define SMC37c669_CR0A_INDEX 0x0A +#define SMC37c669_CR0B_INDEX 0x0B +#define SMC37c669_CR0C_INDEX 0x0C +#define SMC37c669_CR0D_INDEX 0x0D +#define SMC37c669_CR0E_INDEX 0x0E +#define SMC37c669_CR0F_INDEX 0x0F +#define SMC37c669_CR10_INDEX 0x10 +#define SMC37c669_CR11_INDEX 0x11 +#define SMC37c669_CR12_INDEX 0x12 +#define SMC37c669_CR13_INDEX 0x13 +#define SMC37c669_CR14_INDEX 0x14 +#define SMC37c669_CR15_INDEX 0x15 +#define SMC37c669_CR16_INDEX 0x16 +#define SMC37c669_CR17_INDEX 0x17 +#define SMC37c669_CR18_INDEX 0x18 +#define SMC37c669_CR19_INDEX 0x19 +#define SMC37c669_CR1A_INDEX 0x1A +#define SMC37c669_CR1B_INDEX 0x1B +#define SMC37c669_CR1C_INDEX 0x1C +#define SMC37c669_CR1D_INDEX 0x1D +#define SMC37c669_CR1E_INDEX 0x1E +#define SMC37c669_CR1F_INDEX 0x1F +#define SMC37c669_CR20_INDEX 0x20 +#define SMC37c669_CR21_INDEX 0x21 +#define SMC37c669_CR22_INDEX 0x22 +#define SMC37c669_CR23_INDEX 0x23 +#define SMC37c669_CR24_INDEX 0x24 +#define SMC37c669_CR25_INDEX 0x25 +#define SMC37c669_CR26_INDEX 0x26 +#define SMC37c669_CR27_INDEX 0x27 +#define SMC37c669_CR28_INDEX 0x28 +#define SMC37c669_CR29_INDEX 0x29 + +/* +** Configuration Register Alias Definitions +*/ +#define SMC37c669_DEVICE_ID_INDEX SMC37c669_CR0D_INDEX +#define SMC37c669_DEVICE_REVISION_INDEX SMC37c669_CR0E_INDEX +#define SMC37c669_FDC_BASE_ADDRESS_INDEX SMC37c669_CR20_INDEX +#define SMC37c669_IDE_BASE_ADDRESS_INDEX SMC37c669_CR21_INDEX +#define SMC37c669_IDE_ALTERNATE_ADDRESS_INDEX SMC37c669_CR22_INDEX +#define SMC37c669_PARALLEL0_BASE_ADDRESS_INDEX SMC37c669_CR23_INDEX +#define SMC37c669_SERIAL0_BASE_ADDRESS_INDEX SMC37c669_CR24_INDEX +#define SMC37c669_SERIAL1_BASE_ADDRESS_INDEX SMC37c669_CR25_INDEX +#define SMC37c669_PARALLEL_FDC_DRQ_INDEX SMC37c669_CR26_INDEX +#define SMC37c669_PARALLEL_FDC_IRQ_INDEX SMC37c669_CR27_INDEX +#define SMC37c669_SERIAL_IRQ_INDEX SMC37c669_CR28_INDEX + +/* +** Configuration Register Definitions +** +** The INDEX (write only) and DATA (read/write) ports are effective +** only when the chip is in the Configuration State. +*/ +typedef struct _SMC37c669_CONFIG_REGS { + unsigned char index_port; + unsigned char data_port; +} SMC37c669_CONFIG_REGS; + +/* +** CR00 - default value 0x28 +** +** IDE_EN (CR00<1:0>): +** 0x - 30ua pull-ups on nIDEEN, nHDCS0, NHDCS1 +** 11 - IRQ_H available as IRQ output, +** IRRX2, IRTX2 available as alternate IR pins +** 10 - nIDEEN, nHDCS0, nHDCS1 used to control IDE +** +** VALID (CR00<7>): +** A high level on this software controlled bit can +** be used to indicate that a valid configuration +** cycle has occurred. The control software must +** take care to set this bit at the appropriate times. +** Set to zero after power up. This bit has no +** effect on any other hardware in the chip. +** +*/ +typedef union _SMC37c669_CR00 { + unsigned char as_uchar; + struct { + unsigned ide_en : 2; /* See note above */ + unsigned reserved1 : 1; /* RAZ */ + unsigned fdc_pwr : 1; /* 1 = supply power to FDC */ + unsigned reserved2 : 3; /* Read as 010b */ + unsigned valid : 1; /* See note above */ + } by_field; +} SMC37c669_CR00; + +/* +** CR01 - default value 0x9C +*/ +typedef union _SMC37c669_CR01 { + unsigned char as_uchar; + struct { + unsigned reserved1 : 2; /* RAZ */ + unsigned ppt_pwr : 1; /* 1 = supply power to PPT */ + unsigned ppt_mode : 1; /* 1 = Printer mode, 0 = EPP */ + unsigned reserved2 : 1; /* Read as 1 */ + unsigned reserved3 : 2; /* RAZ */ + unsigned lock_crx: 1; /* Lock CR00 - CR18 */ + } by_field; +} SMC37c669_CR01; + +/* +** CR02 - default value 0x88 +*/ +typedef union _SMC37c669_CR02 { + unsigned char as_uchar; + struct { + unsigned reserved1 : 3; /* RAZ */ + unsigned uart1_pwr : 1; /* 1 = supply power to UART1 */ + unsigned reserved2 : 3; /* RAZ */ + unsigned uart2_pwr : 1; /* 1 = supply power to UART2 */ + } by_field; +} SMC37c669_CR02; + +/* +** CR03 - default value 0x78 +** +** CR03<7> CR03<2> Pin 94 +** ------- ------- ------ +** 0 X DRV2 (input) +** 1 0 ADRX +** 1 1 IRQ_B +** +** CR03<6> CR03<5> Op Mode +** ------- ------- ------- +** 0 0 Model 30 +** 0 1 PS/2 +** 1 0 Reserved +** 1 1 AT Mode +*/ +typedef union _SMC37c669_CR03 { + unsigned char as_uchar; + struct { + unsigned pwrgd_gamecs : 1; /* 1 = PWRGD, 0 = GAMECS */ + unsigned fdc_mode2 : 1; /* 1 = Enhanced Mode 2 */ + unsigned pin94_0 : 1; /* See note above */ + unsigned reserved1 : 1; /* RAZ */ + unsigned drvden : 1; /* 1 = high, 0 - output */ + unsigned op_mode : 2; /* See note above */ + unsigned pin94_1 : 1; /* See note above */ + } by_field; +} SMC37c669_CR03; + +/* +** CR04 - default value 0x00 +** +** PP_EXT_MODE: +** If CR01<PP_MODE> = 0 and PP_EXT_MODE = +** 00 - Standard and Bidirectional +** 01 - EPP mode and SPP +** 10 - ECP mode +** In this mode, 2 drives can be supported +** directly, 3 or 4 drives must use external +** 4 drive support. SPP can be selected +** through the ECR register of ECP as mode 000. +** 11 - ECP mode and EPP mode +** In this mode, 2 drives can be supported +** directly, 3 or 4 drives must use external +** 4 drive support. SPP can be selected +** through the ECR register of ECP as mode 000. +** In this mode, EPP can be selected through +** the ECR register of ECP as mode 100. +** +** PP_FDC: +** 00 - Normal +** 01 - PPFD1 +** 10 - PPFD2 +** 11 - Reserved +** +** MIDI1: +** Serial Clock Select: +** A low level on this bit disables MIDI support, +** clock = divide by 13. A high level on this +** bit enables MIDI support, clock = divide by 12. +** +** MIDI operates at 31.25 Kbps which can be derived +** from 125 KHz (24 MHz / 12 = 2 MHz, 2 MHz / 16 = 125 KHz) +** +** ALT_IO: +** 0 - Use pins IRRX, IRTX +** 1 - Use pins IRRX2, IRTX2 +** +** If this bit is set, the IR receive and transmit +** functions will not be available on pins 25 and 26 +** unless CR00<IDE_EN> = 11. +*/ +typedef union _SMC37c669_CR04 { + unsigned char as_uchar; + struct { + unsigned ppt_ext_mode : 2; /* See note above */ + unsigned ppt_fdc : 2; /* See note above */ + unsigned midi1 : 1; /* See note above */ + unsigned midi2 : 1; /* See note above */ + unsigned epp_type : 1; /* 0 = EPP 1.9, 1 = EPP 1.7 */ + unsigned alt_io : 1; /* See note above */ + } by_field; +} SMC37c669_CR04; + +/* +** CR05 - default value 0x00 +** +** DEN_SEL: +** 00 - Densel output normal +** 01 - Reserved +** 10 - Densel output 1 +** 11 - Densel output 0 +** +*/ +typedef union _SMC37c669_CR05 { + unsigned char as_uchar; + struct { + unsigned reserved1 : 2; /* RAZ */ + unsigned fdc_dma_mode : 1; /* 0 = burst, 1 = non-burst */ + unsigned den_sel : 2; /* See note above */ + unsigned swap_drv : 1; /* Swap the FDC motor selects */ + unsigned extx4 : 1; /* 0 = 2 drive, 1 = external 4 drive decode */ + unsigned reserved2 : 1; /* RAZ */ + } by_field; +} SMC37c669_CR05; + +/* +** CR06 - default value 0xFF +*/ +typedef union _SMC37c669_CR06 { + unsigned char as_uchar; + struct { + unsigned floppy_a : 2; /* Type of floppy drive A */ + unsigned floppy_b : 2; /* Type of floppy drive B */ + unsigned floppy_c : 2; /* Type of floppy drive C */ + unsigned floppy_d : 2; /* Type of floppy drive D */ + } by_field; +} SMC37c669_CR06; + +/* +** CR07 - default value 0x00 +** +** Auto Power Management CR07<7:4>: +** 0 - Auto Powerdown disabled (default) +** 1 - Auto Powerdown enabled +** +** This bit is reset to the default state by POR or +** a hardware reset. +** +*/ +typedef union _SMC37c669_CR07 { + unsigned char as_uchar; + struct { + unsigned floppy_boot : 2; /* 0 = A:, 1 = B: */ + unsigned reserved1 : 2; /* RAZ */ + unsigned ppt_en : 1; /* See note above */ + unsigned uart1_en : 1; /* See note above */ + unsigned uart2_en : 1; /* See note above */ + unsigned fdc_en : 1; /* See note above */ + } by_field; +} SMC37c669_CR07; + +/* +** CR08 - default value 0x00 +*/ +typedef union _SMC37c669_CR08 { + unsigned char as_uchar; + struct { + unsigned zero : 4; /* 0 */ + unsigned addrx7_4 : 4; /* ADR<7:3> for ADRx decode */ + } by_field; +} SMC37c669_CR08; + +/* +** CR09 - default value 0x00 +** +** ADRx_CONFIG: +** 00 - ADRx disabled +** 01 - 1 byte decode A<3:0> = 0000b +** 10 - 8 byte block decode A<3:0> = 0XXXb +** 11 - 16 byte block decode A<3:0> = XXXXb +** +*/ +typedef union _SMC37c669_CR09 { + unsigned char as_uchar; + struct { + unsigned adra8 : 3; /* ADR<10:8> for ADRx decode */ + unsigned reserved1 : 3; + unsigned adrx_config : 2; /* See note above */ + } by_field; +} SMC37c669_CR09; + +/* +** CR0A - default value 0x00 +*/ +typedef union _SMC37c669_CR0A { + unsigned char as_uchar; + struct { + unsigned ecp_fifo_threshold : 4; + unsigned reserved1 : 4; + } by_field; +} SMC37c669_CR0A; + +/* +** CR0B - default value 0x00 +*/ +typedef union _SMC37c669_CR0B { + unsigned char as_uchar; + struct { + unsigned fdd0_drtx : 2; /* FDD0 Data Rate Table */ + unsigned fdd1_drtx : 2; /* FDD1 Data Rate Table */ + unsigned fdd2_drtx : 2; /* FDD2 Data Rate Table */ + unsigned fdd3_drtx : 2; /* FDD3 Data Rate Table */ + } by_field; +} SMC37c669_CR0B; + +/* +** CR0C - default value 0x00 +** +** UART2_MODE: +** 000 - Standard (default) +** 001 - IrDA (HPSIR) +** 010 - Amplitude Shift Keyed IR @500 KHz +** 011 - Reserved +** 1xx - Reserved +** +*/ +typedef union _SMC37c669_CR0C { + unsigned char as_uchar; + struct { + unsigned uart2_rcv_polarity : 1; /* 1 = invert RX */ + unsigned uart2_xmit_polarity : 1; /* 1 = invert TX */ + unsigned uart2_duplex : 1; /* 1 = full, 0 = half */ + unsigned uart2_mode : 3; /* See note above */ + unsigned uart1_speed : 1; /* 1 = high speed enabled */ + unsigned uart2_speed : 1; /* 1 = high speed enabled */ + } by_field; +} SMC37c669_CR0C; + +/* +** CR0D - default value 0x03 +** +** Device ID Register - read only +*/ +typedef union _SMC37c669_CR0D { + unsigned char as_uchar; + struct { + unsigned device_id : 8; /* Returns 0x3 in this field */ + } by_field; +} SMC37c669_CR0D; + +/* +** CR0E - default value 0x02 +** +** Device Revision Register - read only +*/ +typedef union _SMC37c669_CR0E { + unsigned char as_uchar; + struct { + unsigned device_rev : 8; /* Returns 0x2 in this field */ + } by_field; +} SMC37c669_CR0E; + +/* +** CR0F - default value 0x00 +*/ +typedef union _SMC37c669_CR0F { + unsigned char as_uchar; + struct { + unsigned test0 : 1; /* Reserved - set to 0 */ + unsigned test1 : 1; /* Reserved - set to 0 */ + unsigned test2 : 1; /* Reserved - set to 0 */ + unsigned test3 : 1; /* Reserved - set t0 0 */ + unsigned test4 : 1; /* Reserved - set to 0 */ + unsigned test5 : 1; /* Reserved - set t0 0 */ + unsigned test6 : 1; /* Reserved - set t0 0 */ + unsigned test7 : 1; /* Reserved - set to 0 */ + } by_field; +} SMC37c669_CR0F; + +/* +** CR10 - default value 0x00 +*/ +typedef union _SMC37c669_CR10 { + unsigned char as_uchar; + struct { + unsigned reserved1 : 3; /* RAZ */ + unsigned pll_gain : 1; /* 1 = 3V, 2 = 5V operation */ + unsigned pll_stop : 1; /* 1 = stop PLLs */ + unsigned ace_stop : 1; /* 1 = stop UART clocks */ + unsigned pll_clock_ctrl : 1; /* 0 = 14.318 MHz, 1 = 24 MHz */ + unsigned ir_test : 1; /* Enable IR test mode */ + } by_field; +} SMC37c669_CR10; + +/* +** CR11 - default value 0x00 +*/ +typedef union _SMC37c669_CR11 { + unsigned char as_uchar; + struct { + unsigned ir_loopback : 1; /* Internal IR loop back */ + unsigned test_10ms : 1; /* Test 10ms autopowerdown FDC timeout */ + unsigned reserved1 : 6; /* RAZ */ + } by_field; +} SMC37c669_CR11; + +/* +** CR12 - CR1D are reserved registers +*/ + +/* +** CR1E - default value 0x80 +** +** GAMECS: +** 00 - GAMECS disabled +** 01 - 1 byte decode ADR<3:0> = 0001b +** 10 - 8 byte block decode ADR<3:0> = 0XXXb +** 11 - 16 byte block decode ADR<3:0> = XXXXb +** +*/ +typedef union _SMC37c66_CR1E { + unsigned char as_uchar; + struct { + unsigned gamecs_config: 2; /* See note above */ + unsigned gamecs_addr9_4 : 6; /* GAMECS Addr<9:4> */ + } by_field; +} SMC37c669_CR1E; + +/* +** CR1F - default value 0x00 +** +** DT0 DT1 DRVDEN0 DRVDEN1 Drive Type +** --- --- ------- ------- ---------- +** 0 0 DENSEL DRATE0 4/2/1 MB 3.5" +** 2/1 MB 5.25" +** 2/1.6/1 MB 3.5" (3-mode) +** 0 1 DRATE1 DRATE0 +** 1 0 nDENSEL DRATE0 PS/2 +** 1 1 DRATE0 DRATE1 +** +** Note: DENSEL, DRATE1, and DRATE0 map onto two output +** pins - DRVDEN0 and DRVDEN1. +** +*/ +typedef union _SMC37c669_CR1F { + unsigned char as_uchar; + struct { + unsigned fdd0_drive_type : 2; /* FDD0 drive type */ + unsigned fdd1_drive_type : 2; /* FDD1 drive type */ + unsigned fdd2_drive_type : 2; /* FDD2 drive type */ + unsigned fdd3_drive_type : 2; /* FDD3 drive type */ + } by_field; +} SMC37c669_CR1F; + +/* +** CR20 - default value 0x3C +** +** FDC Base Address Register +** - To disable this decode set Addr<9:8> = 0 +** - A<10> = 0, A<3:0> = 0XXXb to access. +** +*/ +typedef union _SMC37c669_CR20 { + unsigned char as_uchar; + struct { + unsigned zero : 2; /* 0 */ + unsigned addr9_4 : 6; /* FDC Addr<9:4> */ + } by_field; +} SMC37c669_CR20; + +/* +** CR21 - default value 0x3C +** +** IDE Base Address Register +** - To disable this decode set Addr<9:8> = 0 +** - A<10> = 0, A<3:0> = 0XXXb to access. +** +*/ +typedef union _SMC37c669_CR21 { + unsigned char as_uchar; + struct { + unsigned zero : 2; /* 0 */ + unsigned addr9_4 : 6; /* IDE Addr<9:4> */ + } by_field; +} SMC37c669_CR21; + +/* +** CR22 - default value 0x3D +** +** IDE Alternate Status Base Address Register +** - To disable this decode set Addr<9:8> = 0 +** - A<10> = 0, A<3:0> = 0110b to access. +** +*/ +typedef union _SMC37c669_CR22 { + unsigned char as_uchar; + struct { + unsigned zero : 2; /* 0 */ + unsigned addr9_4 : 6; /* IDE Alt Status Addr<9:4> */ + } by_field; +} SMC37c669_CR22; + +/* +** CR23 - default value 0x00 +** +** Parallel Port Base Address Register +** - To disable this decode set Addr<9:8> = 0 +** - A<10> = 0 to access. +** - If EPP is enabled, A<2:0> = XXXb to access. +** If EPP is NOT enabled, A<1:0> = XXb to access +** +*/ +typedef union _SMC37c669_CR23 { + unsigned char as_uchar; + struct { + unsigned addr9_2 : 8; /* Parallel Port Addr<9:2> */ + } by_field; +} SMC37c669_CR23; + +/* +** CR24 - default value 0x00 +** +** UART1 Base Address Register +** - To disable this decode set Addr<9:8> = 0 +** - A<10> = 0, A<2:0> = XXXb to access. +** +*/ +typedef union _SMC37c669_CR24 { + unsigned char as_uchar; + struct { + unsigned zero : 1; /* 0 */ + unsigned addr9_3 : 7; /* UART1 Addr<9:3> */ + } by_field; +} SMC37c669_CR24; + +/* +** CR25 - default value 0x00 +** +** UART2 Base Address Register +** - To disable this decode set Addr<9:8> = 0 +** - A<10> = 0, A<2:0> = XXXb to access. +** +*/ +typedef union _SMC37c669_CR25 { + unsigned char as_uchar; + struct { + unsigned zero : 1; /* 0 */ + unsigned addr9_3 : 7; /* UART2 Addr<9:3> */ + } by_field; +} SMC37c669_CR25; + +/* +** CR26 - default value 0x00 +** +** Parallel Port / FDC DMA Select Register +** +** D3 - D0 DMA +** D7 - D4 Selected +** ------- -------- +** 0000 None +** 0001 DMA_A +** 0010 DMA_B +** 0011 DMA_C +** +*/ +typedef union _SMC37c669_CR26 { + unsigned char as_uchar; + struct { + unsigned ppt_drq : 4; /* See note above */ + unsigned fdc_drq : 4; /* See note above */ + } by_field; +} SMC37c669_CR26; + +/* +** CR27 - default value 0x00 +** +** Parallel Port / FDC IRQ Select Register +** +** D3 - D0 IRQ +** D7 - D4 Selected +** ------- -------- +** 0000 None +** 0001 IRQ_A +** 0010 IRQ_B +** 0011 IRQ_C +** 0100 IRQ_D +** 0101 IRQ_E +** 0110 IRQ_F +** 0111 Reserved +** 1000 IRQ_H +** +** Any unselected IRQ REQ is in tristate +** +*/ +typedef union _SMC37c669_CR27 { + unsigned char as_uchar; + struct { + unsigned ppt_irq : 4; /* See note above */ + unsigned fdc_irq : 4; /* See note above */ + } by_field; +} SMC37c669_CR27; + +/* +** CR28 - default value 0x00 +** +** UART IRQ Select Register +** +** D3 - D0 IRQ +** D7 - D4 Selected +** ------- -------- +** 0000 None +** 0001 IRQ_A +** 0010 IRQ_B +** 0011 IRQ_C +** 0100 IRQ_D +** 0101 IRQ_E +** 0110 IRQ_F +** 0111 Reserved +** 1000 IRQ_H +** 1111 share with UART1 (only for UART2) +** +** Any unselected IRQ REQ is in tristate +** +** To share an IRQ between UART1 and UART2, set +** UART1 to use the desired IRQ and set UART2 to +** 0xF to enable sharing mechanism. +** +*/ +typedef union _SMC37c669_CR28 { + unsigned char as_uchar; + struct { + unsigned uart2_irq : 4; /* See note above */ + unsigned uart1_irq : 4; /* See note above */ + } by_field; +} SMC37c669_CR28; + +/* +** CR29 - default value 0x00 +** +** IRQIN IRQ Select Register +** +** D3 - D0 IRQ +** D7 - D4 Selected +** ------- -------- +** 0000 None +** 0001 IRQ_A +** 0010 IRQ_B +** 0011 IRQ_C +** 0100 IRQ_D +** 0101 IRQ_E +** 0110 IRQ_F +** 0111 Reserved +** 1000 IRQ_H +** +** Any unselected IRQ REQ is in tristate +** +*/ +typedef union _SMC37c669_CR29 { + unsigned char as_uchar; + struct { + unsigned irqin_irq : 4; /* See note above */ + unsigned reserved1 : 4; /* RAZ */ + } by_field; +} SMC37c669_CR29; + +/* +** Aliases of Configuration Register formats (should match +** the set of index aliases). +** +** Note that CR24 and CR25 have the same format and are the +** base address registers for UART1 and UART2. Because of +** this we only define 1 alias here - for CR24 - as the serial +** base address register. +** +** Note that CR21 and CR22 have the same format and are the +** base address and alternate status address registers for +** the IDE controller. Because of this we only define 1 alias +** here - for CR21 - as the IDE address register. +** +*/ +typedef SMC37c669_CR0D SMC37c669_DEVICE_ID_REGISTER; +typedef SMC37c669_CR0E SMC37c669_DEVICE_REVISION_REGISTER; +typedef SMC37c669_CR20 SMC37c669_FDC_BASE_ADDRESS_REGISTER; +typedef SMC37c669_CR21 SMC37c669_IDE_ADDRESS_REGISTER; +typedef SMC37c669_CR23 SMC37c669_PARALLEL_BASE_ADDRESS_REGISTER; +typedef SMC37c669_CR24 SMC37c669_SERIAL_BASE_ADDRESS_REGISTER; +typedef SMC37c669_CR26 SMC37c669_PARALLEL_FDC_DRQ_REGISTER; +typedef SMC37c669_CR27 SMC37c669_PARALLEL_FDC_IRQ_REGISTER; +typedef SMC37c669_CR28 SMC37c669_SERIAL_IRQ_REGISTER; + +/* +** ISA/Device IRQ Translation Table Entry Definition +*/ +typedef struct _SMC37c669_IRQ_TRANSLATION_ENTRY { + int device_irq; + int isa_irq; +} SMC37c669_IRQ_TRANSLATION_ENTRY; + +/* +** ISA/Device DMA Translation Table Entry Definition +*/ +typedef struct _SMC37c669_DRQ_TRANSLATION_ENTRY { + int device_drq; + int isa_drq; +} SMC37c669_DRQ_TRANSLATION_ENTRY; + +/* +** External Interface Function Prototype Declarations +*/ + +SMC37c669_CONFIG_REGS *SMC37c669_detect( + int +); + +unsigned int SMC37c669_enable_device( + unsigned int func +); + +unsigned int SMC37c669_disable_device( + unsigned int func +); + +unsigned int SMC37c669_configure_device( + unsigned int func, + int port, + int irq, + int drq +); + +void SMC37c669_display_device_info( + void +); + +#endif /* __SMC37c669_H */ + +/* file: smcc669.c + * + * Copyright (C) 1997 by + * Digital Equipment Corporation, Maynard, Massachusetts. + * All rights reserved. + * + * This software is furnished under a license and may be used and copied + * only in accordance of the terms of such license and with the + * inclusion of the above copyright notice. This software or any other + * copies thereof may not be provided or otherwise made available to any + * other person. No title to and ownership of the software is hereby + * transferred. + * + * The information in this software is subject to change without notice + * and should not be construed as a commitment by digital equipment + * corporation. + * + * Digital assumes no responsibility for the use or reliability of its + * software on equipment which is not supplied by digital. + */ + +/* + *++ + * FACILITY: + * + * Alpha SRM Console Firmware + * + * MODULE DESCRIPTION: + * + * SMC37c669 Super I/O controller configuration routines. + * + * AUTHORS: + * + * Eric Rasmussen + * + * CREATION DATE: + * + * 28-Jan-1997 + * + * MODIFICATION HISTORY: + * + * er 01-May-1997 Fixed pointer conversion errors in + * SMC37c669_get_device_config(). + * er 28-Jan-1997 Initial version. + * + *-- + */ +#if 0 +/* $INCLUDE_OPTIONS$ */ +#include "cp$inc:platform_io.h" +/* $INCLUDE_OPTIONS_END$ */ +#include "cp$src:common.h" +#include "cp$inc:prototypes.h" +#include "cp$src:kernel_def.h" +#include "cp$src:msg_def.h" +#include "cp$src:smcc669_def.h" +/* Platform-specific includes */ +#include "cp$src:platform.h" +#endif + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +#define wb( _x_, _y_ ) outb( _y_, (unsigned int)((unsigned long)_x_) ) +#define rb( _x_ ) inb( (unsigned int)((unsigned long)_x_) ) + +/* +** Local storage for device configuration information. +** +** Since the SMC37c669 does not provide an explicit +** mechanism for enabling/disabling individual device +** functions, other than unmapping the device, local +** storage for device configuration information is +** allocated here for use in implementing our own +** function enable/disable scheme. +*/ +static struct DEVICE_CONFIG { + unsigned int port1; + unsigned int port2; + int irq; + int drq; +} local_config [NUM_FUNCS]; + +/* +** List of all possible addresses for the Super I/O chip +*/ +static unsigned long SMC37c669_Addresses[] __initdata = + { + 0x3F0UL, /* Primary address */ + 0x370UL, /* Secondary address */ + 0UL /* End of list */ + }; + +/* +** Global Pointer to the Super I/O device +*/ +static SMC37c669_CONFIG_REGS *SMC37c669 __initdata = NULL; + +/* +** IRQ Translation Table +** +** The IRQ translation table is a list of SMC37c669 device +** and standard ISA IRQs. +** +*/ +static SMC37c669_IRQ_TRANSLATION_ENTRY *SMC37c669_irq_table __initdata; + +/* +** The following definition is for the default IRQ +** translation table. +*/ +static SMC37c669_IRQ_TRANSLATION_ENTRY SMC37c669_default_irq_table[] +__initdata = + { + { SMC37c669_DEVICE_IRQ_A, -1 }, + { SMC37c669_DEVICE_IRQ_B, -1 }, + { SMC37c669_DEVICE_IRQ_C, 7 }, + { SMC37c669_DEVICE_IRQ_D, 6 }, + { SMC37c669_DEVICE_IRQ_E, 4 }, + { SMC37c669_DEVICE_IRQ_F, 3 }, + { SMC37c669_DEVICE_IRQ_H, -1 }, + { -1, -1 } /* End of table */ + }; + +/* +** The following definition is for the MONET (XP1000) IRQ +** translation table. +*/ +static SMC37c669_IRQ_TRANSLATION_ENTRY SMC37c669_monet_irq_table[] +__initdata = + { + { SMC37c669_DEVICE_IRQ_A, -1 }, + { SMC37c669_DEVICE_IRQ_B, -1 }, + { SMC37c669_DEVICE_IRQ_C, 6 }, + { SMC37c669_DEVICE_IRQ_D, 7 }, + { SMC37c669_DEVICE_IRQ_E, 4 }, + { SMC37c669_DEVICE_IRQ_F, 3 }, + { SMC37c669_DEVICE_IRQ_H, -1 }, + { -1, -1 } /* End of table */ + }; + +static SMC37c669_IRQ_TRANSLATION_ENTRY *SMC37c669_irq_tables[] __initdata = + { + SMC37c669_default_irq_table, + SMC37c669_monet_irq_table + }; + +/* +** DRQ Translation Table +** +** The DRQ translation table is a list of SMC37c669 device and +** ISA DMA channels. +** +*/ +static SMC37c669_DRQ_TRANSLATION_ENTRY *SMC37c669_drq_table __initdata; + +/* +** The following definition is the default DRQ +** translation table. +*/ +static SMC37c669_DRQ_TRANSLATION_ENTRY SMC37c669_default_drq_table[] +__initdata = + { + { SMC37c669_DEVICE_DRQ_A, 2 }, + { SMC37c669_DEVICE_DRQ_B, 3 }, + { SMC37c669_DEVICE_DRQ_C, -1 }, + { -1, -1 } /* End of table */ + }; + +/* +** Local Function Prototype Declarations +*/ + +static unsigned int SMC37c669_is_device_enabled( + unsigned int func +); + +#if 0 +static unsigned int SMC37c669_get_device_config( + unsigned int func, + int *port, + int *irq, + int *drq +); +#endif + +static void SMC37c669_config_mode( + unsigned int enable +); + +static unsigned char SMC37c669_read_config( + unsigned char index +); + +static void SMC37c669_write_config( + unsigned char index, + unsigned char data +); + +static void SMC37c669_init_local_config( void ); + +static struct DEVICE_CONFIG *SMC37c669_get_config( + unsigned int func +); + +static int SMC37c669_xlate_irq( + int irq +); + +static int SMC37c669_xlate_drq( + int drq +); + +static __cacheline_aligned DEFINE_SPINLOCK(smc_lock); + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function detects the presence of an SMC37c669 Super I/O +** controller. +** +** FORMAL PARAMETERS: +** +** None +** +** RETURN VALUE: +** +** Returns a pointer to the device if found, otherwise, +** the NULL pointer is returned. +** +** SIDE EFFECTS: +** +** None +** +**-- +*/ +SMC37c669_CONFIG_REGS * __init SMC37c669_detect( int index ) +{ + int i; + SMC37c669_DEVICE_ID_REGISTER id; + + for ( i = 0; SMC37c669_Addresses[i] != 0; i++ ) { +/* +** Initialize the device pointer even though we don't yet know if +** the controller is at this address. The support functions access +** the controller through this device pointer so we need to set it +** even when we are looking ... +*/ + SMC37c669 = ( SMC37c669_CONFIG_REGS * )SMC37c669_Addresses[i]; +/* +** Enter configuration mode +*/ + SMC37c669_config_mode( TRUE ); +/* +** Read the device id +*/ + id.as_uchar = SMC37c669_read_config( SMC37c669_DEVICE_ID_INDEX ); +/* +** Exit configuration mode +*/ + SMC37c669_config_mode( FALSE ); +/* +** Does the device id match? If so, assume we have found an +** SMC37c669 controller at this address. +*/ + if ( id.by_field.device_id == SMC37c669_DEVICE_ID ) { +/* +** Initialize the IRQ and DRQ translation tables. +*/ + SMC37c669_irq_table = SMC37c669_irq_tables[ index ]; + SMC37c669_drq_table = SMC37c669_default_drq_table; +/* +** erfix +** +** If the platform can't use the IRQ and DRQ defaults set up in this +** file, it should call a platform-specific external routine at this +** point to reset the IRQ and DRQ translation table pointers to point +** at the appropriate tables for the platform. If the defaults are +** acceptable, then the external routine should do nothing. +*/ + +/* +** Put the chip back into configuration mode +*/ + SMC37c669_config_mode( TRUE ); +/* +** Initialize local storage for configuration information +*/ + SMC37c669_init_local_config( ); +/* +** Exit configuration mode +*/ + SMC37c669_config_mode( FALSE ); +/* +** SMC37c669 controller found, break out of search loop +*/ + break; + } + else { +/* +** Otherwise, we did not find an SMC37c669 controller at this +** address so set the device pointer to NULL. +*/ + SMC37c669 = NULL; + } + } + return SMC37c669; +} + + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function enables an SMC37c669 device function. +** +** FORMAL PARAMETERS: +** +** func: +** Which device function to enable +** +** RETURN VALUE: +** +** Returns TRUE is the device function was enabled, otherwise, FALSE +** +** SIDE EFFECTS: +** +** {@description or none@} +** +** DESIGN: +** +** Enabling a device function in the SMC37c669 controller involves +** setting all of its mappings (port, irq, drq ...). A local +** "shadow" copy of the device configuration is kept so we can +** just set each mapping to what the local copy says. +** +** This function ALWAYS updates the local shadow configuration of +** the device function being enabled, even if the device is always +** enabled. To avoid replication of code, functions such as +** configure_device set up the local copy and then call this +** function to the update the real device. +** +**-- +*/ +unsigned int __init SMC37c669_enable_device ( unsigned int func ) +{ + unsigned int ret_val = FALSE; +/* +** Put the device into configuration mode +*/ + SMC37c669_config_mode( TRUE ); + switch ( func ) { + case SERIAL_0: + { + SMC37c669_SERIAL_BASE_ADDRESS_REGISTER base_addr; + SMC37c669_SERIAL_IRQ_REGISTER irq; +/* +** Enable the serial 1 IRQ mapping +*/ + irq.as_uchar = + SMC37c669_read_config( SMC37c669_SERIAL_IRQ_INDEX ); + + irq.by_field.uart1_irq = + SMC37c669_RAW_DEVICE_IRQ( + SMC37c669_xlate_irq( local_config[ func ].irq ) + ); + + SMC37c669_write_config( SMC37c669_SERIAL_IRQ_INDEX, irq.as_uchar ); +/* +** Enable the serial 1 port base address mapping +*/ + base_addr.as_uchar = 0; + base_addr.by_field.addr9_3 = local_config[ func ].port1 >> 3; + + SMC37c669_write_config( + SMC37c669_SERIAL0_BASE_ADDRESS_INDEX, + base_addr.as_uchar + ); + ret_val = TRUE; + break; + } + case SERIAL_1: + { + SMC37c669_SERIAL_BASE_ADDRESS_REGISTER base_addr; + SMC37c669_SERIAL_IRQ_REGISTER irq; +/* +** Enable the serial 2 IRQ mapping +*/ + irq.as_uchar = + SMC37c669_read_config( SMC37c669_SERIAL_IRQ_INDEX ); + + irq.by_field.uart2_irq = + SMC37c669_RAW_DEVICE_IRQ( + SMC37c669_xlate_irq( local_config[ func ].irq ) + ); + + SMC37c669_write_config( SMC37c669_SERIAL_IRQ_INDEX, irq.as_uchar ); +/* +** Enable the serial 2 port base address mapping +*/ + base_addr.as_uchar = 0; + base_addr.by_field.addr9_3 = local_config[ func ].port1 >> 3; + + SMC37c669_write_config( + SMC37c669_SERIAL1_BASE_ADDRESS_INDEX, + base_addr.as_uchar + ); + ret_val = TRUE; + break; + } + case PARALLEL_0: + { + SMC37c669_PARALLEL_BASE_ADDRESS_REGISTER base_addr; + SMC37c669_PARALLEL_FDC_IRQ_REGISTER irq; + SMC37c669_PARALLEL_FDC_DRQ_REGISTER drq; +/* +** Enable the parallel port DMA channel mapping +*/ + drq.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_DRQ_INDEX ); + + drq.by_field.ppt_drq = + SMC37c669_RAW_DEVICE_DRQ( + SMC37c669_xlate_drq( local_config[ func ].drq ) + ); + + SMC37c669_write_config( + SMC37c669_PARALLEL_FDC_DRQ_INDEX, + drq.as_uchar + ); +/* +** Enable the parallel port IRQ mapping +*/ + irq.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_IRQ_INDEX ); + + irq.by_field.ppt_irq = + SMC37c669_RAW_DEVICE_IRQ( + SMC37c669_xlate_irq( local_config[ func ].irq ) + ); + + SMC37c669_write_config( + SMC37c669_PARALLEL_FDC_IRQ_INDEX, + irq.as_uchar + ); +/* +** Enable the parallel port base address mapping +*/ + base_addr.as_uchar = 0; + base_addr.by_field.addr9_2 = local_config[ func ].port1 >> 2; + + SMC37c669_write_config( + SMC37c669_PARALLEL0_BASE_ADDRESS_INDEX, + base_addr.as_uchar + ); + ret_val = TRUE; + break; + } + case FLOPPY_0: + { + SMC37c669_FDC_BASE_ADDRESS_REGISTER base_addr; + SMC37c669_PARALLEL_FDC_IRQ_REGISTER irq; + SMC37c669_PARALLEL_FDC_DRQ_REGISTER drq; +/* +** Enable the floppy controller DMA channel mapping +*/ + drq.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_DRQ_INDEX ); + + drq.by_field.fdc_drq = + SMC37c669_RAW_DEVICE_DRQ( + SMC37c669_xlate_drq( local_config[ func ].drq ) + ); + + SMC37c669_write_config( + SMC37c669_PARALLEL_FDC_DRQ_INDEX, + drq.as_uchar + ); +/* +** Enable the floppy controller IRQ mapping +*/ + irq.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_IRQ_INDEX ); + + irq.by_field.fdc_irq = + SMC37c669_RAW_DEVICE_IRQ( + SMC37c669_xlate_irq( local_config[ func ].irq ) + ); + + SMC37c669_write_config( + SMC37c669_PARALLEL_FDC_IRQ_INDEX, + irq.as_uchar + ); +/* +** Enable the floppy controller base address mapping +*/ + base_addr.as_uchar = 0; + base_addr.by_field.addr9_4 = local_config[ func ].port1 >> 4; + + SMC37c669_write_config( + SMC37c669_FDC_BASE_ADDRESS_INDEX, + base_addr.as_uchar + ); + ret_val = TRUE; + break; + } + case IDE_0: + { + SMC37c669_IDE_ADDRESS_REGISTER ide_addr; +/* +** Enable the IDE alternate status base address mapping +*/ + ide_addr.as_uchar = 0; + ide_addr.by_field.addr9_4 = local_config[ func ].port2 >> 4; + + SMC37c669_write_config( + SMC37c669_IDE_ALTERNATE_ADDRESS_INDEX, + ide_addr.as_uchar + ); +/* +** Enable the IDE controller base address mapping +*/ + ide_addr.as_uchar = 0; + ide_addr.by_field.addr9_4 = local_config[ func ].port1 >> 4; + + SMC37c669_write_config( + SMC37c669_IDE_BASE_ADDRESS_INDEX, + ide_addr.as_uchar + ); + ret_val = TRUE; + break; + } + } +/* +** Exit configuration mode and return +*/ + SMC37c669_config_mode( FALSE ); + + return ret_val; +} + + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function disables a device function within the +** SMC37c669 Super I/O controller. +** +** FORMAL PARAMETERS: +** +** func: +** Which function to disable +** +** RETURN VALUE: +** +** Return TRUE if the device function was disabled, otherwise, FALSE +** +** SIDE EFFECTS: +** +** {@description or none@} +** +** DESIGN: +** +** Disabling a function in the SMC37c669 device involves +** disabling all the function's mappings (port, irq, drq ...). +** A shadow copy of the device configuration is maintained +** in local storage so we won't worry aboving saving the +** current configuration information. +** +**-- +*/ +unsigned int __init SMC37c669_disable_device ( unsigned int func ) +{ + unsigned int ret_val = FALSE; + +/* +** Put the device into configuration mode +*/ + SMC37c669_config_mode( TRUE ); + switch ( func ) { + case SERIAL_0: + { + SMC37c669_SERIAL_BASE_ADDRESS_REGISTER base_addr; + SMC37c669_SERIAL_IRQ_REGISTER irq; +/* +** Disable the serial 1 IRQ mapping +*/ + irq.as_uchar = + SMC37c669_read_config( SMC37c669_SERIAL_IRQ_INDEX ); + + irq.by_field.uart1_irq = 0; + + SMC37c669_write_config( SMC37c669_SERIAL_IRQ_INDEX, irq.as_uchar ); +/* +** Disable the serial 1 port base address mapping +*/ + base_addr.as_uchar = 0; + SMC37c669_write_config( + SMC37c669_SERIAL0_BASE_ADDRESS_INDEX, + base_addr.as_uchar + ); + ret_val = TRUE; + break; + } + case SERIAL_1: + { + SMC37c669_SERIAL_BASE_ADDRESS_REGISTER base_addr; + SMC37c669_SERIAL_IRQ_REGISTER irq; +/* +** Disable the serial 2 IRQ mapping +*/ + irq.as_uchar = + SMC37c669_read_config( SMC37c669_SERIAL_IRQ_INDEX ); + + irq.by_field.uart2_irq = 0; + + SMC37c669_write_config( SMC37c669_SERIAL_IRQ_INDEX, irq.as_uchar ); +/* +** Disable the serial 2 port base address mapping +*/ + base_addr.as_uchar = 0; + + SMC37c669_write_config( + SMC37c669_SERIAL1_BASE_ADDRESS_INDEX, + base_addr.as_uchar + ); + ret_val = TRUE; + break; + } + case PARALLEL_0: + { + SMC37c669_PARALLEL_BASE_ADDRESS_REGISTER base_addr; + SMC37c669_PARALLEL_FDC_IRQ_REGISTER irq; + SMC37c669_PARALLEL_FDC_DRQ_REGISTER drq; +/* +** Disable the parallel port DMA channel mapping +*/ + drq.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_DRQ_INDEX ); + + drq.by_field.ppt_drq = 0; + + SMC37c669_write_config( + SMC37c669_PARALLEL_FDC_DRQ_INDEX, + drq.as_uchar + ); +/* +** Disable the parallel port IRQ mapping +*/ + irq.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_IRQ_INDEX ); + + irq.by_field.ppt_irq = 0; + + SMC37c669_write_config( + SMC37c669_PARALLEL_FDC_IRQ_INDEX, + irq.as_uchar + ); +/* +** Disable the parallel port base address mapping +*/ + base_addr.as_uchar = 0; + + SMC37c669_write_config( + SMC37c669_PARALLEL0_BASE_ADDRESS_INDEX, + base_addr.as_uchar + ); + ret_val = TRUE; + break; + } + case FLOPPY_0: + { + SMC37c669_FDC_BASE_ADDRESS_REGISTER base_addr; + SMC37c669_PARALLEL_FDC_IRQ_REGISTER irq; + SMC37c669_PARALLEL_FDC_DRQ_REGISTER drq; +/* +** Disable the floppy controller DMA channel mapping +*/ + drq.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_DRQ_INDEX ); + + drq.by_field.fdc_drq = 0; + + SMC37c669_write_config( + SMC37c669_PARALLEL_FDC_DRQ_INDEX, + drq.as_uchar + ); +/* +** Disable the floppy controller IRQ mapping +*/ + irq.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_IRQ_INDEX ); + + irq.by_field.fdc_irq = 0; + + SMC37c669_write_config( + SMC37c669_PARALLEL_FDC_IRQ_INDEX, + irq.as_uchar + ); +/* +** Disable the floppy controller base address mapping +*/ + base_addr.as_uchar = 0; + + SMC37c669_write_config( + SMC37c669_FDC_BASE_ADDRESS_INDEX, + base_addr.as_uchar + ); + ret_val = TRUE; + break; + } + case IDE_0: + { + SMC37c669_IDE_ADDRESS_REGISTER ide_addr; +/* +** Disable the IDE alternate status base address mapping +*/ + ide_addr.as_uchar = 0; + + SMC37c669_write_config( + SMC37c669_IDE_ALTERNATE_ADDRESS_INDEX, + ide_addr.as_uchar + ); +/* +** Disable the IDE controller base address mapping +*/ + ide_addr.as_uchar = 0; + + SMC37c669_write_config( + SMC37c669_IDE_BASE_ADDRESS_INDEX, + ide_addr.as_uchar + ); + ret_val = TRUE; + break; + } + } +/* +** Exit configuration mode and return +*/ + SMC37c669_config_mode( FALSE ); + + return ret_val; +} + + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function configures a device function within the +** SMC37c669 Super I/O controller. +** +** FORMAL PARAMETERS: +** +** func: +** Which device function +** +** port: +** I/O port for the function to use +** +** irq: +** IRQ for the device function to use +** +** drq: +** DMA channel for the device function to use +** +** RETURN VALUE: +** +** Returns TRUE if the device function was configured, +** otherwise, FALSE. +** +** SIDE EFFECTS: +** +** {@description or none@} +** +** DESIGN: +** +** If this function returns TRUE, the local shadow copy of +** the configuration is also updated. If the device function +** is currently disabled, only the local shadow copy is +** updated and the actual device function will be updated +** if/when it is enabled. +** +**-- +*/ +unsigned int __init SMC37c669_configure_device ( + unsigned int func, + int port, + int irq, + int drq ) +{ + struct DEVICE_CONFIG *cp; + +/* +** Check for a valid configuration +*/ + if ( ( cp = SMC37c669_get_config ( func ) ) != NULL ) { +/* +** Configuration is valid, update the local shadow copy +*/ + if ( ( drq & ~0xFF ) == 0 ) { + cp->drq = drq; + } + if ( ( irq & ~0xFF ) == 0 ) { + cp->irq = irq; + } + if ( ( port & ~0xFFFF ) == 0 ) { + cp->port1 = port; + } +/* +** If the device function is enabled, update the actual +** device configuration. +*/ + if ( SMC37c669_is_device_enabled( func ) ) { + SMC37c669_enable_device( func ); + } + return TRUE; + } + return FALSE; +} + + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function determines whether a device function +** within the SMC37c669 controller is enabled. +** +** FORMAL PARAMETERS: +** +** func: +** Which device function +** +** RETURN VALUE: +** +** Returns TRUE if the device function is enabled, otherwise, FALSE +** +** SIDE EFFECTS: +** +** {@description or none@} +** +** DESIGN: +** +** To check whether a device is enabled we will only look at +** the port base address mapping. According to the SMC37c669 +** specification, all of the port base address mappings are +** disabled if the addr<9:8> (bits <7:6> of the register) are +** zero. +** +**-- +*/ +static unsigned int __init SMC37c669_is_device_enabled ( unsigned int func ) +{ + unsigned char base_addr = 0; + unsigned int dev_ok = FALSE; + unsigned int ret_val = FALSE; +/* +** Enter configuration mode +*/ + SMC37c669_config_mode( TRUE ); + + switch ( func ) { + case SERIAL_0: + base_addr = + SMC37c669_read_config( SMC37c669_SERIAL0_BASE_ADDRESS_INDEX ); + dev_ok = TRUE; + break; + case SERIAL_1: + base_addr = + SMC37c669_read_config( SMC37c669_SERIAL1_BASE_ADDRESS_INDEX ); + dev_ok = TRUE; + break; + case PARALLEL_0: + base_addr = + SMC37c669_read_config( SMC37c669_PARALLEL0_BASE_ADDRESS_INDEX ); + dev_ok = TRUE; + break; + case FLOPPY_0: + base_addr = + SMC37c669_read_config( SMC37c669_FDC_BASE_ADDRESS_INDEX ); + dev_ok = TRUE; + break; + case IDE_0: + base_addr = + SMC37c669_read_config( SMC37c669_IDE_BASE_ADDRESS_INDEX ); + dev_ok = TRUE; + break; + } +/* +** If we have a valid device, check base_addr<7:6> to see if the +** device is enabled (mapped). +*/ + if ( ( dev_ok ) && ( ( base_addr & 0xC0 ) != 0 ) ) { +/* +** The mapping is not disabled, so assume that the function is +** enabled. +*/ + ret_val = TRUE; + } +/* +** Exit configuration mode +*/ + SMC37c669_config_mode( FALSE ); + + return ret_val; +} + + +#if 0 +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function retrieves the configuration information of a +** device function within the SMC37c699 Super I/O controller. +** +** FORMAL PARAMETERS: +** +** func: +** Which device function +** +** port: +** I/O port returned +** +** irq: +** IRQ returned +** +** drq: +** DMA channel returned +** +** RETURN VALUE: +** +** Returns TRUE if the device configuration was successfully +** retrieved, otherwise, FALSE. +** +** SIDE EFFECTS: +** +** The data pointed to by the port, irq, and drq parameters +** my be modified even if the configuration is not successfully +** retrieved. +** +** DESIGN: +** +** The device configuration is fetched from the local shadow +** copy. Any unused parameters will be set to -1. Any +** parameter which is not desired can specify the NULL +** pointer. +** +**-- +*/ +static unsigned int __init SMC37c669_get_device_config ( + unsigned int func, + int *port, + int *irq, + int *drq ) +{ + struct DEVICE_CONFIG *cp; + unsigned int ret_val = FALSE; +/* +** Check for a valid device configuration +*/ + if ( ( cp = SMC37c669_get_config( func ) ) != NULL ) { + if ( drq != NULL ) { + *drq = cp->drq; + ret_val = TRUE; + } + if ( irq != NULL ) { + *irq = cp->irq; + ret_val = TRUE; + } + if ( port != NULL ) { + *port = cp->port1; + ret_val = TRUE; + } + } + return ret_val; +} +#endif + + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function displays the current state of the SMC37c699 +** Super I/O controller's device functions. +** +** FORMAL PARAMETERS: +** +** None +** +** RETURN VALUE: +** +** None +** +** SIDE EFFECTS: +** +** None +** +**-- +*/ +void __init SMC37c669_display_device_info ( void ) +{ + if ( SMC37c669_is_device_enabled( SERIAL_0 ) ) { + printk( " Serial 0: Enabled [ Port 0x%x, IRQ %d ]\n", + local_config[ SERIAL_0 ].port1, + local_config[ SERIAL_0 ].irq + ); + } + else { + printk( " Serial 0: Disabled\n" ); + } + + if ( SMC37c669_is_device_enabled( SERIAL_1 ) ) { + printk( " Serial 1: Enabled [ Port 0x%x, IRQ %d ]\n", + local_config[ SERIAL_1 ].port1, + local_config[ SERIAL_1 ].irq + ); + } + else { + printk( " Serial 1: Disabled\n" ); + } + + if ( SMC37c669_is_device_enabled( PARALLEL_0 ) ) { + printk( " Parallel: Enabled [ Port 0x%x, IRQ %d/%d ]\n", + local_config[ PARALLEL_0 ].port1, + local_config[ PARALLEL_0 ].irq, + local_config[ PARALLEL_0 ].drq + ); + } + else { + printk( " Parallel: Disabled\n" ); + } + + if ( SMC37c669_is_device_enabled( FLOPPY_0 ) ) { + printk( " Floppy Ctrl: Enabled [ Port 0x%x, IRQ %d/%d ]\n", + local_config[ FLOPPY_0 ].port1, + local_config[ FLOPPY_0 ].irq, + local_config[ FLOPPY_0 ].drq + ); + } + else { + printk( " Floppy Ctrl: Disabled\n" ); + } + + if ( SMC37c669_is_device_enabled( IDE_0 ) ) { + printk( " IDE 0: Enabled [ Port 0x%x, IRQ %d ]\n", + local_config[ IDE_0 ].port1, + local_config[ IDE_0 ].irq + ); + } + else { + printk( " IDE 0: Disabled\n" ); + } +} + + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function puts the SMC37c669 Super I/O controller into, +** and takes it out of, configuration mode. +** +** FORMAL PARAMETERS: +** +** enable: +** TRUE to enter configuration mode, FALSE to exit. +** +** RETURN VALUE: +** +** None +** +** SIDE EFFECTS: +** +** The SMC37c669 controller may be left in configuration mode. +** +**-- +*/ +static void __init SMC37c669_config_mode( + unsigned int enable ) +{ + if ( enable ) { +/* +** To enter configuration mode, two writes in succession to the index +** port are required. If a write to another address or port occurs +** between these two writes, the chip does not enter configuration +** mode. Therefore, a spinlock is placed around the two writes to +** guarantee that they complete uninterrupted. +*/ + spin_lock(&smc_lock); + wb( &SMC37c669->index_port, SMC37c669_CONFIG_ON_KEY ); + wb( &SMC37c669->index_port, SMC37c669_CONFIG_ON_KEY ); + spin_unlock(&smc_lock); + } + else { + wb( &SMC37c669->index_port, SMC37c669_CONFIG_OFF_KEY ); + } +} + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function reads an SMC37c669 Super I/O controller +** configuration register. This function assumes that the +** device is already in configuration mode. +** +** FORMAL PARAMETERS: +** +** index: +** Index value of configuration register to read +** +** RETURN VALUE: +** +** Data read from configuration register +** +** SIDE EFFECTS: +** +** None +** +**-- +*/ +static unsigned char __init SMC37c669_read_config( + unsigned char index ) +{ + unsigned char data; + + wb( &SMC37c669->index_port, index ); + data = rb( &SMC37c669->data_port ); + return data; +} + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function writes an SMC37c669 Super I/O controller +** configuration register. This function assumes that the +** device is already in configuration mode. +** +** FORMAL PARAMETERS: +** +** index: +** Index of configuration register to write +** +** data: +** Data to be written +** +** RETURN VALUE: +** +** None +** +** SIDE EFFECTS: +** +** None +** +**-- +*/ +static void __init SMC37c669_write_config( + unsigned char index, + unsigned char data ) +{ + wb( &SMC37c669->index_port, index ); + wb( &SMC37c669->data_port, data ); +} + + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function initializes the local device +** configuration storage. This function assumes +** that the device is already in configuration +** mode. +** +** FORMAL PARAMETERS: +** +** None +** +** RETURN VALUE: +** +** None +** +** SIDE EFFECTS: +** +** Local storage for device configuration information +** is initialized. +** +**-- +*/ +static void __init SMC37c669_init_local_config ( void ) +{ + SMC37c669_SERIAL_BASE_ADDRESS_REGISTER uart_base; + SMC37c669_SERIAL_IRQ_REGISTER uart_irqs; + SMC37c669_PARALLEL_BASE_ADDRESS_REGISTER ppt_base; + SMC37c669_PARALLEL_FDC_IRQ_REGISTER ppt_fdc_irqs; + SMC37c669_PARALLEL_FDC_DRQ_REGISTER ppt_fdc_drqs; + SMC37c669_FDC_BASE_ADDRESS_REGISTER fdc_base; + SMC37c669_IDE_ADDRESS_REGISTER ide_base; + SMC37c669_IDE_ADDRESS_REGISTER ide_alt; + +/* +** Get serial port 1 base address +*/ + uart_base.as_uchar = + SMC37c669_read_config( SMC37c669_SERIAL0_BASE_ADDRESS_INDEX ); +/* +** Get IRQs for serial ports 1 & 2 +*/ + uart_irqs.as_uchar = + SMC37c669_read_config( SMC37c669_SERIAL_IRQ_INDEX ); +/* +** Store local configuration information for serial port 1 +*/ + local_config[SERIAL_0].port1 = uart_base.by_field.addr9_3 << 3; + local_config[SERIAL_0].irq = + SMC37c669_xlate_irq( + SMC37c669_DEVICE_IRQ( uart_irqs.by_field.uart1_irq ) + ); +/* +** Get serial port 2 base address +*/ + uart_base.as_uchar = + SMC37c669_read_config( SMC37c669_SERIAL1_BASE_ADDRESS_INDEX ); +/* +** Store local configuration information for serial port 2 +*/ + local_config[SERIAL_1].port1 = uart_base.by_field.addr9_3 << 3; + local_config[SERIAL_1].irq = + SMC37c669_xlate_irq( + SMC37c669_DEVICE_IRQ( uart_irqs.by_field.uart2_irq ) + ); +/* +** Get parallel port base address +*/ + ppt_base.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL0_BASE_ADDRESS_INDEX ); +/* +** Get IRQs for parallel port and floppy controller +*/ + ppt_fdc_irqs.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_IRQ_INDEX ); +/* +** Get DRQs for parallel port and floppy controller +*/ + ppt_fdc_drqs.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_DRQ_INDEX ); +/* +** Store local configuration information for parallel port +*/ + local_config[PARALLEL_0].port1 = ppt_base.by_field.addr9_2 << 2; + local_config[PARALLEL_0].irq = + SMC37c669_xlate_irq( + SMC37c669_DEVICE_IRQ( ppt_fdc_irqs.by_field.ppt_irq ) + ); + local_config[PARALLEL_0].drq = + SMC37c669_xlate_drq( + SMC37c669_DEVICE_DRQ( ppt_fdc_drqs.by_field.ppt_drq ) + ); +/* +** Get floppy controller base address +*/ + fdc_base.as_uchar = + SMC37c669_read_config( SMC37c669_FDC_BASE_ADDRESS_INDEX ); +/* +** Store local configuration information for floppy controller +*/ + local_config[FLOPPY_0].port1 = fdc_base.by_field.addr9_4 << 4; + local_config[FLOPPY_0].irq = + SMC37c669_xlate_irq( + SMC37c669_DEVICE_IRQ( ppt_fdc_irqs.by_field.fdc_irq ) + ); + local_config[FLOPPY_0].drq = + SMC37c669_xlate_drq( + SMC37c669_DEVICE_DRQ( ppt_fdc_drqs.by_field.fdc_drq ) + ); +/* +** Get IDE controller base address +*/ + ide_base.as_uchar = + SMC37c669_read_config( SMC37c669_IDE_BASE_ADDRESS_INDEX ); +/* +** Get IDE alternate status base address +*/ + ide_alt.as_uchar = + SMC37c669_read_config( SMC37c669_IDE_ALTERNATE_ADDRESS_INDEX ); +/* +** Store local configuration information for IDE controller +*/ + local_config[IDE_0].port1 = ide_base.by_field.addr9_4 << 4; + local_config[IDE_0].port2 = ide_alt.by_field.addr9_4 << 4; + local_config[IDE_0].irq = 14; +} + + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function returns a pointer to the local shadow +** configuration of the requested device function. +** +** FORMAL PARAMETERS: +** +** func: +** Which device function +** +** RETURN VALUE: +** +** Returns a pointer to the DEVICE_CONFIG structure for the +** requested function, otherwise, NULL. +** +** SIDE EFFECTS: +** +** {@description or none@} +** +**-- +*/ +static struct DEVICE_CONFIG * __init SMC37c669_get_config( unsigned int func ) +{ + struct DEVICE_CONFIG *cp = NULL; + + switch ( func ) { + case SERIAL_0: + cp = &local_config[ SERIAL_0 ]; + break; + case SERIAL_1: + cp = &local_config[ SERIAL_1 ]; + break; + case PARALLEL_0: + cp = &local_config[ PARALLEL_0 ]; + break; + case FLOPPY_0: + cp = &local_config[ FLOPPY_0 ]; + break; + case IDE_0: + cp = &local_config[ IDE_0 ]; + break; + } + return cp; +} + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function translates IRQs back and forth between ISA +** IRQs and SMC37c669 device IRQs. +** +** FORMAL PARAMETERS: +** +** irq: +** The IRQ to translate +** +** RETURN VALUE: +** +** Returns the translated IRQ, otherwise, returns -1. +** +** SIDE EFFECTS: +** +** {@description or none@} +** +**-- +*/ +static int __init SMC37c669_xlate_irq ( int irq ) +{ + int i, translated_irq = -1; + + if ( SMC37c669_IS_DEVICE_IRQ( irq ) ) { +/* +** We are translating a device IRQ to an ISA IRQ +*/ + for ( i = 0; ( SMC37c669_irq_table[i].device_irq != -1 ) || ( SMC37c669_irq_table[i].isa_irq != -1 ); i++ ) { + if ( irq == SMC37c669_irq_table[i].device_irq ) { + translated_irq = SMC37c669_irq_table[i].isa_irq; + break; + } + } + } + else { +/* +** We are translating an ISA IRQ to a device IRQ +*/ + for ( i = 0; ( SMC37c669_irq_table[i].isa_irq != -1 ) || ( SMC37c669_irq_table[i].device_irq != -1 ); i++ ) { + if ( irq == SMC37c669_irq_table[i].isa_irq ) { + translated_irq = SMC37c669_irq_table[i].device_irq; + break; + } + } + } + return translated_irq; +} + + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function translates DMA channels back and forth between +** ISA DMA channels and SMC37c669 device DMA channels. +** +** FORMAL PARAMETERS: +** +** drq: +** The DMA channel to translate +** +** RETURN VALUE: +** +** Returns the translated DMA channel, otherwise, returns -1 +** +** SIDE EFFECTS: +** +** {@description or none@} +** +**-- +*/ +static int __init SMC37c669_xlate_drq ( int drq ) +{ + int i, translated_drq = -1; + + if ( SMC37c669_IS_DEVICE_DRQ( drq ) ) { +/* +** We are translating a device DMA channel to an ISA DMA channel +*/ + for ( i = 0; ( SMC37c669_drq_table[i].device_drq != -1 ) || ( SMC37c669_drq_table[i].isa_drq != -1 ); i++ ) { + if ( drq == SMC37c669_drq_table[i].device_drq ) { + translated_drq = SMC37c669_drq_table[i].isa_drq; + break; + } + } + } + else { +/* +** We are translating an ISA DMA channel to a device DMA channel +*/ + for ( i = 0; ( SMC37c669_drq_table[i].isa_drq != -1 ) || ( SMC37c669_drq_table[i].device_drq != -1 ); i++ ) { + if ( drq == SMC37c669_drq_table[i].isa_drq ) { + translated_drq = SMC37c669_drq_table[i].device_drq; + break; + } + } + } + return translated_drq; +} + +#if 0 +int __init smcc669_init ( void ) +{ + struct INODE *ip; + + allocinode( smc_ddb.name, 1, &ip ); + ip->dva = &smc_ddb; + ip->attr = ATTR$M_WRITE | ATTR$M_READ; + ip->len[0] = 0x30; + ip->misc = 0; + INODE_UNLOCK( ip ); + + return msg_success; +} + +int __init smcc669_open( struct FILE *fp, char *info, char *next, char *mode ) +{ + struct INODE *ip; +/* +** Allow multiple readers but only one writer. ip->misc keeps track +** of the number of writers +*/ + ip = fp->ip; + INODE_LOCK( ip ); + if ( fp->mode & ATTR$M_WRITE ) { + if ( ip->misc ) { + INODE_UNLOCK( ip ); + return msg_failure; /* too many writers */ + } + ip->misc++; + } +/* +** Treat the information field as a byte offset +*/ + *fp->offset = xtoi( info ); + INODE_UNLOCK( ip ); + + return msg_success; +} + +int __init smcc669_close( struct FILE *fp ) +{ + struct INODE *ip; + + ip = fp->ip; + if ( fp->mode & ATTR$M_WRITE ) { + INODE_LOCK( ip ); + ip->misc--; + INODE_UNLOCK( ip ); + } + return msg_success; +} + +int __init smcc669_read( struct FILE *fp, int size, int number, unsigned char *buf ) +{ + int i; + int length; + int nbytes; + struct INODE *ip; + +/* +** Always access a byte at a time +*/ + ip = fp->ip; + length = size * number; + nbytes = 0; + + SMC37c669_config_mode( TRUE ); + for ( i = 0; i < length; i++ ) { + if ( !inrange( *fp->offset, 0, ip->len[0] ) ) + break; + *buf++ = SMC37c669_read_config( *fp->offset ); + *fp->offset += 1; + nbytes++; + } + SMC37c669_config_mode( FALSE ); + return nbytes; +} + +int __init smcc669_write( struct FILE *fp, int size, int number, unsigned char *buf ) +{ + int i; + int length; + int nbytes; + struct INODE *ip; +/* +** Always access a byte at a time +*/ + ip = fp->ip; + length = size * number; + nbytes = 0; + + SMC37c669_config_mode( TRUE ); + for ( i = 0; i < length; i++ ) { + if ( !inrange( *fp->offset, 0, ip->len[0] ) ) + break; + SMC37c669_write_config( *fp->offset, *buf ); + *fp->offset += 1; + buf++; + nbytes++; + } + SMC37c669_config_mode( FALSE ); + return nbytes; +} +#endif + +void __init +SMC37c669_dump_registers(void) +{ + int i; + for (i = 0; i <= 0x29; i++) + printk("-- CR%02x : %02x\n", i, SMC37c669_read_config(i)); +} +/*+ + * ============================================================================ + * = SMC_init - SMC37c669 Super I/O controller initialization = + * ============================================================================ + * + * OVERVIEW: + * + * This routine configures and enables device functions on the + * SMC37c669 Super I/O controller. + * + * FORM OF CALL: + * + * SMC_init( ); + * + * RETURNS: + * + * Nothing + * + * ARGUMENTS: + * + * None + * + * SIDE EFFECTS: + * + * None + * + */ +void __init SMC669_Init ( int index ) +{ + SMC37c669_CONFIG_REGS *SMC_base; + unsigned long flags; + + local_irq_save(flags); + if ( ( SMC_base = SMC37c669_detect( index ) ) != NULL ) { +#if SMC_DEBUG + SMC37c669_config_mode( TRUE ); + SMC37c669_dump_registers( ); + SMC37c669_config_mode( FALSE ); + SMC37c669_display_device_info( ); +#endif + SMC37c669_disable_device( SERIAL_0 ); + SMC37c669_configure_device( + SERIAL_0, + COM1_BASE, + COM1_IRQ, + -1 + ); + SMC37c669_enable_device( SERIAL_0 ); + + SMC37c669_disable_device( SERIAL_1 ); + SMC37c669_configure_device( + SERIAL_1, + COM2_BASE, + COM2_IRQ, + -1 + ); + SMC37c669_enable_device( SERIAL_1 ); + + SMC37c669_disable_device( PARALLEL_0 ); + SMC37c669_configure_device( + PARALLEL_0, + PARP_BASE, + PARP_IRQ, + PARP_DRQ + ); + SMC37c669_enable_device( PARALLEL_0 ); + + SMC37c669_disable_device( FLOPPY_0 ); + SMC37c669_configure_device( + FLOPPY_0, + FDC_BASE, + FDC_IRQ, + FDC_DRQ + ); + SMC37c669_enable_device( FLOPPY_0 ); + + /* Wake up sometimes forgotten floppy, especially on DP264. */ + outb(0xc, 0x3f2); + + SMC37c669_disable_device( IDE_0 ); + +#if SMC_DEBUG + SMC37c669_config_mode( TRUE ); + SMC37c669_dump_registers( ); + SMC37c669_config_mode( FALSE ); + SMC37c669_display_device_info( ); +#endif + local_irq_restore(flags); + printk( "SMC37c669 Super I/O Controller found @ 0x%p\n", + SMC_base ); + } + else { + local_irq_restore(flags); +#if SMC_DEBUG + printk( "No SMC37c669 Super I/O Controller found\n" ); +#endif + } +} diff --git a/arch/alpha/kernel/smc37c93x.c b/arch/alpha/kernel/smc37c93x.c new file mode 100644 index 00000000..6886b834 --- /dev/null +++ b/arch/alpha/kernel/smc37c93x.c @@ -0,0 +1,274 @@ +/* + * SMC 37C93X initialization code + */ + +#include <linux/kernel.h> + +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/delay.h> + +#include <asm/hwrpb.h> +#include <asm/io.h> +#include <asm/segment.h> + +#define SMC_DEBUG 0 + +#if SMC_DEBUG +# define DBG_DEVS(args) printk args +#else +# define DBG_DEVS(args) +#endif + +#define KB 1024 +#define MB (1024*KB) +#define GB (1024*MB) + +/* device "activate" register contents */ +#define DEVICE_ON 1 +#define DEVICE_OFF 0 + +/* configuration on/off keys */ +#define CONFIG_ON_KEY 0x55 +#define CONFIG_OFF_KEY 0xaa + +/* configuration space device definitions */ +#define FDC 0 +#define IDE1 1 +#define IDE2 2 +#define PARP 3 +#define SER1 4 +#define SER2 5 +#define RTCL 6 +#define KYBD 7 +#define AUXIO 8 + +/* Chip register offsets from base */ +#define CONFIG_CONTROL 0x02 +#define INDEX_ADDRESS 0x03 +#define LOGICAL_DEVICE_NUMBER 0x07 +#define DEVICE_ID 0x20 +#define DEVICE_REV 0x21 +#define POWER_CONTROL 0x22 +#define POWER_MGMT 0x23 +#define OSC 0x24 + +#define ACTIVATE 0x30 +#define ADDR_HI 0x60 +#define ADDR_LO 0x61 +#define INTERRUPT_SEL 0x70 +#define INTERRUPT_SEL_2 0x72 /* KYBD/MOUS only */ +#define DMA_CHANNEL_SEL 0x74 /* FDC/PARP only */ + +#define FDD_MODE_REGISTER 0x90 +#define FDD_OPTION_REGISTER 0x91 + +/* values that we read back that are expected ... */ +#define VALID_DEVICE_ID 2 + +/* default device addresses */ +#define KYBD_INTERRUPT 1 +#define MOUS_INTERRUPT 12 +#define COM2_BASE 0x2f8 +#define COM2_INTERRUPT 3 +#define COM1_BASE 0x3f8 +#define COM1_INTERRUPT 4 +#define PARP_BASE 0x3bc +#define PARP_INTERRUPT 7 + +static unsigned long __init SMCConfigState(unsigned long baseAddr) +{ + unsigned char devId; + + unsigned long configPort; + unsigned long indexPort; + unsigned long dataPort; + + int i; + + configPort = indexPort = baseAddr; + dataPort = configPort + 1; + +#define NUM_RETRIES 5 + + for (i = 0; i < NUM_RETRIES; i++) + { + outb(CONFIG_ON_KEY, configPort); + outb(CONFIG_ON_KEY, configPort); + outb(DEVICE_ID, indexPort); + devId = inb(dataPort); + if (devId == VALID_DEVICE_ID) { + outb(DEVICE_REV, indexPort); + /* unsigned char devRev = */ inb(dataPort); + break; + } + else + udelay(100); + } + return (i != NUM_RETRIES) ? baseAddr : 0L; +} + +static void __init SMCRunState(unsigned long baseAddr) +{ + outb(CONFIG_OFF_KEY, baseAddr); +} + +static unsigned long __init SMCDetectUltraIO(void) +{ + unsigned long baseAddr; + + baseAddr = 0x3F0; + if ( ( baseAddr = SMCConfigState( baseAddr ) ) == 0x3F0 ) { + return( baseAddr ); + } + baseAddr = 0x370; + if ( ( baseAddr = SMCConfigState( baseAddr ) ) == 0x370 ) { + return( baseAddr ); + } + return( ( unsigned long )0 ); +} + +static void __init SMCEnableDevice(unsigned long baseAddr, + unsigned long device, + unsigned long portaddr, + unsigned long interrupt) +{ + unsigned long indexPort; + unsigned long dataPort; + + indexPort = baseAddr; + dataPort = baseAddr + 1; + + outb(LOGICAL_DEVICE_NUMBER, indexPort); + outb(device, dataPort); + + outb(ADDR_LO, indexPort); + outb(( portaddr & 0xFF ), dataPort); + + outb(ADDR_HI, indexPort); + outb((portaddr >> 8) & 0xFF, dataPort); + + outb(INTERRUPT_SEL, indexPort); + outb(interrupt, dataPort); + + outb(ACTIVATE, indexPort); + outb(DEVICE_ON, dataPort); +} + +static void __init SMCEnableKYBD(unsigned long baseAddr) +{ + unsigned long indexPort; + unsigned long dataPort; + + indexPort = baseAddr; + dataPort = baseAddr + 1; + + outb(LOGICAL_DEVICE_NUMBER, indexPort); + outb(KYBD, dataPort); + + outb(INTERRUPT_SEL, indexPort); /* Primary interrupt select */ + outb(KYBD_INTERRUPT, dataPort); + + outb(INTERRUPT_SEL_2, indexPort); /* Secondary interrupt select */ + outb(MOUS_INTERRUPT, dataPort); + + outb(ACTIVATE, indexPort); + outb(DEVICE_ON, dataPort); +} + +static void __init SMCEnableFDC(unsigned long baseAddr) +{ + unsigned long indexPort; + unsigned long dataPort; + + unsigned char oldValue; + + indexPort = baseAddr; + dataPort = baseAddr + 1; + + outb(LOGICAL_DEVICE_NUMBER, indexPort); + outb(FDC, dataPort); + + outb(FDD_MODE_REGISTER, indexPort); + oldValue = inb(dataPort); + + oldValue |= 0x0E; /* Enable burst mode */ + outb(oldValue, dataPort); + + outb(INTERRUPT_SEL, indexPort); /* Primary interrupt select */ + outb(0x06, dataPort ); + + outb(DMA_CHANNEL_SEL, indexPort); /* DMA channel select */ + outb(0x02, dataPort); + + outb(ACTIVATE, indexPort); + outb(DEVICE_ON, dataPort); +} + +#if SMC_DEBUG +static void __init SMCReportDeviceStatus(unsigned long baseAddr) +{ + unsigned long indexPort; + unsigned long dataPort; + unsigned char currentControl; + + indexPort = baseAddr; + dataPort = baseAddr + 1; + + outb(POWER_CONTROL, indexPort); + currentControl = inb(dataPort); + + printk(currentControl & (1 << FDC) + ? "\t+FDC Enabled\n" : "\t-FDC Disabled\n"); + printk(currentControl & (1 << IDE1) + ? "\t+IDE1 Enabled\n" : "\t-IDE1 Disabled\n"); + printk(currentControl & (1 << IDE2) + ? "\t+IDE2 Enabled\n" : "\t-IDE2 Disabled\n"); + printk(currentControl & (1 << PARP) + ? "\t+PARP Enabled\n" : "\t-PARP Disabled\n"); + printk(currentControl & (1 << SER1) + ? "\t+SER1 Enabled\n" : "\t-SER1 Disabled\n"); + printk(currentControl & (1 << SER2) + ? "\t+SER2 Enabled\n" : "\t-SER2 Disabled\n"); + + printk( "\n" ); +} +#endif + +int __init SMC93x_Init(void) +{ + unsigned long SMCUltraBase; + unsigned long flags; + + local_irq_save(flags); + if ((SMCUltraBase = SMCDetectUltraIO()) != 0UL) { +#if SMC_DEBUG + SMCReportDeviceStatus(SMCUltraBase); +#endif + SMCEnableDevice(SMCUltraBase, SER1, COM1_BASE, COM1_INTERRUPT); + DBG_DEVS(("SMC FDC37C93X: SER1 done\n")); + SMCEnableDevice(SMCUltraBase, SER2, COM2_BASE, COM2_INTERRUPT); + DBG_DEVS(("SMC FDC37C93X: SER2 done\n")); + SMCEnableDevice(SMCUltraBase, PARP, PARP_BASE, PARP_INTERRUPT); + DBG_DEVS(("SMC FDC37C93X: PARP done\n")); + /* On PC164, IDE on the SMC is not enabled; + CMD646 (PCI) on MB */ + SMCEnableKYBD(SMCUltraBase); + DBG_DEVS(("SMC FDC37C93X: KYB done\n")); + SMCEnableFDC(SMCUltraBase); + DBG_DEVS(("SMC FDC37C93X: FDC done\n")); +#if SMC_DEBUG + SMCReportDeviceStatus(SMCUltraBase); +#endif + SMCRunState(SMCUltraBase); + local_irq_restore(flags); + printk("SMC FDC37C93X Ultra I/O Controller found @ 0x%lx\n", + SMCUltraBase); + return 1; + } + else { + local_irq_restore(flags); + DBG_DEVS(("No SMC FDC37C93X Ultra I/O Controller found\n")); + return 0; + } +} diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c new file mode 100644 index 00000000..50d438db --- /dev/null +++ b/arch/alpha/kernel/smp.c @@ -0,0 +1,825 @@ +/* + * linux/arch/alpha/kernel/smp.c + * + * 2001-07-09 Phil Ezolt (Phillip.Ezolt@compaq.com) + * Renamed modified smp_call_function to smp_call_function_on_cpu() + * Created an function that conforms to the old calling convention + * of smp_call_function(). + * + * This is helpful for DCPI. + * + */ + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/err.h> +#include <linux/threads.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/spinlock.h> +#include <linux/irq.h> +#include <linux/cache.h> +#include <linux/profile.h> +#include <linux/bitops.h> +#include <linux/cpu.h> + +#include <asm/hwrpb.h> +#include <asm/ptrace.h> +#include <linux/atomic.h> + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/mmu_context.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "irq_impl.h" + + +#define DEBUG_SMP 0 +#if DEBUG_SMP +#define DBGS(args) printk args +#else +#define DBGS(args) +#endif + +/* A collection of per-processor data. */ +struct cpuinfo_alpha cpu_data[NR_CPUS]; +EXPORT_SYMBOL(cpu_data); + +/* A collection of single bit ipi messages. */ +static struct { + unsigned long bits ____cacheline_aligned; +} ipi_data[NR_CPUS] __cacheline_aligned; + +enum ipi_message_type { + IPI_RESCHEDULE, + IPI_CALL_FUNC, + IPI_CALL_FUNC_SINGLE, + IPI_CPU_STOP, +}; + +/* Set to a secondary's cpuid when it comes online. */ +static int smp_secondary_alive __devinitdata = 0; + +int smp_num_probed; /* Internal processor count */ +int smp_num_cpus = 1; /* Number that came online. */ +EXPORT_SYMBOL(smp_num_cpus); + +/* + * Called by both boot and secondaries to move global data into + * per-processor storage. + */ +static inline void __init +smp_store_cpu_info(int cpuid) +{ + cpu_data[cpuid].loops_per_jiffy = loops_per_jiffy; + cpu_data[cpuid].last_asn = ASN_FIRST_VERSION; + cpu_data[cpuid].need_new_asn = 0; + cpu_data[cpuid].asn_lock = 0; +} + +/* + * Ideally sets up per-cpu profiling hooks. Doesn't do much now... + */ +static inline void __init +smp_setup_percpu_timer(int cpuid) +{ + cpu_data[cpuid].prof_counter = 1; + cpu_data[cpuid].prof_multiplier = 1; +} + +static void __init +wait_boot_cpu_to_stop(int cpuid) +{ + unsigned long stop = jiffies + 10*HZ; + + while (time_before(jiffies, stop)) { + if (!smp_secondary_alive) + return; + barrier(); + } + + printk("wait_boot_cpu_to_stop: FAILED on CPU %d, hanging now\n", cpuid); + for (;;) + barrier(); +} + +/* + * Where secondaries begin a life of C. + */ +void __cpuinit +smp_callin(void) +{ + int cpuid = hard_smp_processor_id(); + + if (cpu_online(cpuid)) { + printk("??, cpu 0x%x already present??\n", cpuid); + BUG(); + } + set_cpu_online(cpuid, true); + + /* Turn on machine checks. */ + wrmces(7); + + /* Set trap vectors. */ + trap_init(); + + /* Set interrupt vector. */ + wrent(entInt, 0); + + /* Get our local ticker going. */ + smp_setup_percpu_timer(cpuid); + + /* Call platform-specific callin, if specified */ + if (alpha_mv.smp_callin) alpha_mv.smp_callin(); + + /* All kernel threads share the same mm context. */ + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + + /* inform the notifiers about the new cpu */ + notify_cpu_starting(cpuid); + + /* Must have completely accurate bogos. */ + local_irq_enable(); + + /* Wait boot CPU to stop with irq enabled before running + calibrate_delay. */ + wait_boot_cpu_to_stop(cpuid); + mb(); + calibrate_delay(); + + smp_store_cpu_info(cpuid); + /* Allow master to continue only after we written loops_per_jiffy. */ + wmb(); + smp_secondary_alive = 1; + + DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n", + cpuid, current, current->active_mm)); + + /* Do nothing. */ + cpu_idle(); +} + +/* Wait until hwrpb->txrdy is clear for cpu. Return -1 on timeout. */ +static int __devinit +wait_for_txrdy (unsigned long cpumask) +{ + unsigned long timeout; + + if (!(hwrpb->txrdy & cpumask)) + return 0; + + timeout = jiffies + 10*HZ; + while (time_before(jiffies, timeout)) { + if (!(hwrpb->txrdy & cpumask)) + return 0; + udelay(10); + barrier(); + } + + return -1; +} + +/* + * Send a message to a secondary's console. "START" is one such + * interesting message. ;-) + */ +static void __cpuinit +send_secondary_console_msg(char *str, int cpuid) +{ + struct percpu_struct *cpu; + register char *cp1, *cp2; + unsigned long cpumask; + size_t len; + + cpu = (struct percpu_struct *) + ((char*)hwrpb + + hwrpb->processor_offset + + cpuid * hwrpb->processor_size); + + cpumask = (1UL << cpuid); + if (wait_for_txrdy(cpumask)) + goto timeout; + + cp2 = str; + len = strlen(cp2); + *(unsigned int *)&cpu->ipc_buffer[0] = len; + cp1 = (char *) &cpu->ipc_buffer[1]; + memcpy(cp1, cp2, len); + + /* atomic test and set */ + wmb(); + set_bit(cpuid, &hwrpb->rxrdy); + + if (wait_for_txrdy(cpumask)) + goto timeout; + return; + + timeout: + printk("Processor %x not ready\n", cpuid); +} + +/* + * A secondary console wants to send a message. Receive it. + */ +static void +recv_secondary_console_msg(void) +{ + int mycpu, i, cnt; + unsigned long txrdy = hwrpb->txrdy; + char *cp1, *cp2, buf[80]; + struct percpu_struct *cpu; + + DBGS(("recv_secondary_console_msg: TXRDY 0x%lx.\n", txrdy)); + + mycpu = hard_smp_processor_id(); + + for (i = 0; i < NR_CPUS; i++) { + if (!(txrdy & (1UL << i))) + continue; + + DBGS(("recv_secondary_console_msg: " + "TXRDY contains CPU %d.\n", i)); + + cpu = (struct percpu_struct *) + ((char*)hwrpb + + hwrpb->processor_offset + + i * hwrpb->processor_size); + + DBGS(("recv_secondary_console_msg: on %d from %d" + " HALT_REASON 0x%lx FLAGS 0x%lx\n", + mycpu, i, cpu->halt_reason, cpu->flags)); + + cnt = cpu->ipc_buffer[0] >> 32; + if (cnt <= 0 || cnt >= 80) + strcpy(buf, "<<< BOGUS MSG >>>"); + else { + cp1 = (char *) &cpu->ipc_buffer[11]; + cp2 = buf; + strcpy(cp2, cp1); + + while ((cp2 = strchr(cp2, '\r')) != 0) { + *cp2 = ' '; + if (cp2[1] == '\n') + cp2[1] = ' '; + } + } + + DBGS((KERN_INFO "recv_secondary_console_msg: on %d " + "message is '%s'\n", mycpu, buf)); + } + + hwrpb->txrdy = 0; +} + +/* + * Convince the console to have a secondary cpu begin execution. + */ +static int __cpuinit +secondary_cpu_start(int cpuid, struct task_struct *idle) +{ + struct percpu_struct *cpu; + struct pcb_struct *hwpcb, *ipcb; + unsigned long timeout; + + cpu = (struct percpu_struct *) + ((char*)hwrpb + + hwrpb->processor_offset + + cpuid * hwrpb->processor_size); + hwpcb = (struct pcb_struct *) cpu->hwpcb; + ipcb = &task_thread_info(idle)->pcb; + + /* Initialize the CPU's HWPCB to something just good enough for + us to get started. Immediately after starting, we'll swpctx + to the target idle task's pcb. Reuse the stack in the mean + time. Precalculate the target PCBB. */ + hwpcb->ksp = (unsigned long)ipcb + sizeof(union thread_union) - 16; + hwpcb->usp = 0; + hwpcb->ptbr = ipcb->ptbr; + hwpcb->pcc = 0; + hwpcb->asn = 0; + hwpcb->unique = virt_to_phys(ipcb); + hwpcb->flags = ipcb->flags; + hwpcb->res1 = hwpcb->res2 = 0; + +#if 0 + DBGS(("KSP 0x%lx PTBR 0x%lx VPTBR 0x%lx UNIQUE 0x%lx\n", + hwpcb->ksp, hwpcb->ptbr, hwrpb->vptb, hwpcb->unique)); +#endif + DBGS(("Starting secondary cpu %d: state 0x%lx pal_flags 0x%lx\n", + cpuid, idle->state, ipcb->flags)); + + /* Setup HWRPB fields that SRM uses to activate secondary CPU */ + hwrpb->CPU_restart = __smp_callin; + hwrpb->CPU_restart_data = (unsigned long) __smp_callin; + + /* Recalculate and update the HWRPB checksum */ + hwrpb_update_checksum(hwrpb); + + /* + * Send a "start" command to the specified processor. + */ + + /* SRM III 3.4.1.3 */ + cpu->flags |= 0x22; /* turn on Context Valid and Restart Capable */ + cpu->flags &= ~1; /* turn off Bootstrap In Progress */ + wmb(); + + send_secondary_console_msg("START\r\n", cpuid); + + /* Wait 10 seconds for an ACK from the console. */ + timeout = jiffies + 10*HZ; + while (time_before(jiffies, timeout)) { + if (cpu->flags & 1) + goto started; + udelay(10); + barrier(); + } + printk(KERN_ERR "SMP: Processor %d failed to start.\n", cpuid); + return -1; + + started: + DBGS(("secondary_cpu_start: SUCCESS for CPU %d!!!\n", cpuid)); + return 0; +} + +/* + * Bring one cpu online. + */ +static int __cpuinit +smp_boot_one_cpu(int cpuid) +{ + struct task_struct *idle; + unsigned long timeout; + + /* Cook up an idler for this guy. Note that the address we + give to kernel_thread is irrelevant -- it's going to start + where HWRPB.CPU_restart says to start. But this gets all + the other task-y sort of data structures set up like we + wish. We can't use kernel_thread since we must avoid + rescheduling the child. */ + idle = fork_idle(cpuid); + if (IS_ERR(idle)) + panic("failed fork for CPU %d", cpuid); + + DBGS(("smp_boot_one_cpu: CPU %d state 0x%lx flags 0x%lx\n", + cpuid, idle->state, idle->flags)); + + /* Signal the secondary to wait a moment. */ + smp_secondary_alive = -1; + + /* Whirrr, whirrr, whirrrrrrrrr... */ + if (secondary_cpu_start(cpuid, idle)) + return -1; + + /* Notify the secondary CPU it can run calibrate_delay. */ + mb(); + smp_secondary_alive = 0; + + /* We've been acked by the console; wait one second for + the task to start up for real. */ + timeout = jiffies + 1*HZ; + while (time_before(jiffies, timeout)) { + if (smp_secondary_alive == 1) + goto alive; + udelay(10); + barrier(); + } + + /* We failed to boot the CPU. */ + + printk(KERN_ERR "SMP: Processor %d is stuck.\n", cpuid); + return -1; + + alive: + /* Another "Red Snapper". */ + return 0; +} + +/* + * Called from setup_arch. Detect an SMP system and which processors + * are present. + */ +void __init +setup_smp(void) +{ + struct percpu_struct *cpubase, *cpu; + unsigned long i; + + if (boot_cpuid != 0) { + printk(KERN_WARNING "SMP: Booting off cpu %d instead of 0?\n", + boot_cpuid); + } + + if (hwrpb->nr_processors > 1) { + int boot_cpu_palrev; + + DBGS(("setup_smp: nr_processors %ld\n", + hwrpb->nr_processors)); + + cpubase = (struct percpu_struct *) + ((char*)hwrpb + hwrpb->processor_offset); + boot_cpu_palrev = cpubase->pal_revision; + + for (i = 0; i < hwrpb->nr_processors; i++) { + cpu = (struct percpu_struct *) + ((char *)cpubase + i*hwrpb->processor_size); + if ((cpu->flags & 0x1cc) == 0x1cc) { + smp_num_probed++; + set_cpu_possible(i, true); + set_cpu_present(i, true); + cpu->pal_revision = boot_cpu_palrev; + } + + DBGS(("setup_smp: CPU %d: flags 0x%lx type 0x%lx\n", + i, cpu->flags, cpu->type)); + DBGS(("setup_smp: CPU %d: PAL rev 0x%lx\n", + i, cpu->pal_revision)); + } + } else { + smp_num_probed = 1; + } + + printk(KERN_INFO "SMP: %d CPUs probed -- cpu_present_mask = %lx\n", + smp_num_probed, cpumask_bits(cpu_present_mask)[0]); +} + +/* + * Called by smp_init prepare the secondaries + */ +void __init +smp_prepare_cpus(unsigned int max_cpus) +{ + /* Take care of some initial bookkeeping. */ + memset(ipi_data, 0, sizeof(ipi_data)); + + current_thread_info()->cpu = boot_cpuid; + + smp_store_cpu_info(boot_cpuid); + smp_setup_percpu_timer(boot_cpuid); + + /* Nothing to do on a UP box, or when told not to. */ + if (smp_num_probed == 1 || max_cpus == 0) { + init_cpu_possible(cpumask_of(boot_cpuid)); + init_cpu_present(cpumask_of(boot_cpuid)); + printk(KERN_INFO "SMP mode deactivated.\n"); + return; + } + + printk(KERN_INFO "SMP starting up secondaries.\n"); + + smp_num_cpus = smp_num_probed; +} + +void __devinit +smp_prepare_boot_cpu(void) +{ +} + +int __cpuinit +__cpu_up(unsigned int cpu) +{ + smp_boot_one_cpu(cpu); + + return cpu_online(cpu) ? 0 : -ENOSYS; +} + +void __init +smp_cpus_done(unsigned int max_cpus) +{ + int cpu; + unsigned long bogosum = 0; + + for(cpu = 0; cpu < NR_CPUS; cpu++) + if (cpu_online(cpu)) + bogosum += cpu_data[cpu].loops_per_jiffy; + + printk(KERN_INFO "SMP: Total of %d processors activated " + "(%lu.%02lu BogoMIPS).\n", + num_online_cpus(), + (bogosum + 2500) / (500000/HZ), + ((bogosum + 2500) / (5000/HZ)) % 100); +} + + +void +smp_percpu_timer_interrupt(struct pt_regs *regs) +{ + struct pt_regs *old_regs; + int cpu = smp_processor_id(); + unsigned long user = user_mode(regs); + struct cpuinfo_alpha *data = &cpu_data[cpu]; + + old_regs = set_irq_regs(regs); + + /* Record kernel PC. */ + profile_tick(CPU_PROFILING); + + if (!--data->prof_counter) { + /* We need to make like a normal interrupt -- otherwise + timer interrupts ignore the global interrupt lock, + which would be a Bad Thing. */ + irq_enter(); + + update_process_times(user); + + data->prof_counter = data->prof_multiplier; + + irq_exit(); + } + set_irq_regs(old_regs); +} + +int +setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + + +static void +send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation) +{ + int i; + + mb(); + for_each_cpu(i, to_whom) + set_bit(operation, &ipi_data[i].bits); + + mb(); + for_each_cpu(i, to_whom) + wripir(i); +} + +void +handle_ipi(struct pt_regs *regs) +{ + int this_cpu = smp_processor_id(); + unsigned long *pending_ipis = &ipi_data[this_cpu].bits; + unsigned long ops; + +#if 0 + DBGS(("handle_ipi: on CPU %d ops 0x%lx PC 0x%lx\n", + this_cpu, *pending_ipis, regs->pc)); +#endif + + mb(); /* Order interrupt and bit testing. */ + while ((ops = xchg(pending_ipis, 0)) != 0) { + mb(); /* Order bit clearing and data access. */ + do { + unsigned long which; + + which = ops & -ops; + ops &= ~which; + which = __ffs(which); + + switch (which) { + case IPI_RESCHEDULE: + scheduler_ipi(); + break; + + case IPI_CALL_FUNC: + generic_smp_call_function_interrupt(); + break; + + case IPI_CALL_FUNC_SINGLE: + generic_smp_call_function_single_interrupt(); + break; + + case IPI_CPU_STOP: + halt(); + + default: + printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", + this_cpu, which); + break; + } + } while (ops); + + mb(); /* Order data access and bit testing. */ + } + + cpu_data[this_cpu].ipi_count++; + + if (hwrpb->txrdy) + recv_secondary_console_msg(); +} + +void +smp_send_reschedule(int cpu) +{ +#ifdef DEBUG_IPI_MSG + if (cpu == hard_smp_processor_id()) + printk(KERN_WARNING + "smp_send_reschedule: Sending IPI to self.\n"); +#endif + send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); +} + +void +smp_send_stop(void) +{ + cpumask_t to_whom; + cpumask_copy(&to_whom, cpu_possible_mask); + cpumask_clear_cpu(smp_processor_id(), &to_whom); +#ifdef DEBUG_IPI_MSG + if (hard_smp_processor_id() != boot_cpu_id) + printk(KERN_WARNING "smp_send_stop: Not on boot cpu.\n"); +#endif + send_ipi_message(&to_whom, IPI_CPU_STOP); +} + +void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + send_ipi_message(mask, IPI_CALL_FUNC); +} + +void arch_send_call_function_single_ipi(int cpu) +{ + send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); +} + +static void +ipi_imb(void *ignored) +{ + imb(); +} + +void +smp_imb(void) +{ + /* Must wait other processors to flush their icache before continue. */ + if (on_each_cpu(ipi_imb, NULL, 1)) + printk(KERN_CRIT "smp_imb: timed out\n"); +} +EXPORT_SYMBOL(smp_imb); + +static void +ipi_flush_tlb_all(void *ignored) +{ + tbia(); +} + +void +flush_tlb_all(void) +{ + /* Although we don't have any data to pass, we do want to + synchronize with the other processors. */ + if (on_each_cpu(ipi_flush_tlb_all, NULL, 1)) { + printk(KERN_CRIT "flush_tlb_all: timed out\n"); + } +} + +#define asn_locked() (cpu_data[smp_processor_id()].asn_lock) + +static void +ipi_flush_tlb_mm(void *x) +{ + struct mm_struct *mm = (struct mm_struct *) x; + if (mm == current->active_mm && !asn_locked()) + flush_tlb_current(mm); + else + flush_tlb_other(mm); +} + +void +flush_tlb_mm(struct mm_struct *mm) +{ + preempt_disable(); + + if (mm == current->active_mm) { + flush_tlb_current(mm); + if (atomic_read(&mm->mm_users) <= 1) { + int cpu, this_cpu = smp_processor_id(); + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (!cpu_online(cpu) || cpu == this_cpu) + continue; + if (mm->context[cpu]) + mm->context[cpu] = 0; + } + preempt_enable(); + return; + } + } + + if (smp_call_function(ipi_flush_tlb_mm, mm, 1)) { + printk(KERN_CRIT "flush_tlb_mm: timed out\n"); + } + + preempt_enable(); +} +EXPORT_SYMBOL(flush_tlb_mm); + +struct flush_tlb_page_struct { + struct vm_area_struct *vma; + struct mm_struct *mm; + unsigned long addr; +}; + +static void +ipi_flush_tlb_page(void *x) +{ + struct flush_tlb_page_struct *data = (struct flush_tlb_page_struct *)x; + struct mm_struct * mm = data->mm; + + if (mm == current->active_mm && !asn_locked()) + flush_tlb_current_page(mm, data->vma, data->addr); + else + flush_tlb_other(mm); +} + +void +flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) +{ + struct flush_tlb_page_struct data; + struct mm_struct *mm = vma->vm_mm; + + preempt_disable(); + + if (mm == current->active_mm) { + flush_tlb_current_page(mm, vma, addr); + if (atomic_read(&mm->mm_users) <= 1) { + int cpu, this_cpu = smp_processor_id(); + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (!cpu_online(cpu) || cpu == this_cpu) + continue; + if (mm->context[cpu]) + mm->context[cpu] = 0; + } + preempt_enable(); + return; + } + } + + data.vma = vma; + data.mm = mm; + data.addr = addr; + + if (smp_call_function(ipi_flush_tlb_page, &data, 1)) { + printk(KERN_CRIT "flush_tlb_page: timed out\n"); + } + + preempt_enable(); +} +EXPORT_SYMBOL(flush_tlb_page); + +void +flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ + /* On the Alpha we always flush the whole user tlb. */ + flush_tlb_mm(vma->vm_mm); +} +EXPORT_SYMBOL(flush_tlb_range); + +static void +ipi_flush_icache_page(void *x) +{ + struct mm_struct *mm = (struct mm_struct *) x; + if (mm == current->active_mm && !asn_locked()) + __load_new_mm_context(mm); + else + flush_tlb_other(mm); +} + +void +flush_icache_user_range(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + struct mm_struct *mm = vma->vm_mm; + + if ((vma->vm_flags & VM_EXEC) == 0) + return; + + preempt_disable(); + + if (mm == current->active_mm) { + __load_new_mm_context(mm); + if (atomic_read(&mm->mm_users) <= 1) { + int cpu, this_cpu = smp_processor_id(); + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (!cpu_online(cpu) || cpu == this_cpu) + continue; + if (mm->context[cpu]) + mm->context[cpu] = 0; + } + preempt_enable(); + return; + } + } + + if (smp_call_function(ipi_flush_icache_page, mm, 1)) { + printk(KERN_CRIT "flush_icache_page: timed out\n"); + } + + preempt_enable(); +} diff --git a/arch/alpha/kernel/srm_env.c b/arch/alpha/kernel/srm_env.c new file mode 100644 index 00000000..b9fc6c30 --- /dev/null +++ b/arch/alpha/kernel/srm_env.c @@ -0,0 +1,300 @@ +/* + * srm_env.c - Access to SRM environment + * variables through linux' procfs + * + * (C) 2001,2002,2006 by Jan-Benedict Glaw <jbglaw@lug-owl.de> + * + * This driver is a modified version of Erik Mouw's example proc + * interface, so: thank you, Erik! He can be reached via email at + * <J.A.K.Mouw@its.tudelft.nl>. It is based on an idea + * provided by DEC^WCompaq^WIntel's "Jumpstart" CD. They + * included a patch like this as well. Thanks for idea! + * + * This program is free software; you can redistribute + * it and/or modify it under the terms of the GNU General + * Public License version 2 as published by the Free Software + * Foundation. + * + * This program is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/kernel.h> +#include <linux/gfp.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <asm/console.h> +#include <asm/uaccess.h> +#include <asm/machvec.h> + +#define BASE_DIR "srm_environment" /* Subdir in /proc/ */ +#define NAMED_DIR "named_variables" /* Subdir for known variables */ +#define NUMBERED_DIR "numbered_variables" /* Subdir for all variables */ +#define VERSION "0.0.6" /* Module version */ +#define NAME "srm_env" /* Module name */ + +MODULE_AUTHOR("Jan-Benedict Glaw <jbglaw@lug-owl.de>"); +MODULE_DESCRIPTION("Accessing Alpha SRM environment through procfs interface"); +MODULE_LICENSE("GPL"); + +typedef struct _srm_env { + char *name; + unsigned long id; + struct proc_dir_entry *proc_entry; +} srm_env_t; + +static struct proc_dir_entry *base_dir; +static struct proc_dir_entry *named_dir; +static struct proc_dir_entry *numbered_dir; +static char number[256][4]; + +static srm_env_t srm_named_entries[] = { + { "auto_action", ENV_AUTO_ACTION }, + { "boot_dev", ENV_BOOT_DEV }, + { "bootdef_dev", ENV_BOOTDEF_DEV }, + { "booted_dev", ENV_BOOTED_DEV }, + { "boot_file", ENV_BOOT_FILE }, + { "booted_file", ENV_BOOTED_FILE }, + { "boot_osflags", ENV_BOOT_OSFLAGS }, + { "booted_osflags", ENV_BOOTED_OSFLAGS }, + { "boot_reset", ENV_BOOT_RESET }, + { "dump_dev", ENV_DUMP_DEV }, + { "enable_audit", ENV_ENABLE_AUDIT }, + { "license", ENV_LICENSE }, + { "char_set", ENV_CHAR_SET }, + { "language", ENV_LANGUAGE }, + { "tty_dev", ENV_TTY_DEV }, + { NULL, 0 }, +}; +static srm_env_t srm_numbered_entries[256]; + + +static int srm_env_proc_show(struct seq_file *m, void *v) +{ + unsigned long ret; + srm_env_t *entry; + char *page; + + entry = m->private; + page = (char *)__get_free_page(GFP_USER); + if (!page) + return -ENOMEM; + + ret = callback_getenv(entry->id, page, PAGE_SIZE); + + if ((ret >> 61) == 0) { + seq_write(m, page, ret); + ret = 0; + } else + ret = -EFAULT; + free_page((unsigned long)page); + return ret; +} + +static int srm_env_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, srm_env_proc_show, PDE(inode)->data); +} + +static ssize_t srm_env_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) +{ + int res; + srm_env_t *entry = PDE(file->f_path.dentry->d_inode)->data; + char *buf = (char *) __get_free_page(GFP_USER); + unsigned long ret1, ret2; + + if (!buf) + return -ENOMEM; + + res = -EINVAL; + if (count >= PAGE_SIZE) + goto out; + + res = -EFAULT; + if (copy_from_user(buf, buffer, count)) + goto out; + buf[count] = '\0'; + + ret1 = callback_setenv(entry->id, buf, count); + if ((ret1 >> 61) == 0) { + do + ret2 = callback_save_env(); + while((ret2 >> 61) == 1); + res = (int) ret1; + } + + out: + free_page((unsigned long)buf); + return res; +} + +static const struct file_operations srm_env_proc_fops = { + .owner = THIS_MODULE, + .open = srm_env_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = srm_env_proc_write, +}; + +static void +srm_env_cleanup(void) +{ + srm_env_t *entry; + unsigned long var_num; + + if (base_dir) { + /* + * Remove named entries + */ + if (named_dir) { + entry = srm_named_entries; + while (entry->name != NULL && entry->id != 0) { + if (entry->proc_entry) { + remove_proc_entry(entry->name, + named_dir); + entry->proc_entry = NULL; + } + entry++; + } + remove_proc_entry(NAMED_DIR, base_dir); + } + + /* + * Remove numbered entries + */ + if (numbered_dir) { + for (var_num = 0; var_num <= 255; var_num++) { + entry = &srm_numbered_entries[var_num]; + + if (entry->proc_entry) { + remove_proc_entry(entry->name, + numbered_dir); + entry->proc_entry = NULL; + entry->name = NULL; + } + } + remove_proc_entry(NUMBERED_DIR, base_dir); + } + + remove_proc_entry(BASE_DIR, NULL); + } + + return; +} + +static int __init +srm_env_init(void) +{ + srm_env_t *entry; + unsigned long var_num; + + /* + * Check system + */ + if (!alpha_using_srm) { + printk(KERN_INFO "%s: This Alpha system doesn't " + "know about SRM (or you've booted " + "SRM->MILO->Linux, which gets " + "misdetected)...\n", __func__); + return -ENODEV; + } + + /* + * Init numbers + */ + for (var_num = 0; var_num <= 255; var_num++) + sprintf(number[var_num], "%ld", var_num); + + /* + * Create base directory + */ + base_dir = proc_mkdir(BASE_DIR, NULL); + if (!base_dir) { + printk(KERN_ERR "Couldn't create base dir /proc/%s\n", + BASE_DIR); + goto cleanup; + } + + /* + * Create per-name subdirectory + */ + named_dir = proc_mkdir(NAMED_DIR, base_dir); + if (!named_dir) { + printk(KERN_ERR "Couldn't create dir /proc/%s/%s\n", + BASE_DIR, NAMED_DIR); + goto cleanup; + } + + /* + * Create per-number subdirectory + */ + numbered_dir = proc_mkdir(NUMBERED_DIR, base_dir); + if (!numbered_dir) { + printk(KERN_ERR "Couldn't create dir /proc/%s/%s\n", + BASE_DIR, NUMBERED_DIR); + goto cleanup; + + } + + /* + * Create all named nodes + */ + entry = srm_named_entries; + while (entry->name && entry->id) { + entry->proc_entry = proc_create_data(entry->name, 0644, named_dir, + &srm_env_proc_fops, entry); + if (!entry->proc_entry) + goto cleanup; + entry++; + } + + /* + * Create all numbered nodes + */ + for (var_num = 0; var_num <= 255; var_num++) { + entry = &srm_numbered_entries[var_num]; + entry->name = number[var_num]; + + entry->proc_entry = proc_create_data(entry->name, 0644, numbered_dir, + &srm_env_proc_fops, entry); + if (!entry->proc_entry) + goto cleanup; + + entry->id = var_num; + } + + printk(KERN_INFO "%s: version %s loaded successfully\n", NAME, + VERSION); + + return 0; + +cleanup: + srm_env_cleanup(); + + return -ENOMEM; +} + +static void __exit +srm_env_exit(void) +{ + srm_env_cleanup(); + printk(KERN_INFO "%s: unloaded successfully\n", NAME); + + return; +} + +module_init(srm_env_init); +module_exit(srm_env_exit); diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c new file mode 100644 index 00000000..3ea80943 --- /dev/null +++ b/arch/alpha/kernel/srmcons.c @@ -0,0 +1,293 @@ +/* + * linux/arch/alpha/kernel/srmcons.c + * + * Callback based driver for SRM Console console device. + * (TTY driver and console driver) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/console.h> +#include <linux/delay.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/timer.h> +#include <linux/tty.h> +#include <linux/tty_driver.h> +#include <linux/tty_flip.h> + +#include <asm/console.h> +#include <asm/uaccess.h> + + +static DEFINE_SPINLOCK(srmcons_callback_lock); +static int srm_is_registered_console = 0; + +/* + * The TTY driver + */ +#define MAX_SRM_CONSOLE_DEVICES 1 /* only support 1 console device */ + +struct srmcons_private { + struct tty_port port; + struct timer_list timer; +} srmcons_singleton; + +typedef union _srmcons_result { + struct { + unsigned long c :61; + unsigned long status :3; + } bits; + long as_long; +} srmcons_result; + +/* called with callback_lock held */ +static int +srmcons_do_receive_chars(struct tty_struct *tty) +{ + srmcons_result result; + int count = 0, loops = 0; + + do { + result.as_long = callback_getc(0); + if (result.bits.status < 2) { + tty_insert_flip_char(tty, (char)result.bits.c, 0); + count++; + } + } while((result.bits.status & 1) && (++loops < 10)); + + if (count) + tty_schedule_flip(tty); + + return count; +} + +static void +srmcons_receive_chars(unsigned long data) +{ + struct srmcons_private *srmconsp = (struct srmcons_private *)data; + struct tty_port *port = &srmconsp->port; + unsigned long flags; + int incr = 10; + + local_irq_save(flags); + if (spin_trylock(&srmcons_callback_lock)) { + if (!srmcons_do_receive_chars(port->tty)) + incr = 100; + spin_unlock(&srmcons_callback_lock); + } + + spin_lock(&port->lock); + if (port->tty) + mod_timer(&srmconsp->timer, jiffies + incr); + spin_unlock(&port->lock); + + local_irq_restore(flags); +} + +/* called with callback_lock held */ +static int +srmcons_do_write(struct tty_struct *tty, const char *buf, int count) +{ + static char str_cr[1] = "\r"; + long c, remaining = count; + srmcons_result result; + char *cur; + int need_cr; + + for (cur = (char *)buf; remaining > 0; ) { + need_cr = 0; + /* + * Break it up into reasonable size chunks to allow a chance + * for input to get in + */ + for (c = 0; c < min_t(long, 128L, remaining) && !need_cr; c++) + if (cur[c] == '\n') + need_cr = 1; + + while (c > 0) { + result.as_long = callback_puts(0, cur, c); + c -= result.bits.c; + remaining -= result.bits.c; + cur += result.bits.c; + + /* + * Check for pending input iff a tty was provided + */ + if (tty) + srmcons_do_receive_chars(tty); + } + + while (need_cr) { + result.as_long = callback_puts(0, str_cr, 1); + if (result.bits.c > 0) + need_cr = 0; + } + } + return count; +} + +static int +srmcons_write(struct tty_struct *tty, + const unsigned char *buf, int count) +{ + unsigned long flags; + + spin_lock_irqsave(&srmcons_callback_lock, flags); + srmcons_do_write(tty, (const char *) buf, count); + spin_unlock_irqrestore(&srmcons_callback_lock, flags); + + return count; +} + +static int +srmcons_write_room(struct tty_struct *tty) +{ + return 512; +} + +static int +srmcons_chars_in_buffer(struct tty_struct *tty) +{ + return 0; +} + +static int +srmcons_open(struct tty_struct *tty, struct file *filp) +{ + struct srmcons_private *srmconsp = &srmcons_singleton; + struct tty_port *port = &srmconsp->port; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + + if (!port->tty) { + tty->driver_data = srmconsp; + tty->port = port; + port->tty = tty; /* XXX proper refcounting */ + mod_timer(&srmconsp->timer, jiffies + 10); + } + + spin_unlock_irqrestore(&port->lock, flags); + + return 0; +} + +static void +srmcons_close(struct tty_struct *tty, struct file *filp) +{ + struct srmcons_private *srmconsp = tty->driver_data; + struct tty_port *port = &srmconsp->port; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + + if (tty->count == 1) { + port->tty = NULL; + del_timer(&srmconsp->timer); + } + + spin_unlock_irqrestore(&port->lock, flags); +} + + +static struct tty_driver *srmcons_driver; + +static const struct tty_operations srmcons_ops = { + .open = srmcons_open, + .close = srmcons_close, + .write = srmcons_write, + .write_room = srmcons_write_room, + .chars_in_buffer= srmcons_chars_in_buffer, +}; + +static int __init +srmcons_init(void) +{ + tty_port_init(&srmcons_singleton.port); + setup_timer(&srmcons_singleton.timer, srmcons_receive_chars, + (unsigned long)&srmcons_singleton); + if (srm_is_registered_console) { + struct tty_driver *driver; + int err; + + driver = alloc_tty_driver(MAX_SRM_CONSOLE_DEVICES); + if (!driver) + return -ENOMEM; + driver->driver_name = "srm"; + driver->name = "srm"; + driver->major = 0; /* dynamic */ + driver->minor_start = 0; + driver->type = TTY_DRIVER_TYPE_SYSTEM; + driver->subtype = SYSTEM_TYPE_SYSCONS; + driver->init_termios = tty_std_termios; + tty_set_operations(driver, &srmcons_ops); + err = tty_register_driver(driver); + if (err) { + put_tty_driver(driver); + return err; + } + srmcons_driver = driver; + } + + return -ENODEV; +} + +module_init(srmcons_init); + + +/* + * The console driver + */ +static void +srm_console_write(struct console *co, const char *s, unsigned count) +{ + unsigned long flags; + + spin_lock_irqsave(&srmcons_callback_lock, flags); + srmcons_do_write(NULL, s, count); + spin_unlock_irqrestore(&srmcons_callback_lock, flags); +} + +static struct tty_driver * +srm_console_device(struct console *co, int *index) +{ + *index = co->index; + return srmcons_driver; +} + +static int +srm_console_setup(struct console *co, char *options) +{ + return 0; +} + +static struct console srmcons = { + .name = "srm", + .write = srm_console_write, + .device = srm_console_device, + .setup = srm_console_setup, + .flags = CON_PRINTBUFFER | CON_BOOT, + .index = -1, +}; + +void __init +register_srm_console(void) +{ + if (!srm_is_registered_console) { + callback_open_console(); + register_console(&srmcons); + srm_is_registered_console = 1; + } +} + +void __init +unregister_srm_console(void) +{ + if (srm_is_registered_console) { + callback_close_console(); + unregister_console(&srmcons); + srm_is_registered_console = 0; + } +} diff --git a/arch/alpha/kernel/sys_alcor.c b/arch/alpha/kernel/sys_alcor.c new file mode 100644 index 00000000..118dc6af --- /dev/null +++ b/arch/alpha/kernel/sys_alcor.c @@ -0,0 +1,308 @@ +/* + * linux/arch/alpha/kernel/sys_alcor.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code supporting the ALCOR and XLT (XL-300/366/433). + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/reboot.h> +#include <linux/bitops.h> + +#include <asm/ptrace.h> +#include <asm/io.h> +#include <asm/dma.h> +#include <asm/mmu_context.h> +#include <asm/irq.h> +#include <asm/pgtable.h> +#include <asm/core_cia.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +/* Note mask bit is true for ENABLED irqs. */ +static unsigned long cached_irq_mask; + +static inline void +alcor_update_irq_hw(unsigned long mask) +{ + *(vuip)GRU_INT_MASK = mask; + mb(); +} + +static inline void +alcor_enable_irq(struct irq_data *d) +{ + alcor_update_irq_hw(cached_irq_mask |= 1UL << (d->irq - 16)); +} + +static void +alcor_disable_irq(struct irq_data *d) +{ + alcor_update_irq_hw(cached_irq_mask &= ~(1UL << (d->irq - 16))); +} + +static void +alcor_mask_and_ack_irq(struct irq_data *d) +{ + alcor_disable_irq(d); + + /* On ALCOR/XLT, need to dismiss interrupt via GRU. */ + *(vuip)GRU_INT_CLEAR = 1 << (d->irq - 16); mb(); + *(vuip)GRU_INT_CLEAR = 0; mb(); +} + +static void +alcor_isa_mask_and_ack_irq(struct irq_data *d) +{ + i8259a_mask_and_ack_irq(d); + + /* On ALCOR/XLT, need to dismiss interrupt via GRU. */ + *(vuip)GRU_INT_CLEAR = 0x80000000; mb(); + *(vuip)GRU_INT_CLEAR = 0; mb(); +} + +static struct irq_chip alcor_irq_type = { + .name = "ALCOR", + .irq_unmask = alcor_enable_irq, + .irq_mask = alcor_disable_irq, + .irq_mask_ack = alcor_mask_and_ack_irq, +}; + +static void +alcor_device_interrupt(unsigned long vector) +{ + unsigned long pld; + unsigned int i; + + /* Read the interrupt summary register of the GRU */ + pld = (*(vuip)GRU_INT_REQ) & GRU_INT_REQ_BITS; + + /* + * Now for every possible bit set, work through them and call + * the appropriate interrupt handler. + */ + while (pld) { + i = ffz(~pld); + pld &= pld - 1; /* clear least bit set */ + if (i == 31) { + isa_device_interrupt(vector); + } else { + handle_irq(16 + i); + } + } +} + +static void __init +alcor_init_irq(void) +{ + long i; + + if (alpha_using_srm) + alpha_mv.device_interrupt = srm_device_interrupt; + + *(vuip)GRU_INT_MASK = 0; mb(); /* all disabled */ + *(vuip)GRU_INT_EDGE = 0; mb(); /* all are level */ + *(vuip)GRU_INT_HILO = 0x80000000U; mb(); /* ISA only HI */ + *(vuip)GRU_INT_CLEAR = 0; mb(); /* all clear */ + + for (i = 16; i < 48; ++i) { + /* On Alcor, at least, lines 20..30 are not connected + and can generate spurious interrupts if we turn them + on while IRQ probing. */ + if (i >= 16+20 && i <= 16+30) + continue; + irq_set_chip_and_handler(i, &alcor_irq_type, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + i8259a_irq_type.irq_ack = alcor_isa_mask_and_ack_irq; + + init_i8259a_irqs(); + common_init_isa_dma(); + + setup_irq(16+31, &isa_cascade_irqaction); +} + + +/* + * PCI Fixup configuration. + * + * Summary @ GRU_INT_REQ: + * Bit Meaning + * 0 Interrupt Line A from slot 2 + * 1 Interrupt Line B from slot 2 + * 2 Interrupt Line C from slot 2 + * 3 Interrupt Line D from slot 2 + * 4 Interrupt Line A from slot 1 + * 5 Interrupt line B from slot 1 + * 6 Interrupt Line C from slot 1 + * 7 Interrupt Line D from slot 1 + * 8 Interrupt Line A from slot 0 + * 9 Interrupt Line B from slot 0 + *10 Interrupt Line C from slot 0 + *11 Interrupt Line D from slot 0 + *12 Interrupt Line A from slot 4 + *13 Interrupt Line B from slot 4 + *14 Interrupt Line C from slot 4 + *15 Interrupt Line D from slot 4 + *16 Interrupt Line D from slot 3 + *17 Interrupt Line D from slot 3 + *18 Interrupt Line D from slot 3 + *19 Interrupt Line D from slot 3 + *20-30 Reserved + *31 EISA interrupt + * + * The device to slot mapping looks like: + * + * Slot Device + * 6 built-in TULIP (XLT only) + * 7 PCI on board slot 0 + * 8 PCI on board slot 3 + * 9 PCI on board slot 4 + * 10 PCEB (PCI-EISA bridge) + * 11 PCI on board slot 2 + * 12 PCI on board slot 1 + * + * + * This two layered interrupt approach means that we allocate IRQ 16 and + * above for PCI interrupts. The IRQ relates to which bit the interrupt + * comes in on. This makes interrupt processing much easier. + */ + +static int __init +alcor_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[7][5] __initdata = { + /*INT INTA INTB INTC INTD */ + /* note: IDSEL 17 is XLT only */ + {16+13, 16+13, 16+13, 16+13, 16+13}, /* IdSel 17, TULIP */ + { 16+8, 16+8, 16+9, 16+10, 16+11}, /* IdSel 18, slot 0 */ + {16+16, 16+16, 16+17, 16+18, 16+19}, /* IdSel 19, slot 3 */ + {16+12, 16+12, 16+13, 16+14, 16+15}, /* IdSel 20, slot 4 */ + { -1, -1, -1, -1, -1}, /* IdSel 21, PCEB */ + { 16+0, 16+0, 16+1, 16+2, 16+3}, /* IdSel 22, slot 2 */ + { 16+4, 16+4, 16+5, 16+6, 16+7}, /* IdSel 23, slot 1 */ + }; + const long min_idsel = 6, max_idsel = 12, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + +static void +alcor_kill_arch(int mode) +{ + cia_kill_arch(mode); + +#ifndef ALPHA_RESTORE_SRM_SETUP + switch(mode) { + case LINUX_REBOOT_CMD_RESTART: + /* Who said DEC engineer's have no sense of humor? ;-) */ + if (alpha_using_srm) { + *(vuip) GRU_RESET = 0x0000dead; + mb(); + } + break; + case LINUX_REBOOT_CMD_HALT: + break; + case LINUX_REBOOT_CMD_POWER_OFF: + break; + } + + halt(); +#endif +} + +static void __init +alcor_init_pci(void) +{ + struct pci_dev *dev; + + cia_init_pci(); + + /* + * Now we can look to see if we are really running on an XLT-type + * motherboard, by looking for a 21040 TULIP in slot 6, which is + * built into XLT and BRET/MAVERICK, but not available on ALCOR. + */ + dev = pci_get_device(PCI_VENDOR_ID_DEC, + PCI_DEVICE_ID_DEC_TULIP, + NULL); + if (dev && dev->devfn == PCI_DEVFN(6,0)) { + alpha_mv.sys.cia.gru_int_req_bits = XLT_GRU_INT_REQ_BITS; + printk(KERN_INFO "%s: Detected AS500 or XLT motherboard.\n", + __func__); + } + pci_dev_put(dev); +} + + +/* + * The System Vectors + */ + +struct alpha_machine_vector alcor_mv __initmv = { + .vector_name = "Alcor", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_CIA_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_ALCOR_MAX_ISA_DMA_ADDRESS, + .min_io_address = EISA_DEFAULT_IO_BASE, + .min_mem_address = CIA_DEFAULT_MEM_BASE, + + .nr_irqs = 48, + .device_interrupt = alcor_device_interrupt, + + .init_arch = cia_init_arch, + .init_irq = alcor_init_irq, + .init_rtc = common_init_rtc, + .init_pci = alcor_init_pci, + .kill_arch = alcor_kill_arch, + .pci_map_irq = alcor_map_irq, + .pci_swizzle = common_swizzle, + + .sys = { .cia = { + .gru_int_req_bits = ALCOR_GRU_INT_REQ_BITS + }} +}; +ALIAS_MV(alcor) + +struct alpha_machine_vector xlt_mv __initmv = { + .vector_name = "XLT", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_CIA_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = EISA_DEFAULT_IO_BASE, + .min_mem_address = CIA_DEFAULT_MEM_BASE, + + .nr_irqs = 48, + .device_interrupt = alcor_device_interrupt, + + .init_arch = cia_init_arch, + .init_irq = alcor_init_irq, + .init_rtc = common_init_rtc, + .init_pci = alcor_init_pci, + .kill_arch = alcor_kill_arch, + .pci_map_irq = alcor_map_irq, + .pci_swizzle = common_swizzle, + + .sys = { .cia = { + .gru_int_req_bits = XLT_GRU_INT_REQ_BITS + }} +}; + +/* No alpha_mv alias for XLT, since we compile it in unconditionally + with ALCOR; setup_arch knows how to cope. */ diff --git a/arch/alpha/kernel/sys_cabriolet.c b/arch/alpha/kernel/sys_cabriolet.c new file mode 100644 index 00000000..4c50f8f4 --- /dev/null +++ b/arch/alpha/kernel/sys_cabriolet.c @@ -0,0 +1,443 @@ +/* + * linux/arch/alpha/kernel/sys_cabriolet.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999, 2000 Richard Henderson + * + * Code supporting the Cabriolet (AlphaPC64), EB66+, and EB164, + * PC164 and LX164. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/bitops.h> + +#include <asm/ptrace.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/core_apecs.h> +#include <asm/core_cia.h> +#include <asm/core_lca.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" +#include "pc873xx.h" + +/* Note mask bit is true for DISABLED irqs. */ +static unsigned long cached_irq_mask = ~0UL; + +static inline void +cabriolet_update_irq_hw(unsigned int irq, unsigned long mask) +{ + int ofs = (irq - 16) / 8; + outb(mask >> (16 + ofs * 8), 0x804 + ofs); +} + +static inline void +cabriolet_enable_irq(struct irq_data *d) +{ + cabriolet_update_irq_hw(d->irq, cached_irq_mask &= ~(1UL << d->irq)); +} + +static void +cabriolet_disable_irq(struct irq_data *d) +{ + cabriolet_update_irq_hw(d->irq, cached_irq_mask |= 1UL << d->irq); +} + +static struct irq_chip cabriolet_irq_type = { + .name = "CABRIOLET", + .irq_unmask = cabriolet_enable_irq, + .irq_mask = cabriolet_disable_irq, + .irq_mask_ack = cabriolet_disable_irq, +}; + +static void +cabriolet_device_interrupt(unsigned long v) +{ + unsigned long pld; + unsigned int i; + + /* Read the interrupt summary registers */ + pld = inb(0x804) | (inb(0x805) << 8) | (inb(0x806) << 16); + + /* + * Now for every possible bit set, work through them and call + * the appropriate interrupt handler. + */ + while (pld) { + i = ffz(~pld); + pld &= pld - 1; /* clear least bit set */ + if (i == 4) { + isa_device_interrupt(v); + } else { + handle_irq(16 + i); + } + } +} + +static void __init +common_init_irq(void (*srm_dev_int)(unsigned long v)) +{ + init_i8259a_irqs(); + + if (alpha_using_srm) { + alpha_mv.device_interrupt = srm_dev_int; + init_srm_irqs(35, 0); + } + else { + long i; + + outb(0xff, 0x804); + outb(0xff, 0x805); + outb(0xff, 0x806); + + for (i = 16; i < 35; ++i) { + irq_set_chip_and_handler(i, &cabriolet_irq_type, + handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + } + + common_init_isa_dma(); + setup_irq(16+4, &isa_cascade_irqaction); +} + +#ifndef CONFIG_ALPHA_PC164 +static void __init +cabriolet_init_irq(void) +{ + common_init_irq(srm_device_interrupt); +} +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_PC164) +/* In theory, the PC164 has the same interrupt hardware as the other + Cabriolet based systems. However, something got screwed up late + in the development cycle which broke the interrupt masking hardware. + Repeat, it is not possible to mask and ack interrupts. At all. + + In an attempt to work around this, while processing interrupts, + we do not allow the IPL to drop below what it is currently. This + prevents the possibility of recursion. + + ??? Another option might be to force all PCI devices to use edge + triggered rather than level triggered interrupts. That might be + too invasive though. */ + +static void +pc164_srm_device_interrupt(unsigned long v) +{ + __min_ipl = getipl(); + srm_device_interrupt(v); + __min_ipl = 0; +} + +static void +pc164_device_interrupt(unsigned long v) +{ + __min_ipl = getipl(); + cabriolet_device_interrupt(v); + __min_ipl = 0; +} + +static void __init +pc164_init_irq(void) +{ + common_init_irq(pc164_srm_device_interrupt); +} +#endif + +/* + * The EB66+ is very similar to the EB66 except that it does not have + * the on-board NCR and Tulip chips. In the code below, I have used + * slot number to refer to the id select line and *not* the slot + * number used in the EB66+ documentation. However, in the table, + * I've given the slot number, the id select line and the Jxx number + * that's printed on the board. The interrupt pins from the PCI slots + * are wired into 3 interrupt summary registers at 0x804, 0x805 and + * 0x806 ISA. + * + * In the table, -1 means don't assign an IRQ number. This is usually + * because it is the Saturn IO (SIO) PCI/ISA Bridge Chip. + */ + +static inline int __init +eb66p_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[5][5] __initdata = { + /*INT INTA INTB INTC INTD */ + {16+0, 16+0, 16+5, 16+9, 16+13}, /* IdSel 6, slot 0, J25 */ + {16+1, 16+1, 16+6, 16+10, 16+14}, /* IdSel 7, slot 1, J26 */ + { -1, -1, -1, -1, -1}, /* IdSel 8, SIO */ + {16+2, 16+2, 16+7, 16+11, 16+15}, /* IdSel 9, slot 2, J27 */ + {16+3, 16+3, 16+8, 16+12, 16+6} /* IdSel 10, slot 3, J28 */ + }; + const long min_idsel = 6, max_idsel = 10, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + + +/* + * The AlphaPC64 is very similar to the EB66+ except that its slots + * are numbered differently. In the code below, I have used slot + * number to refer to the id select line and *not* the slot number + * used in the AlphaPC64 documentation. However, in the table, I've + * given the slot number, the id select line and the Jxx number that's + * printed on the board. The interrupt pins from the PCI slots are + * wired into 3 interrupt summary registers at 0x804, 0x805 and 0x806 + * ISA. + * + * In the table, -1 means don't assign an IRQ number. This is usually + * because it is the Saturn IO (SIO) PCI/ISA Bridge Chip. + */ + +static inline int __init +cabriolet_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[5][5] __initdata = { + /*INT INTA INTB INTC INTD */ + { 16+2, 16+2, 16+7, 16+11, 16+15}, /* IdSel 5, slot 2, J21 */ + { 16+0, 16+0, 16+5, 16+9, 16+13}, /* IdSel 6, slot 0, J19 */ + { 16+1, 16+1, 16+6, 16+10, 16+14}, /* IdSel 7, slot 1, J20 */ + { -1, -1, -1, -1, -1}, /* IdSel 8, SIO */ + { 16+3, 16+3, 16+8, 16+12, 16+16} /* IdSel 9, slot 3, J22 */ + }; + const long min_idsel = 5, max_idsel = 9, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + +static inline void __init +cabriolet_enable_ide(void) +{ + if (pc873xx_probe() == -1) { + printk(KERN_ERR "Probing for PC873xx Super IO chip failed.\n"); + } else { + printk(KERN_INFO "Found %s Super IO chip at 0x%x\n", + pc873xx_get_model(), pc873xx_get_base()); + + pc873xx_enable_ide(); + } +} + +static inline void __init +cabriolet_init_pci(void) +{ + common_init_pci(); + cabriolet_enable_ide(); +} + +static inline void __init +cia_cab_init_pci(void) +{ + cia_init_pci(); + cabriolet_enable_ide(); +} + +/* + * The PC164 and LX164 have 19 PCI interrupts, four from each of the four + * PCI slots, the SIO, PCI/IDE, and USB. + * + * Each of the interrupts can be individually masked. This is + * accomplished by setting the appropriate bit in the mask register. + * A bit is set by writing a "1" to the desired position in the mask + * register and cleared by writing a "0". There are 3 mask registers + * located at ISA address 804h, 805h and 806h. + * + * An I/O read at ISA address 804h, 805h, 806h will return the + * state of the 11 PCI interrupts and not the state of the MASKED + * interrupts. + * + * Note: A write to I/O 804h, 805h, and 806h the mask register will be + * updated. + * + * + * ISA DATA<7:0> + * ISA +--------------------------------------------------------------+ + * ADDRESS | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | + * +==============================================================+ + * 0x804 | INTB0 | USB | IDE | SIO | INTA3 |INTA2 | INTA1 | INTA0 | + * +--------------------------------------------------------------+ + * 0x805 | INTD0 | INTC3 | INTC2 | INTC1 | INTC0 |INTB3 | INTB2 | INTB1 | + * +--------------------------------------------------------------+ + * 0x806 | Rsrv | Rsrv | Rsrv | Rsrv | Rsrv |INTD3 | INTD2 | INTD1 | + * +--------------------------------------------------------------+ + * * Rsrv = reserved bits + * Note: The mask register is write-only. + * + * IdSel + * 5 32 bit PCI option slot 2 + * 6 64 bit PCI option slot 0 + * 7 64 bit PCI option slot 1 + * 8 Saturn I/O + * 9 32 bit PCI option slot 3 + * 10 USB + * 11 IDE + * + */ + +static inline int __init +alphapc164_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[7][5] __initdata = { + /*INT INTA INTB INTC INTD */ + { 16+2, 16+2, 16+9, 16+13, 16+17}, /* IdSel 5, slot 2, J20 */ + { 16+0, 16+0, 16+7, 16+11, 16+15}, /* IdSel 6, slot 0, J29 */ + { 16+1, 16+1, 16+8, 16+12, 16+16}, /* IdSel 7, slot 1, J26 */ + { -1, -1, -1, -1, -1}, /* IdSel 8, SIO */ + { 16+3, 16+3, 16+10, 16+14, 16+18}, /* IdSel 9, slot 3, J19 */ + { 16+6, 16+6, 16+6, 16+6, 16+6}, /* IdSel 10, USB */ + { 16+5, 16+5, 16+5, 16+5, 16+5} /* IdSel 11, IDE */ + }; + const long min_idsel = 5, max_idsel = 11, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + +static inline void __init +alphapc164_init_pci(void) +{ + cia_init_pci(); + SMC93x_Init(); +} + + +/* + * The System Vector + */ + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_CABRIOLET) +struct alpha_machine_vector cabriolet_mv __initmv = { + .vector_name = "Cabriolet", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_APECS_IO, + .machine_check = apecs_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 35, + .device_interrupt = cabriolet_device_interrupt, + + .init_arch = apecs_init_arch, + .init_irq = cabriolet_init_irq, + .init_rtc = common_init_rtc, + .init_pci = cabriolet_init_pci, + .pci_map_irq = cabriolet_map_irq, + .pci_swizzle = common_swizzle, +}; +#ifndef CONFIG_ALPHA_EB64P +ALIAS_MV(cabriolet) +#endif +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_EB164) +struct alpha_machine_vector eb164_mv __initmv = { + .vector_name = "EB164", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_CIA_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = CIA_DEFAULT_MEM_BASE, + + .nr_irqs = 35, + .device_interrupt = cabriolet_device_interrupt, + + .init_arch = cia_init_arch, + .init_irq = cabriolet_init_irq, + .init_rtc = common_init_rtc, + .init_pci = cia_cab_init_pci, + .kill_arch = cia_kill_arch, + .pci_map_irq = cabriolet_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(eb164) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_EB66P) +struct alpha_machine_vector eb66p_mv __initmv = { + .vector_name = "EB66+", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_LCA_IO, + .machine_check = lca_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 35, + .device_interrupt = cabriolet_device_interrupt, + + .init_arch = lca_init_arch, + .init_irq = cabriolet_init_irq, + .init_rtc = common_init_rtc, + .init_pci = cabriolet_init_pci, + .pci_map_irq = eb66p_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(eb66p) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_LX164) +struct alpha_machine_vector lx164_mv __initmv = { + .vector_name = "LX164", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_PYXIS_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = PYXIS_DAC_OFFSET, + + .nr_irqs = 35, + .device_interrupt = cabriolet_device_interrupt, + + .init_arch = pyxis_init_arch, + .init_irq = cabriolet_init_irq, + .init_rtc = common_init_rtc, + .init_pci = alphapc164_init_pci, + .kill_arch = cia_kill_arch, + .pci_map_irq = alphapc164_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(lx164) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_PC164) +struct alpha_machine_vector pc164_mv __initmv = { + .vector_name = "PC164", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_CIA_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = CIA_DEFAULT_MEM_BASE, + + .nr_irqs = 35, + .device_interrupt = pc164_device_interrupt, + + .init_arch = cia_init_arch, + .init_irq = pc164_init_irq, + .init_rtc = common_init_rtc, + .init_pci = alphapc164_init_pci, + .kill_arch = cia_kill_arch, + .pci_map_irq = alphapc164_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(pc164) +#endif diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c new file mode 100644 index 00000000..5bf401f7 --- /dev/null +++ b/arch/alpha/kernel/sys_dp264.c @@ -0,0 +1,673 @@ +/* + * linux/arch/alpha/kernel/sys_dp264.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996, 1999 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Modified by Christopher C. Chimelis, 2001 to + * add support for the addition of Shark to the + * Tsunami family. + * + * Code supporting the DP264 (EV6+TSUNAMI). + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/bitops.h> + +#include <asm/ptrace.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/core_tsunami.h> +#include <asm/hwrpb.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +/* Note mask bit is true for ENABLED irqs. */ +static unsigned long cached_irq_mask; +/* dp264 boards handle at max four CPUs */ +static unsigned long cpu_irq_affinity[4] = { 0UL, 0UL, 0UL, 0UL }; + +DEFINE_SPINLOCK(dp264_irq_lock); + +static void +tsunami_update_irq_hw(unsigned long mask) +{ + register tsunami_cchip *cchip = TSUNAMI_cchip; + unsigned long isa_enable = 1UL << 55; + register int bcpu = boot_cpuid; + +#ifdef CONFIG_SMP + volatile unsigned long *dim0, *dim1, *dim2, *dim3; + unsigned long mask0, mask1, mask2, mask3, dummy; + + mask &= ~isa_enable; + mask0 = mask & cpu_irq_affinity[0]; + mask1 = mask & cpu_irq_affinity[1]; + mask2 = mask & cpu_irq_affinity[2]; + mask3 = mask & cpu_irq_affinity[3]; + + if (bcpu == 0) mask0 |= isa_enable; + else if (bcpu == 1) mask1 |= isa_enable; + else if (bcpu == 2) mask2 |= isa_enable; + else mask3 |= isa_enable; + + dim0 = &cchip->dim0.csr; + dim1 = &cchip->dim1.csr; + dim2 = &cchip->dim2.csr; + dim3 = &cchip->dim3.csr; + if (!cpu_possible(0)) dim0 = &dummy; + if (!cpu_possible(1)) dim1 = &dummy; + if (!cpu_possible(2)) dim2 = &dummy; + if (!cpu_possible(3)) dim3 = &dummy; + + *dim0 = mask0; + *dim1 = mask1; + *dim2 = mask2; + *dim3 = mask3; + mb(); + *dim0; + *dim1; + *dim2; + *dim3; +#else + volatile unsigned long *dimB; + if (bcpu == 0) dimB = &cchip->dim0.csr; + else if (bcpu == 1) dimB = &cchip->dim1.csr; + else if (bcpu == 2) dimB = &cchip->dim2.csr; + else dimB = &cchip->dim3.csr; + + *dimB = mask | isa_enable; + mb(); + *dimB; +#endif +} + +static void +dp264_enable_irq(struct irq_data *d) +{ + spin_lock(&dp264_irq_lock); + cached_irq_mask |= 1UL << d->irq; + tsunami_update_irq_hw(cached_irq_mask); + spin_unlock(&dp264_irq_lock); +} + +static void +dp264_disable_irq(struct irq_data *d) +{ + spin_lock(&dp264_irq_lock); + cached_irq_mask &= ~(1UL << d->irq); + tsunami_update_irq_hw(cached_irq_mask); + spin_unlock(&dp264_irq_lock); +} + +static void +clipper_enable_irq(struct irq_data *d) +{ + spin_lock(&dp264_irq_lock); + cached_irq_mask |= 1UL << (d->irq - 16); + tsunami_update_irq_hw(cached_irq_mask); + spin_unlock(&dp264_irq_lock); +} + +static void +clipper_disable_irq(struct irq_data *d) +{ + spin_lock(&dp264_irq_lock); + cached_irq_mask &= ~(1UL << (d->irq - 16)); + tsunami_update_irq_hw(cached_irq_mask); + spin_unlock(&dp264_irq_lock); +} + +static void +cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) +{ + int cpu; + + for (cpu = 0; cpu < 4; cpu++) { + unsigned long aff = cpu_irq_affinity[cpu]; + if (cpumask_test_cpu(cpu, &affinity)) + aff |= 1UL << irq; + else + aff &= ~(1UL << irq); + cpu_irq_affinity[cpu] = aff; + } +} + +static int +dp264_set_affinity(struct irq_data *d, const struct cpumask *affinity, + bool force) +{ + spin_lock(&dp264_irq_lock); + cpu_set_irq_affinity(d->irq, *affinity); + tsunami_update_irq_hw(cached_irq_mask); + spin_unlock(&dp264_irq_lock); + + return 0; +} + +static int +clipper_set_affinity(struct irq_data *d, const struct cpumask *affinity, + bool force) +{ + spin_lock(&dp264_irq_lock); + cpu_set_irq_affinity(d->irq - 16, *affinity); + tsunami_update_irq_hw(cached_irq_mask); + spin_unlock(&dp264_irq_lock); + + return 0; +} + +static struct irq_chip dp264_irq_type = { + .name = "DP264", + .irq_unmask = dp264_enable_irq, + .irq_mask = dp264_disable_irq, + .irq_mask_ack = dp264_disable_irq, + .irq_set_affinity = dp264_set_affinity, +}; + +static struct irq_chip clipper_irq_type = { + .name = "CLIPPER", + .irq_unmask = clipper_enable_irq, + .irq_mask = clipper_disable_irq, + .irq_mask_ack = clipper_disable_irq, + .irq_set_affinity = clipper_set_affinity, +}; + +static void +dp264_device_interrupt(unsigned long vector) +{ +#if 1 + printk("dp264_device_interrupt: NOT IMPLEMENTED YET!!\n"); +#else + unsigned long pld; + unsigned int i; + + /* Read the interrupt summary register of TSUNAMI */ + pld = TSUNAMI_cchip->dir0.csr; + + /* + * Now for every possible bit set, work through them and call + * the appropriate interrupt handler. + */ + while (pld) { + i = ffz(~pld); + pld &= pld - 1; /* clear least bit set */ + if (i == 55) + isa_device_interrupt(vector); + else + handle_irq(16 + i); +#if 0 + TSUNAMI_cchip->dir0.csr = 1UL << i; mb(); + tmp = TSUNAMI_cchip->dir0.csr; +#endif + } +#endif +} + +static void +dp264_srm_device_interrupt(unsigned long vector) +{ + int irq; + + irq = (vector - 0x800) >> 4; + + /* + * The SRM console reports PCI interrupts with a vector calculated by: + * + * 0x900 + (0x10 * DRIR-bit) + * + * So bit 16 shows up as IRQ 32, etc. + * + * On DP264/BRICK/MONET, we adjust it down by 16 because at least + * that many of the low order bits of the DRIR are not used, and + * so we don't count them. + */ + if (irq >= 32) + irq -= 16; + + handle_irq(irq); +} + +static void +clipper_srm_device_interrupt(unsigned long vector) +{ + int irq; + + irq = (vector - 0x800) >> 4; + +/* + * The SRM console reports PCI interrupts with a vector calculated by: + * + * 0x900 + (0x10 * DRIR-bit) + * + * So bit 16 shows up as IRQ 32, etc. + * + * CLIPPER uses bits 8-47 for PCI interrupts, so we do not need + * to scale down the vector reported, we just use it. + * + * Eg IRQ 24 is DRIR bit 8, etc, etc + */ + handle_irq(irq); +} + +static void __init +init_tsunami_irqs(struct irq_chip * ops, int imin, int imax) +{ + long i; + for (i = imin; i <= imax; ++i) { + irq_set_chip_and_handler(i, ops, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } +} + +static void __init +dp264_init_irq(void) +{ + outb(0, DMA1_RESET_REG); + outb(0, DMA2_RESET_REG); + outb(DMA_MODE_CASCADE, DMA2_MODE_REG); + outb(0, DMA2_MASK_REG); + + if (alpha_using_srm) + alpha_mv.device_interrupt = dp264_srm_device_interrupt; + + tsunami_update_irq_hw(0); + + init_i8259a_irqs(); + init_tsunami_irqs(&dp264_irq_type, 16, 47); +} + +static void __init +clipper_init_irq(void) +{ + outb(0, DMA1_RESET_REG); + outb(0, DMA2_RESET_REG); + outb(DMA_MODE_CASCADE, DMA2_MODE_REG); + outb(0, DMA2_MASK_REG); + + if (alpha_using_srm) + alpha_mv.device_interrupt = clipper_srm_device_interrupt; + + tsunami_update_irq_hw(0); + + init_i8259a_irqs(); + init_tsunami_irqs(&clipper_irq_type, 24, 63); +} + + +/* + * PCI Fixup configuration. + * + * Summary @ TSUNAMI_CSR_DIM0: + * Bit Meaning + * 0-17 Unused + *18 Interrupt SCSI B (Adaptec 7895 builtin) + *19 Interrupt SCSI A (Adaptec 7895 builtin) + *20 Interrupt Line D from slot 2 PCI0 + *21 Interrupt Line C from slot 2 PCI0 + *22 Interrupt Line B from slot 2 PCI0 + *23 Interrupt Line A from slot 2 PCI0 + *24 Interrupt Line D from slot 1 PCI0 + *25 Interrupt Line C from slot 1 PCI0 + *26 Interrupt Line B from slot 1 PCI0 + *27 Interrupt Line A from slot 1 PCI0 + *28 Interrupt Line D from slot 0 PCI0 + *29 Interrupt Line C from slot 0 PCI0 + *30 Interrupt Line B from slot 0 PCI0 + *31 Interrupt Line A from slot 0 PCI0 + * + *32 Interrupt Line D from slot 3 PCI1 + *33 Interrupt Line C from slot 3 PCI1 + *34 Interrupt Line B from slot 3 PCI1 + *35 Interrupt Line A from slot 3 PCI1 + *36 Interrupt Line D from slot 2 PCI1 + *37 Interrupt Line C from slot 2 PCI1 + *38 Interrupt Line B from slot 2 PCI1 + *39 Interrupt Line A from slot 2 PCI1 + *40 Interrupt Line D from slot 1 PCI1 + *41 Interrupt Line C from slot 1 PCI1 + *42 Interrupt Line B from slot 1 PCI1 + *43 Interrupt Line A from slot 1 PCI1 + *44 Interrupt Line D from slot 0 PCI1 + *45 Interrupt Line C from slot 0 PCI1 + *46 Interrupt Line B from slot 0 PCI1 + *47 Interrupt Line A from slot 0 PCI1 + *48-52 Unused + *53 PCI0 NMI (from Cypress) + *54 PCI0 SMI INT (from Cypress) + *55 PCI0 ISA Interrupt (from Cypress) + *56-60 Unused + *61 PCI1 Bus Error + *62 PCI0 Bus Error + *63 Reserved + * + * IdSel + * 5 Cypress Bridge I/O + * 6 SCSI Adaptec builtin + * 7 64 bit PCI option slot 0 (all busses) + * 8 64 bit PCI option slot 1 (all busses) + * 9 64 bit PCI option slot 2 (all busses) + * 10 64 bit PCI option slot 3 (not bus 0) + */ + +static int __init +isa_irq_fixup(const struct pci_dev *dev, int irq) +{ + u8 irq8; + + if (irq > 0) + return irq; + + /* This interrupt is routed via ISA bridge, so we'll + just have to trust whatever value the console might + have assigned. */ + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq8); + + return irq8 & 0xf; +} + +static int __init +dp264_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[6][5] __initdata = { + /*INT INTA INTB INTC INTD */ + { -1, -1, -1, -1, -1}, /* IdSel 5 ISA Bridge */ + { 16+ 3, 16+ 3, 16+ 2, 16+ 2, 16+ 2}, /* IdSel 6 SCSI builtin*/ + { 16+15, 16+15, 16+14, 16+13, 16+12}, /* IdSel 7 slot 0 */ + { 16+11, 16+11, 16+10, 16+ 9, 16+ 8}, /* IdSel 8 slot 1 */ + { 16+ 7, 16+ 7, 16+ 6, 16+ 5, 16+ 4}, /* IdSel 9 slot 2 */ + { 16+ 3, 16+ 3, 16+ 2, 16+ 1, 16+ 0} /* IdSel 10 slot 3 */ + }; + const long min_idsel = 5, max_idsel = 10, irqs_per_slot = 5; + struct pci_controller *hose = dev->sysdata; + int irq = COMMON_TABLE_LOOKUP; + + if (irq > 0) + irq += 16 * hose->index; + + return isa_irq_fixup(dev, irq); +} + +static int __init +monet_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[13][5] __initdata = { + /*INT INTA INTB INTC INTD */ + { 45, 45, 45, 45, 45}, /* IdSel 3 21143 PCI1 */ + { -1, -1, -1, -1, -1}, /* IdSel 4 unused */ + { -1, -1, -1, -1, -1}, /* IdSel 5 unused */ + { 47, 47, 47, 47, 47}, /* IdSel 6 SCSI PCI1 */ + { -1, -1, -1, -1, -1}, /* IdSel 7 ISA Bridge */ + { -1, -1, -1, -1, -1}, /* IdSel 8 P2P PCI1 */ +#if 1 + { 28, 28, 29, 30, 31}, /* IdSel 14 slot 4 PCI2*/ + { 24, 24, 25, 26, 27}, /* IdSel 15 slot 5 PCI2*/ +#else + { -1, -1, -1, -1, -1}, /* IdSel 9 unused */ + { -1, -1, -1, -1, -1}, /* IdSel 10 unused */ +#endif + { 40, 40, 41, 42, 43}, /* IdSel 11 slot 1 PCI0*/ + { 36, 36, 37, 38, 39}, /* IdSel 12 slot 2 PCI0*/ + { 32, 32, 33, 34, 35}, /* IdSel 13 slot 3 PCI0*/ + { 28, 28, 29, 30, 31}, /* IdSel 14 slot 4 PCI2*/ + { 24, 24, 25, 26, 27} /* IdSel 15 slot 5 PCI2*/ + }; + const long min_idsel = 3, max_idsel = 15, irqs_per_slot = 5; + + return isa_irq_fixup(dev, COMMON_TABLE_LOOKUP); +} + +static u8 __init +monet_swizzle(struct pci_dev *dev, u8 *pinp) +{ + struct pci_controller *hose = dev->sysdata; + int slot, pin = *pinp; + + if (!dev->bus->parent) { + slot = PCI_SLOT(dev->devfn); + } + /* Check for the built-in bridge on hose 1. */ + else if (hose->index == 1 && PCI_SLOT(dev->bus->self->devfn) == 8) { + slot = PCI_SLOT(dev->devfn); + } else { + /* Must be a card-based bridge. */ + do { + /* Check for built-in bridge on hose 1. */ + if (hose->index == 1 && + PCI_SLOT(dev->bus->self->devfn) == 8) { + slot = PCI_SLOT(dev->devfn); + break; + } + pin = pci_swizzle_interrupt_pin(dev, pin); + + /* Move up the chain of bridges. */ + dev = dev->bus->self; + /* Slot of the next bridge. */ + slot = PCI_SLOT(dev->devfn); + } while (dev->bus->self); + } + *pinp = pin; + return slot; +} + +static int __init +webbrick_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[13][5] __initdata = { + /*INT INTA INTB INTC INTD */ + { -1, -1, -1, -1, -1}, /* IdSel 7 ISA Bridge */ + { -1, -1, -1, -1, -1}, /* IdSel 8 unused */ + { 29, 29, 29, 29, 29}, /* IdSel 9 21143 #1 */ + { -1, -1, -1, -1, -1}, /* IdSel 10 unused */ + { 30, 30, 30, 30, 30}, /* IdSel 11 21143 #2 */ + { -1, -1, -1, -1, -1}, /* IdSel 12 unused */ + { -1, -1, -1, -1, -1}, /* IdSel 13 unused */ + { 35, 35, 34, 33, 32}, /* IdSel 14 slot 0 */ + { 39, 39, 38, 37, 36}, /* IdSel 15 slot 1 */ + { 43, 43, 42, 41, 40}, /* IdSel 16 slot 2 */ + { 47, 47, 46, 45, 44}, /* IdSel 17 slot 3 */ + }; + const long min_idsel = 7, max_idsel = 17, irqs_per_slot = 5; + + return isa_irq_fixup(dev, COMMON_TABLE_LOOKUP); +} + +static int __init +clipper_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[7][5] __initdata = { + /*INT INTA INTB INTC INTD */ + { 16+ 8, 16+ 8, 16+ 9, 16+10, 16+11}, /* IdSel 1 slot 1 */ + { 16+12, 16+12, 16+13, 16+14, 16+15}, /* IdSel 2 slot 2 */ + { 16+16, 16+16, 16+17, 16+18, 16+19}, /* IdSel 3 slot 3 */ + { 16+20, 16+20, 16+21, 16+22, 16+23}, /* IdSel 4 slot 4 */ + { 16+24, 16+24, 16+25, 16+26, 16+27}, /* IdSel 5 slot 5 */ + { 16+28, 16+28, 16+29, 16+30, 16+31}, /* IdSel 6 slot 6 */ + { -1, -1, -1, -1, -1} /* IdSel 7 ISA Bridge */ + }; + const long min_idsel = 1, max_idsel = 7, irqs_per_slot = 5; + struct pci_controller *hose = dev->sysdata; + int irq = COMMON_TABLE_LOOKUP; + + if (irq > 0) + irq += 16 * hose->index; + + return isa_irq_fixup(dev, irq); +} + +static void __init +dp264_init_pci(void) +{ + common_init_pci(); + SMC669_Init(0); + locate_and_init_vga(NULL); +} + +static void __init +monet_init_pci(void) +{ + common_init_pci(); + SMC669_Init(1); + es1888_init(); + locate_and_init_vga(NULL); +} + +static void __init +clipper_init_pci(void) +{ + common_init_pci(); + locate_and_init_vga(NULL); +} + +static void __init +webbrick_init_arch(void) +{ + tsunami_init_arch(); + + /* Tsunami caches 4 PTEs at a time; DS10 has only 1 hose. */ + hose_head->sg_isa->align_entry = 4; + hose_head->sg_pci->align_entry = 4; +} + + +/* + * The System Vectors + */ + +struct alpha_machine_vector dp264_mv __initmv = { + .vector_name = "DP264", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_TSUNAMI_IO, + .machine_check = tsunami_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = TSUNAMI_DAC_OFFSET, + + .nr_irqs = 64, + .device_interrupt = dp264_device_interrupt, + + .init_arch = tsunami_init_arch, + .init_irq = dp264_init_irq, + .init_rtc = common_init_rtc, + .init_pci = dp264_init_pci, + .kill_arch = tsunami_kill_arch, + .pci_map_irq = dp264_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(dp264) + +struct alpha_machine_vector monet_mv __initmv = { + .vector_name = "Monet", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_TSUNAMI_IO, + .machine_check = tsunami_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = TSUNAMI_DAC_OFFSET, + + .nr_irqs = 64, + .device_interrupt = dp264_device_interrupt, + + .init_arch = tsunami_init_arch, + .init_irq = dp264_init_irq, + .init_rtc = common_init_rtc, + .init_pci = monet_init_pci, + .kill_arch = tsunami_kill_arch, + .pci_map_irq = monet_map_irq, + .pci_swizzle = monet_swizzle, +}; + +struct alpha_machine_vector webbrick_mv __initmv = { + .vector_name = "Webbrick", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_TSUNAMI_IO, + .machine_check = tsunami_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = TSUNAMI_DAC_OFFSET, + + .nr_irqs = 64, + .device_interrupt = dp264_device_interrupt, + + .init_arch = webbrick_init_arch, + .init_irq = dp264_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .kill_arch = tsunami_kill_arch, + .pci_map_irq = webbrick_map_irq, + .pci_swizzle = common_swizzle, +}; + +struct alpha_machine_vector clipper_mv __initmv = { + .vector_name = "Clipper", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_TSUNAMI_IO, + .machine_check = tsunami_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = TSUNAMI_DAC_OFFSET, + + .nr_irqs = 64, + .device_interrupt = dp264_device_interrupt, + + .init_arch = tsunami_init_arch, + .init_irq = clipper_init_irq, + .init_rtc = common_init_rtc, + .init_pci = clipper_init_pci, + .kill_arch = tsunami_kill_arch, + .pci_map_irq = clipper_map_irq, + .pci_swizzle = common_swizzle, +}; + +/* Sharks strongly resemble Clipper, at least as far + * as interrupt routing, etc, so we're using the + * same functions as Clipper does + */ + +struct alpha_machine_vector shark_mv __initmv = { + .vector_name = "Shark", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_TSUNAMI_IO, + .machine_check = tsunami_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = TSUNAMI_DAC_OFFSET, + + .nr_irqs = 64, + .device_interrupt = dp264_device_interrupt, + + .init_arch = tsunami_init_arch, + .init_irq = clipper_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .kill_arch = tsunami_kill_arch, + .pci_map_irq = clipper_map_irq, + .pci_swizzle = common_swizzle, +}; + +/* No alpha_mv alias for webbrick/monet/clipper, since we compile them + in unconditionally with DP264; setup_arch knows how to cope. */ diff --git a/arch/alpha/kernel/sys_eb64p.c b/arch/alpha/kernel/sys_eb64p.c new file mode 100644 index 00000000..ad40a425 --- /dev/null +++ b/arch/alpha/kernel/sys_eb64p.c @@ -0,0 +1,237 @@ +/* + * linux/arch/alpha/kernel/sys_eb64p.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code supporting the EB64+ and EB66. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/bitops.h> + +#include <asm/ptrace.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/core_apecs.h> +#include <asm/core_lca.h> +#include <asm/hwrpb.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +/* Note mask bit is true for DISABLED irqs. */ +static unsigned int cached_irq_mask = -1; + +static inline void +eb64p_update_irq_hw(unsigned int irq, unsigned long mask) +{ + outb(mask >> (irq >= 24 ? 24 : 16), (irq >= 24 ? 0x27 : 0x26)); +} + +static inline void +eb64p_enable_irq(struct irq_data *d) +{ + eb64p_update_irq_hw(d->irq, cached_irq_mask &= ~(1 << d->irq)); +} + +static void +eb64p_disable_irq(struct irq_data *d) +{ + eb64p_update_irq_hw(d->irq, cached_irq_mask |= 1 << d->irq); +} + +static struct irq_chip eb64p_irq_type = { + .name = "EB64P", + .irq_unmask = eb64p_enable_irq, + .irq_mask = eb64p_disable_irq, + .irq_mask_ack = eb64p_disable_irq, +}; + +static void +eb64p_device_interrupt(unsigned long vector) +{ + unsigned long pld; + unsigned int i; + + /* Read the interrupt summary registers */ + pld = inb(0x26) | (inb(0x27) << 8); + + /* + * Now, for every possible bit set, work through + * them and call the appropriate interrupt handler. + */ + while (pld) { + i = ffz(~pld); + pld &= pld - 1; /* clear least bit set */ + + if (i == 5) { + isa_device_interrupt(vector); + } else { + handle_irq(16 + i); + } + } +} + +static void __init +eb64p_init_irq(void) +{ + long i; + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_CABRIOLET) + /* + * CABRIO SRM may not set variation correctly, so here we test + * the high word of the interrupt summary register for the RAZ + * bits, and hope that a true EB64+ would read all ones... + */ + if (inw(0x806) != 0xffff) { + extern struct alpha_machine_vector cabriolet_mv; + + printk("Detected Cabriolet: correcting HWRPB.\n"); + + hwrpb->sys_variation |= 2L << 10; + hwrpb_update_checksum(hwrpb); + + alpha_mv = cabriolet_mv; + alpha_mv.init_irq(); + return; + } +#endif /* GENERIC */ + + outb(0xff, 0x26); + outb(0xff, 0x27); + + init_i8259a_irqs(); + + for (i = 16; i < 32; ++i) { + irq_set_chip_and_handler(i, &eb64p_irq_type, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + common_init_isa_dma(); + setup_irq(16+5, &isa_cascade_irqaction); +} + +/* + * PCI Fixup configuration. + * + * There are two 8 bit external summary registers as follows: + * + * Summary @ 0x26: + * Bit Meaning + * 0 Interrupt Line A from slot 0 + * 1 Interrupt Line A from slot 1 + * 2 Interrupt Line B from slot 0 + * 3 Interrupt Line B from slot 1 + * 4 Interrupt Line C from slot 0 + * 5 Interrupt line from the two ISA PICs + * 6 Tulip + * 7 NCR SCSI + * + * Summary @ 0x27 + * Bit Meaning + * 0 Interrupt Line C from slot 1 + * 1 Interrupt Line D from slot 0 + * 2 Interrupt Line D from slot 1 + * 3 RAZ + * 4 RAZ + * 5 RAZ + * 6 RAZ + * 7 RAZ + * + * The device to slot mapping looks like: + * + * Slot Device + * 5 NCR SCSI controller + * 6 PCI on board slot 0 + * 7 PCI on board slot 1 + * 8 Intel SIO PCI-ISA bridge chip + * 9 Tulip - DECchip 21040 Ethernet controller + * + * + * This two layered interrupt approach means that we allocate IRQ 16 and + * above for PCI interrupts. The IRQ relates to which bit the interrupt + * comes in on. This makes interrupt processing much easier. + */ + +static int __init +eb64p_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[5][5] __initdata = { + /*INT INTA INTB INTC INTD */ + {16+7, 16+7, 16+7, 16+7, 16+7}, /* IdSel 5, slot ?, ?? */ + {16+0, 16+0, 16+2, 16+4, 16+9}, /* IdSel 6, slot ?, ?? */ + {16+1, 16+1, 16+3, 16+8, 16+10}, /* IdSel 7, slot ?, ?? */ + { -1, -1, -1, -1, -1}, /* IdSel 8, SIO */ + {16+6, 16+6, 16+6, 16+6, 16+6}, /* IdSel 9, TULIP */ + }; + const long min_idsel = 5, max_idsel = 9, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + + +/* + * The System Vector + */ + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_EB64P) +struct alpha_machine_vector eb64p_mv __initmv = { + .vector_name = "EB64+", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_APECS_IO, + .machine_check = apecs_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 32, + .device_interrupt = eb64p_device_interrupt, + + .init_arch = apecs_init_arch, + .init_irq = eb64p_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .kill_arch = NULL, + .pci_map_irq = eb64p_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(eb64p) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_EB66) +struct alpha_machine_vector eb66_mv __initmv = { + .vector_name = "EB66", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_LCA_IO, + .machine_check = lca_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 32, + .device_interrupt = eb64p_device_interrupt, + + .init_arch = lca_init_arch, + .init_irq = eb64p_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .pci_map_irq = eb64p_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(eb66) +#endif diff --git a/arch/alpha/kernel/sys_eiger.c b/arch/alpha/kernel/sys_eiger.c new file mode 100644 index 00000000..79d69d7f --- /dev/null +++ b/arch/alpha/kernel/sys_eiger.c @@ -0,0 +1,226 @@ +/* + * linux/arch/alpha/kernel/sys_eiger.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996, 1999 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * Copyright (C) 1999 Iain Grant + * + * Code supporting the EIGER (EV6+TSUNAMI). + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/bitops.h> + +#include <asm/ptrace.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/pci.h> +#include <asm/pgtable.h> +#include <asm/core_tsunami.h> +#include <asm/hwrpb.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +/* Note that this interrupt code is identical to TAKARA. */ + +/* Note mask bit is true for DISABLED irqs. */ +static unsigned long cached_irq_mask[2] = { -1, -1 }; + +static inline void +eiger_update_irq_hw(unsigned long irq, unsigned long mask) +{ + int regaddr; + + mask = (irq >= 64 ? mask << 16 : mask >> ((irq - 16) & 0x30)); + regaddr = 0x510 + (((irq - 16) >> 2) & 0x0c); + outl(mask & 0xffff0000UL, regaddr); +} + +static inline void +eiger_enable_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + unsigned long mask; + mask = (cached_irq_mask[irq >= 64] &= ~(1UL << (irq & 63))); + eiger_update_irq_hw(irq, mask); +} + +static void +eiger_disable_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + unsigned long mask; + mask = (cached_irq_mask[irq >= 64] |= 1UL << (irq & 63)); + eiger_update_irq_hw(irq, mask); +} + +static struct irq_chip eiger_irq_type = { + .name = "EIGER", + .irq_unmask = eiger_enable_irq, + .irq_mask = eiger_disable_irq, + .irq_mask_ack = eiger_disable_irq, +}; + +static void +eiger_device_interrupt(unsigned long vector) +{ + unsigned intstatus; + + /* + * The PALcode will have passed us vectors 0x800 or 0x810, + * which are fairly arbitrary values and serve only to tell + * us whether an interrupt has come in on IRQ0 or IRQ1. If + * it's IRQ1 it's a PCI interrupt; if it's IRQ0, it's + * probably ISA, but PCI interrupts can come through IRQ0 + * as well if the interrupt controller isn't in accelerated + * mode. + * + * OTOH, the accelerator thing doesn't seem to be working + * overly well, so what we'll do instead is try directly + * examining the Master Interrupt Register to see if it's a + * PCI interrupt, and if _not_ then we'll pass it on to the + * ISA handler. + */ + + intstatus = inw(0x500) & 15; + if (intstatus) { + /* + * This is a PCI interrupt. Check each bit and + * despatch an interrupt if it's set. + */ + + if (intstatus & 8) handle_irq(16+3); + if (intstatus & 4) handle_irq(16+2); + if (intstatus & 2) handle_irq(16+1); + if (intstatus & 1) handle_irq(16+0); + } else { + isa_device_interrupt(vector); + } +} + +static void +eiger_srm_device_interrupt(unsigned long vector) +{ + int irq = (vector - 0x800) >> 4; + handle_irq(irq); +} + +static void __init +eiger_init_irq(void) +{ + long i; + + outb(0, DMA1_RESET_REG); + outb(0, DMA2_RESET_REG); + outb(DMA_MODE_CASCADE, DMA2_MODE_REG); + outb(0, DMA2_MASK_REG); + + if (alpha_using_srm) + alpha_mv.device_interrupt = eiger_srm_device_interrupt; + + for (i = 16; i < 128; i += 16) + eiger_update_irq_hw(i, -1); + + init_i8259a_irqs(); + + for (i = 16; i < 128; ++i) { + irq_set_chip_and_handler(i, &eiger_irq_type, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } +} + +static int __init +eiger_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + u8 irq_orig; + + /* The SRM console has already calculated out the IRQ value's for + option cards. As this works lets just read in the value already + set and change it to a useable value by Linux. + + All the IRQ values generated by the console are greater than 90, + so we subtract 80 because it is (90 - allocated ISA IRQ's). */ + + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq_orig); + + return irq_orig - 0x80; +} + +static u8 __init +eiger_swizzle(struct pci_dev *dev, u8 *pinp) +{ + struct pci_controller *hose = dev->sysdata; + int slot, pin = *pinp; + int bridge_count = 0; + + /* Find the number of backplane bridges. */ + int backplane = inw(0x502) & 0x0f; + + switch (backplane) + { + case 0x00: bridge_count = 0; break; /* No bridges */ + case 0x01: bridge_count = 1; break; /* 1 */ + case 0x03: bridge_count = 2; break; /* 2 */ + case 0x07: bridge_count = 3; break; /* 3 */ + case 0x0f: bridge_count = 4; break; /* 4 */ + }; + + slot = PCI_SLOT(dev->devfn); + while (dev->bus->self) { + /* Check for built-in bridges on hose 0. */ + if (hose->index == 0 + && (PCI_SLOT(dev->bus->self->devfn) + > 20 - bridge_count)) { + slot = PCI_SLOT(dev->devfn); + break; + } + /* Must be a card-based bridge. */ + pin = pci_swizzle_interrupt_pin(dev, pin); + + /* Move up the chain of bridges. */ + dev = dev->bus->self; + } + *pinp = pin; + return slot; +} + +/* + * The System Vectors + */ + +struct alpha_machine_vector eiger_mv __initmv = { + .vector_name = "Eiger", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_TSUNAMI_IO, + .machine_check = tsunami_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = TSUNAMI_DAC_OFFSET, + + .nr_irqs = 128, + .device_interrupt = eiger_device_interrupt, + + .init_arch = tsunami_init_arch, + .init_irq = eiger_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .kill_arch = tsunami_kill_arch, + .pci_map_irq = eiger_map_irq, + .pci_swizzle = eiger_swizzle, +}; +ALIAS_MV(eiger) diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c new file mode 100644 index 00000000..5a0af11b --- /dev/null +++ b/arch/alpha/kernel/sys_jensen.c @@ -0,0 +1,239 @@ +/* + * linux/arch/alpha/kernel/sys_jensen.c + * + * Copyright (C) 1995 Linus Torvalds + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code supporting the Jensen. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> + +#include <asm/ptrace.h> + +#define __EXTERN_INLINE inline +#include <asm/io.h> +#include <asm/jensen.h> +#undef __EXTERN_INLINE + +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +/* + * Jensen is special: the vector is 0x8X0 for EISA interrupt X, and + * 0x9X0 for the local motherboard interrupts. + * + * Note especially that those local interrupts CANNOT be masked, + * which causes much of the pain below... + * + * 0x660 - NMI + * + * 0x800 - IRQ0 interval timer (not used, as we use the RTC timer) + * 0x810 - IRQ1 line printer (duh..) + * 0x860 - IRQ6 floppy disk + * + * 0x900 - COM1 + * 0x920 - COM2 + * 0x980 - keyboard + * 0x990 - mouse + * + * PCI-based systems are more sane: they don't have the local + * interrupts at all, and have only normal PCI interrupts from + * devices. Happily it's easy enough to do a sane mapping from the + * Jensen. + * + * Note that this means that we may have to do a hardware + * "local_op" to a different interrupt than we report to the rest of the + * world. + */ + +static void +jensen_local_enable(struct irq_data *d) +{ + /* the parport is really hw IRQ 1, silly Jensen. */ + if (d->irq == 7) + i8259a_enable_irq(d); +} + +static void +jensen_local_disable(struct irq_data *d) +{ + /* the parport is really hw IRQ 1, silly Jensen. */ + if (d->irq == 7) + i8259a_disable_irq(d); +} + +static void +jensen_local_mask_ack(struct irq_data *d) +{ + /* the parport is really hw IRQ 1, silly Jensen. */ + if (d->irq == 7) + i8259a_mask_and_ack_irq(d); +} + +static struct irq_chip jensen_local_irq_type = { + .name = "LOCAL", + .irq_unmask = jensen_local_enable, + .irq_mask = jensen_local_disable, + .irq_mask_ack = jensen_local_mask_ack, +}; + +static void +jensen_device_interrupt(unsigned long vector) +{ + int irq; + + switch (vector) { + case 0x660: + printk("Whee.. NMI received. Probable hardware error\n"); + printk("61=%02x, 461=%02x\n", inb(0x61), inb(0x461)); + return; + + /* local device interrupts: */ + case 0x900: irq = 4; break; /* com1 -> irq 4 */ + case 0x920: irq = 3; break; /* com2 -> irq 3 */ + case 0x980: irq = 1; break; /* kbd -> irq 1 */ + case 0x990: irq = 9; break; /* mouse -> irq 9 */ + + default: + if (vector > 0x900) { + printk("Unknown local interrupt %lx\n", vector); + return; + } + + irq = (vector - 0x800) >> 4; + if (irq == 1) + irq = 7; + break; + } + + /* If there is no handler yet... */ + if (!irq_has_action(irq)) { + /* If it is a local interrupt that cannot be masked... */ + if (vector >= 0x900) + { + /* Clear keyboard/mouse state */ + inb(0x64); + inb(0x60); + /* Reset serial ports */ + inb(0x3fa); + inb(0x2fa); + outb(0x0c, 0x3fc); + outb(0x0c, 0x2fc); + /* Clear NMI */ + outb(0,0x61); + outb(0,0x461); + } + } + +#if 0 + /* A useful bit of code to find out if an interrupt is going wild. */ + { + static unsigned int last_msg = 0, last_cc = 0; + static int last_irq = -1, count = 0; + unsigned int cc; + + __asm __volatile("rpcc %0" : "=r"(cc)); + ++count; +#define JENSEN_CYCLES_PER_SEC (150000000) + if (cc - last_msg > ((JENSEN_CYCLES_PER_SEC) * 3) || + irq != last_irq) { + printk(KERN_CRIT " irq %d count %d cc %u @ %lx\n", + irq, count, cc-last_cc, get_irq_regs()->pc); + count = 0; + last_msg = cc; + last_irq = irq; + } + last_cc = cc; + } +#endif + + handle_irq(irq); +} + +static void __init +jensen_init_irq(void) +{ + init_i8259a_irqs(); + + irq_set_chip_and_handler(1, &jensen_local_irq_type, handle_level_irq); + irq_set_chip_and_handler(4, &jensen_local_irq_type, handle_level_irq); + irq_set_chip_and_handler(3, &jensen_local_irq_type, handle_level_irq); + irq_set_chip_and_handler(7, &jensen_local_irq_type, handle_level_irq); + irq_set_chip_and_handler(9, &jensen_local_irq_type, handle_level_irq); + + common_init_isa_dma(); +} + +static void __init +jensen_init_arch(void) +{ + struct pci_controller *hose; +#ifdef CONFIG_PCI + static struct pci_dev fake_isa_bridge = { .dma_mask = 0xffffffffUL, }; + + isa_bridge = &fake_isa_bridge; +#endif + + /* Create a hose so that we can report i/o base addresses to + userland. */ + + pci_isa_hose = hose = alloc_pci_controller(); + hose->io_space = &ioport_resource; + hose->mem_space = &iomem_resource; + hose->index = 0; + + hose->sparse_mem_base = EISA_MEM - IDENT_ADDR; + hose->dense_mem_base = 0; + hose->sparse_io_base = EISA_IO - IDENT_ADDR; + hose->dense_io_base = 0; + + hose->sg_isa = hose->sg_pci = NULL; + __direct_map_base = 0; + __direct_map_size = 0xffffffff; +} + +static void +jensen_machine_check(unsigned long vector, unsigned long la) +{ + printk(KERN_CRIT "Machine check\n"); +} + +/* + * The System Vector + */ + +struct alpha_machine_vector jensen_mv __initmv = { + .vector_name = "Jensen", + DO_EV4_MMU, + IO_LITE(JENSEN,jensen), + .machine_check = jensen_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .rtc_port = 0x170, + .rtc_get_time = common_get_rtc_time, + .rtc_set_time = common_set_rtc_time, + + .nr_irqs = 16, + .device_interrupt = jensen_device_interrupt, + + .init_arch = jensen_init_arch, + .init_irq = jensen_init_irq, + .init_rtc = common_init_rtc, + .init_pci = NULL, + .kill_arch = NULL, +}; +ALIAS_MV(jensen) diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c new file mode 100644 index 00000000..407accc8 --- /dev/null +++ b/arch/alpha/kernel/sys_marvel.c @@ -0,0 +1,522 @@ +/* + * linux/arch/alpha/kernel/sys_marvel.c + * + * Marvel / IO7 support + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/bitops.h> + +#include <asm/ptrace.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/core_marvel.h> +#include <asm/hwrpb.h> +#include <asm/tlbflush.h> +#include <asm/vga.h> +#include <asm/rtc.h> + +#include "proto.h" +#include "err_impl.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + +#if NR_IRQS < MARVEL_NR_IRQS +# error NR_IRQS < MARVEL_NR_IRQS !!! +#endif + + +/* + * Interrupt handling. + */ +static void +io7_device_interrupt(unsigned long vector) +{ + unsigned int pid; + unsigned int irq; + + /* + * Vector is 0x800 + (interrupt) + * + * where (interrupt) is: + * + * ...16|15 14|13 4|3 0 + * -----+-----+--------+--- + * PE | 0 | irq | 0 + * + * where (irq) is + * + * 0x0800 - 0x0ff0 - 0x0800 + (LSI id << 4) + * 0x1000 - 0x2ff0 - 0x1000 + (MSI_DAT<8:0> << 4) + */ + pid = vector >> 16; + irq = ((vector & 0xffff) - 0x800) >> 4; + + irq += 16; /* offset for legacy */ + irq &= MARVEL_IRQ_VEC_IRQ_MASK; /* not too many bits */ + irq |= pid << MARVEL_IRQ_VEC_PE_SHIFT; /* merge the pid */ + + handle_irq(irq); +} + +static volatile unsigned long * +io7_get_irq_ctl(unsigned int irq, struct io7 **pio7) +{ + volatile unsigned long *ctl; + unsigned int pid; + struct io7 *io7; + + pid = irq >> MARVEL_IRQ_VEC_PE_SHIFT; + + if (!(io7 = marvel_find_io7(pid))) { + printk(KERN_ERR + "%s for nonexistent io7 -- vec %x, pid %d\n", + __func__, irq, pid); + return NULL; + } + + irq &= MARVEL_IRQ_VEC_IRQ_MASK; /* isolate the vector */ + irq -= 16; /* subtract legacy bias */ + + if (irq >= 0x180) { + printk(KERN_ERR + "%s for invalid irq -- pid %d adjusted irq %x\n", + __func__, pid, irq); + return NULL; + } + + ctl = &io7->csrs->PO7_LSI_CTL[irq & 0xff].csr; /* assume LSI */ + if (irq >= 0x80) /* MSI */ + ctl = &io7->csrs->PO7_MSI_CTL[((irq - 0x80) >> 5) & 0x0f].csr; + + if (pio7) *pio7 = io7; + return ctl; +} + +static void +io7_enable_irq(struct irq_data *d) +{ + volatile unsigned long *ctl; + unsigned int irq = d->irq; + struct io7 *io7; + + ctl = io7_get_irq_ctl(irq, &io7); + if (!ctl || !io7) { + printk(KERN_ERR "%s: get_ctl failed for irq %x\n", + __func__, irq); + return; + } + + spin_lock(&io7->irq_lock); + *ctl |= 1UL << 24; + mb(); + *ctl; + spin_unlock(&io7->irq_lock); +} + +static void +io7_disable_irq(struct irq_data *d) +{ + volatile unsigned long *ctl; + unsigned int irq = d->irq; + struct io7 *io7; + + ctl = io7_get_irq_ctl(irq, &io7); + if (!ctl || !io7) { + printk(KERN_ERR "%s: get_ctl failed for irq %x\n", + __func__, irq); + return; + } + + spin_lock(&io7->irq_lock); + *ctl &= ~(1UL << 24); + mb(); + *ctl; + spin_unlock(&io7->irq_lock); +} + +static void +marvel_irq_noop(struct irq_data *d) +{ + return; +} + +static struct irq_chip marvel_legacy_irq_type = { + .name = "LEGACY", + .irq_mask = marvel_irq_noop, + .irq_unmask = marvel_irq_noop, +}; + +static struct irq_chip io7_lsi_irq_type = { + .name = "LSI", + .irq_unmask = io7_enable_irq, + .irq_mask = io7_disable_irq, + .irq_mask_ack = io7_disable_irq, +}; + +static struct irq_chip io7_msi_irq_type = { + .name = "MSI", + .irq_unmask = io7_enable_irq, + .irq_mask = io7_disable_irq, + .irq_ack = marvel_irq_noop, +}; + +static void +io7_redirect_irq(struct io7 *io7, + volatile unsigned long *csr, + unsigned int where) +{ + unsigned long val; + + val = *csr; + val &= ~(0x1ffUL << 24); /* clear the target pid */ + val |= ((unsigned long)where << 24); /* set the new target pid */ + + *csr = val; + mb(); + *csr; +} + +static void +io7_redirect_one_lsi(struct io7 *io7, unsigned int which, unsigned int where) +{ + unsigned long val; + + /* + * LSI_CTL has target PID @ 14 + */ + val = io7->csrs->PO7_LSI_CTL[which].csr; + val &= ~(0x1ffUL << 14); /* clear the target pid */ + val |= ((unsigned long)where << 14); /* set the new target pid */ + + io7->csrs->PO7_LSI_CTL[which].csr = val; + mb(); + io7->csrs->PO7_LSI_CTL[which].csr; +} + +static void +io7_redirect_one_msi(struct io7 *io7, unsigned int which, unsigned int where) +{ + unsigned long val; + + /* + * MSI_CTL has target PID @ 14 + */ + val = io7->csrs->PO7_MSI_CTL[which].csr; + val &= ~(0x1ffUL << 14); /* clear the target pid */ + val |= ((unsigned long)where << 14); /* set the new target pid */ + + io7->csrs->PO7_MSI_CTL[which].csr = val; + mb(); + io7->csrs->PO7_MSI_CTL[which].csr; +} + +static void __init +init_one_io7_lsi(struct io7 *io7, unsigned int which, unsigned int where) +{ + /* + * LSI_CTL has target PID @ 14 + */ + io7->csrs->PO7_LSI_CTL[which].csr = ((unsigned long)where << 14); + mb(); + io7->csrs->PO7_LSI_CTL[which].csr; +} + +static void __init +init_one_io7_msi(struct io7 *io7, unsigned int which, unsigned int where) +{ + /* + * MSI_CTL has target PID @ 14 + */ + io7->csrs->PO7_MSI_CTL[which].csr = ((unsigned long)where << 14); + mb(); + io7->csrs->PO7_MSI_CTL[which].csr; +} + +static void __init +init_io7_irqs(struct io7 *io7, + struct irq_chip *lsi_ops, + struct irq_chip *msi_ops) +{ + long base = (io7->pe << MARVEL_IRQ_VEC_PE_SHIFT) + 16; + long i; + + printk("Initializing interrupts for IO7 at PE %u - base %lx\n", + io7->pe, base); + + /* + * Where should interrupts from this IO7 go? + * + * They really should be sent to the local CPU to avoid having to + * traverse the mesh, but if it's not an SMP kernel, they have to + * go to the boot CPU. Send them all to the boot CPU for now, + * as each secondary starts, it can redirect it's local device + * interrupts. + */ + printk(" Interrupts reported to CPU at PE %u\n", boot_cpuid); + + spin_lock(&io7->irq_lock); + + /* set up the error irqs */ + io7_redirect_irq(io7, &io7->csrs->HLT_CTL.csr, boot_cpuid); + io7_redirect_irq(io7, &io7->csrs->HPI_CTL.csr, boot_cpuid); + io7_redirect_irq(io7, &io7->csrs->CRD_CTL.csr, boot_cpuid); + io7_redirect_irq(io7, &io7->csrs->STV_CTL.csr, boot_cpuid); + io7_redirect_irq(io7, &io7->csrs->HEI_CTL.csr, boot_cpuid); + + /* Set up the lsi irqs. */ + for (i = 0; i < 128; ++i) { + irq_set_chip_and_handler(base + i, lsi_ops, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + /* Disable the implemented irqs in hardware. */ + for (i = 0; i < 0x60; ++i) + init_one_io7_lsi(io7, i, boot_cpuid); + + init_one_io7_lsi(io7, 0x74, boot_cpuid); + init_one_io7_lsi(io7, 0x75, boot_cpuid); + + + /* Set up the msi irqs. */ + for (i = 128; i < (128 + 512); ++i) { + irq_set_chip_and_handler(base + i, msi_ops, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + for (i = 0; i < 16; ++i) + init_one_io7_msi(io7, i, boot_cpuid); + + spin_unlock(&io7->irq_lock); +} + +static void __init +marvel_init_irq(void) +{ + int i; + struct io7 *io7 = NULL; + + /* Reserve the legacy irqs. */ + for (i = 0; i < 16; ++i) { + irq_set_chip_and_handler(i, &marvel_legacy_irq_type, + handle_level_irq); + } + + /* Init the io7 irqs. */ + for (io7 = NULL; (io7 = marvel_next_io7(io7)) != NULL; ) + init_io7_irqs(io7, &io7_lsi_irq_type, &io7_msi_irq_type); +} + +static int +marvel_map_irq(struct pci_dev *dev, u8 slot, u8 pin) +{ + struct pci_controller *hose = dev->sysdata; + struct io7_port *io7_port = hose->sysdata; + struct io7 *io7 = io7_port->io7; + int msi_loc, msi_data_off; + u16 msg_ctl; + u16 msg_dat; + u8 intline; + int irq; + + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &intline); + irq = intline; + + msi_loc = pci_find_capability(dev, PCI_CAP_ID_MSI); + msg_ctl = 0; + if (msi_loc) + pci_read_config_word(dev, msi_loc + PCI_MSI_FLAGS, &msg_ctl); + + if (msg_ctl & PCI_MSI_FLAGS_ENABLE) { + msi_data_off = PCI_MSI_DATA_32; + if (msg_ctl & PCI_MSI_FLAGS_64BIT) + msi_data_off = PCI_MSI_DATA_64; + pci_read_config_word(dev, msi_loc + msi_data_off, &msg_dat); + + irq = msg_dat & 0x1ff; /* we use msg_data<8:0> */ + irq += 0x80; /* offset for lsi */ + +#if 1 + printk("PCI:%d:%d:%d (hose %d) is using MSI\n", + dev->bus->number, + PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), + hose->index); + printk(" %d message(s) from 0x%04x\n", + 1 << ((msg_ctl & PCI_MSI_FLAGS_QSIZE) >> 4), + msg_dat); + printk(" reporting on %d IRQ(s) from %d (0x%x)\n", + 1 << ((msg_ctl & PCI_MSI_FLAGS_QSIZE) >> 4), + (irq + 16) | (io7->pe << MARVEL_IRQ_VEC_PE_SHIFT), + (irq + 16) | (io7->pe << MARVEL_IRQ_VEC_PE_SHIFT)); +#endif + +#if 0 + pci_write_config_word(dev, msi_loc + PCI_MSI_FLAGS, + msg_ctl & ~PCI_MSI_FLAGS_ENABLE); + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &intline); + irq = intline; + + printk(" forcing LSI interrupt on irq %d [0x%x]\n", irq, irq); +#endif + } + + irq += 16; /* offset for legacy */ + irq |= io7->pe << MARVEL_IRQ_VEC_PE_SHIFT; /* merge the pid */ + + return irq; +} + +static void __init +marvel_init_pci(void) +{ + struct io7 *io7; + + marvel_register_error_handlers(); + + /* Indicate that we trust the console to configure things properly */ + pci_set_flags(PCI_PROBE_ONLY); + common_init_pci(); + locate_and_init_vga(NULL); + + /* Clear any io7 errors. */ + for (io7 = NULL; (io7 = marvel_next_io7(io7)) != NULL; ) + io7_clear_errors(io7); +} + +static void __init +marvel_init_rtc(void) +{ + init_rtc_irq(); +} + +struct marvel_rtc_time { + struct rtc_time *time; + int retval; +}; + +#ifdef CONFIG_SMP +static void +smp_get_rtc_time(void *data) +{ + struct marvel_rtc_time *mrt = data; + mrt->retval = __get_rtc_time(mrt->time); +} + +static void +smp_set_rtc_time(void *data) +{ + struct marvel_rtc_time *mrt = data; + mrt->retval = __set_rtc_time(mrt->time); +} +#endif + +static unsigned int +marvel_get_rtc_time(struct rtc_time *time) +{ +#ifdef CONFIG_SMP + struct marvel_rtc_time mrt; + + if (smp_processor_id() != boot_cpuid) { + mrt.time = time; + smp_call_function_single(boot_cpuid, smp_get_rtc_time, &mrt, 1); + return mrt.retval; + } +#endif + return __get_rtc_time(time); +} + +static int +marvel_set_rtc_time(struct rtc_time *time) +{ +#ifdef CONFIG_SMP + struct marvel_rtc_time mrt; + + if (smp_processor_id() != boot_cpuid) { + mrt.time = time; + smp_call_function_single(boot_cpuid, smp_set_rtc_time, &mrt, 1); + return mrt.retval; + } +#endif + return __set_rtc_time(time); +} + +static void +marvel_smp_callin(void) +{ + int cpuid = hard_smp_processor_id(); + struct io7 *io7 = marvel_find_io7(cpuid); + unsigned int i; + + if (!io7) + return; + + /* + * There is a local IO7 - redirect all of its interrupts here. + */ + printk("Redirecting IO7 interrupts to local CPU at PE %u\n", cpuid); + + /* Redirect the error IRQS here. */ + io7_redirect_irq(io7, &io7->csrs->HLT_CTL.csr, cpuid); + io7_redirect_irq(io7, &io7->csrs->HPI_CTL.csr, cpuid); + io7_redirect_irq(io7, &io7->csrs->CRD_CTL.csr, cpuid); + io7_redirect_irq(io7, &io7->csrs->STV_CTL.csr, cpuid); + io7_redirect_irq(io7, &io7->csrs->HEI_CTL.csr, cpuid); + + /* Redirect the implemented LSIs here. */ + for (i = 0; i < 0x60; ++i) + io7_redirect_one_lsi(io7, i, cpuid); + + io7_redirect_one_lsi(io7, 0x74, cpuid); + io7_redirect_one_lsi(io7, 0x75, cpuid); + + /* Redirect the MSIs here. */ + for (i = 0; i < 16; ++i) + io7_redirect_one_msi(io7, i, cpuid); +} + +/* + * System Vectors + */ +struct alpha_machine_vector marvel_ev7_mv __initmv = { + .vector_name = "MARVEL/EV7", + DO_EV7_MMU, + .rtc_port = 0x70, + .rtc_get_time = marvel_get_rtc_time, + .rtc_set_time = marvel_set_rtc_time, + DO_MARVEL_IO, + .machine_check = marvel_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = IO7_DAC_OFFSET, + + .nr_irqs = MARVEL_NR_IRQS, + .device_interrupt = io7_device_interrupt, + + .agp_info = marvel_agp_info, + + .smp_callin = marvel_smp_callin, + .init_arch = marvel_init_arch, + .init_irq = marvel_init_irq, + .init_rtc = marvel_init_rtc, + .init_pci = marvel_init_pci, + .kill_arch = marvel_kill_arch, + .pci_map_irq = marvel_map_irq, + .pci_swizzle = common_swizzle, + + .pa_to_nid = marvel_pa_to_nid, + .cpuid_to_nid = marvel_cpuid_to_nid, + .node_mem_start = marvel_node_mem_start, + .node_mem_size = marvel_node_mem_size, +}; +ALIAS_MV(marvel_ev7) diff --git a/arch/alpha/kernel/sys_miata.c b/arch/alpha/kernel/sys_miata.c new file mode 100644 index 00000000..d5b9776a --- /dev/null +++ b/arch/alpha/kernel/sys_miata.c @@ -0,0 +1,292 @@ +/* + * linux/arch/alpha/kernel/sys_miata.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999, 2000 Richard Henderson + * + * Code supporting the MIATA (EV56+PYXIS). + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/reboot.h> + +#include <asm/ptrace.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/core_cia.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +static void +miata_srm_device_interrupt(unsigned long vector) +{ + int irq; + + irq = (vector - 0x800) >> 4; + + /* + * I really hate to do this, but the MIATA SRM console ignores the + * low 8 bits in the interrupt summary register, and reports the + * vector 0x80 *lower* than I expected from the bit numbering in + * the documentation. + * This was done because the low 8 summary bits really aren't used + * for reporting any interrupts (the PCI-ISA bridge, bit 7, isn't + * used for this purpose, as PIC interrupts are delivered as the + * vectors 0x800-0x8f0). + * But I really don't want to change the fixup code for allocation + * of IRQs, nor the alpha_irq_mask maintenance stuff, both of which + * look nice and clean now. + * So, here's this grotty hack... :-( + */ + if (irq >= 16) + irq = irq + 8; + + handle_irq(irq); +} + +static void __init +miata_init_irq(void) +{ + if (alpha_using_srm) + alpha_mv.device_interrupt = miata_srm_device_interrupt; + +#if 0 + /* These break on MiataGL so we'll try not to do it at all. */ + *(vulp)PYXIS_INT_HILO = 0x000000B2UL; mb(); /* ISA/NMI HI */ + *(vulp)PYXIS_RT_COUNT = 0UL; mb(); /* clear count */ +#endif + + init_i8259a_irqs(); + + /* Not interested in the bogus interrupts (3,10), Fan Fault (0), + NMI (1), or EIDE (9). + + We also disable the risers (4,5), since we don't know how to + route the interrupts behind the bridge. */ + init_pyxis_irqs(0x63b0000); + + common_init_isa_dma(); + setup_irq(16+2, &halt_switch_irqaction); /* SRM only? */ + setup_irq(16+6, &timer_cascade_irqaction); +} + + +/* + * PCI Fixup configuration. + * + * Summary @ PYXIS_INT_REQ: + * Bit Meaning + * 0 Fan Fault + * 1 NMI + * 2 Halt/Reset switch + * 3 none + * 4 CID0 (Riser ID) + * 5 CID1 (Riser ID) + * 6 Interval timer + * 7 PCI-ISA Bridge + * 8 Ethernet + * 9 EIDE (deprecated, ISA 14/15 used) + *10 none + *11 USB + *12 Interrupt Line A from slot 4 + *13 Interrupt Line B from slot 4 + *14 Interrupt Line C from slot 4 + *15 Interrupt Line D from slot 4 + *16 Interrupt Line A from slot 5 + *17 Interrupt line B from slot 5 + *18 Interrupt Line C from slot 5 + *19 Interrupt Line D from slot 5 + *20 Interrupt Line A from slot 1 + *21 Interrupt Line B from slot 1 + *22 Interrupt Line C from slot 1 + *23 Interrupt Line D from slot 1 + *24 Interrupt Line A from slot 2 + *25 Interrupt Line B from slot 2 + *26 Interrupt Line C from slot 2 + *27 Interrupt Line D from slot 2 + *27 Interrupt Line A from slot 3 + *29 Interrupt Line B from slot 3 + *30 Interrupt Line C from slot 3 + *31 Interrupt Line D from slot 3 + * + * The device to slot mapping looks like: + * + * Slot Device + * 3 DC21142 Ethernet + * 4 EIDE CMD646 + * 5 none + * 6 USB + * 7 PCI-ISA bridge + * 8 PCI-PCI Bridge (SBU Riser) + * 9 none + * 10 none + * 11 PCI on board slot 4 (SBU Riser) + * 12 PCI on board slot 5 (SBU Riser) + * + * These are behind the bridge, so I'm not sure what to do... + * + * 13 PCI on board slot 1 (SBU Riser) + * 14 PCI on board slot 2 (SBU Riser) + * 15 PCI on board slot 3 (SBU Riser) + * + * + * This two layered interrupt approach means that we allocate IRQ 16 and + * above for PCI interrupts. The IRQ relates to which bit the interrupt + * comes in on. This makes interrupt processing much easier. + */ + +static int __init +miata_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[18][5] __initdata = { + /*INT INTA INTB INTC INTD */ + {16+ 8, 16+ 8, 16+ 8, 16+ 8, 16+ 8}, /* IdSel 14, DC21142 */ + { -1, -1, -1, -1, -1}, /* IdSel 15, EIDE */ + { -1, -1, -1, -1, -1}, /* IdSel 16, none */ + { -1, -1, -1, -1, -1}, /* IdSel 17, none */ + { -1, -1, -1, -1, -1}, /* IdSel 18, PCI-ISA */ + { -1, -1, -1, -1, -1}, /* IdSel 19, PCI-PCI */ + { -1, -1, -1, -1, -1}, /* IdSel 20, none */ + { -1, -1, -1, -1, -1}, /* IdSel 21, none */ + {16+12, 16+12, 16+13, 16+14, 16+15}, /* IdSel 22, slot 4 */ + {16+16, 16+16, 16+17, 16+18, 16+19}, /* IdSel 23, slot 5 */ + /* the next 7 are actually on PCI bus 1, across the bridge */ + {16+11, 16+11, 16+11, 16+11, 16+11}, /* IdSel 24, QLISP/GL*/ + { -1, -1, -1, -1, -1}, /* IdSel 25, none */ + { -1, -1, -1, -1, -1}, /* IdSel 26, none */ + { -1, -1, -1, -1, -1}, /* IdSel 27, none */ + {16+20, 16+20, 16+21, 16+22, 16+23}, /* IdSel 28, slot 1 */ + {16+24, 16+24, 16+25, 16+26, 16+27}, /* IdSel 29, slot 2 */ + {16+28, 16+28, 16+29, 16+30, 16+31}, /* IdSel 30, slot 3 */ + /* This bridge is on the main bus of the later orig MIATA */ + { -1, -1, -1, -1, -1}, /* IdSel 31, PCI-PCI */ + }; + const long min_idsel = 3, max_idsel = 20, irqs_per_slot = 5; + + /* the USB function of the 82c693 has it's interrupt connected to + the 2nd 8259 controller. So we have to check for it first. */ + + if((slot == 7) && (PCI_FUNC(dev->devfn) == 3)) { + u8 irq=0; + struct pci_dev *pdev = pci_get_slot(dev->bus, dev->devfn & ~7); + if(pdev == NULL || pci_read_config_byte(pdev, 0x40,&irq) != PCIBIOS_SUCCESSFUL) { + pci_dev_put(pdev); + return -1; + } + else { + pci_dev_put(pdev); + return irq; + } + } + + return COMMON_TABLE_LOOKUP; +} + +static u8 __init +miata_swizzle(struct pci_dev *dev, u8 *pinp) +{ + int slot, pin = *pinp; + + if (dev->bus->number == 0) { + slot = PCI_SLOT(dev->devfn); + } + /* Check for the built-in bridge. */ + else if ((PCI_SLOT(dev->bus->self->devfn) == 8) || + (PCI_SLOT(dev->bus->self->devfn) == 20)) { + slot = PCI_SLOT(dev->devfn) + 9; + } + else + { + /* Must be a card-based bridge. */ + do { + if ((PCI_SLOT(dev->bus->self->devfn) == 8) || + (PCI_SLOT(dev->bus->self->devfn) == 20)) { + slot = PCI_SLOT(dev->devfn) + 9; + break; + } + pin = pci_swizzle_interrupt_pin(dev, pin); + + /* Move up the chain of bridges. */ + dev = dev->bus->self; + /* Slot of the next bridge. */ + slot = PCI_SLOT(dev->devfn); + } while (dev->bus->self); + } + *pinp = pin; + return slot; +} + +static void __init +miata_init_pci(void) +{ + cia_init_pci(); + SMC669_Init(0); /* it might be a GL (fails harmlessly if not) */ + es1888_init(); +} + +static void +miata_kill_arch(int mode) +{ + cia_kill_arch(mode); + +#ifndef ALPHA_RESTORE_SRM_SETUP + switch(mode) { + case LINUX_REBOOT_CMD_RESTART: + /* Who said DEC engineers have no sense of humor? ;-) */ + if (alpha_using_srm) { + *(vuip) PYXIS_RESET = 0x0000dead; + mb(); + } + break; + case LINUX_REBOOT_CMD_HALT: + break; + case LINUX_REBOOT_CMD_POWER_OFF: + break; + } + + halt(); +#endif +} + + +/* + * The System Vector + */ + +struct alpha_machine_vector miata_mv __initmv = { + .vector_name = "Miata", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_PYXIS_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = PYXIS_DAC_OFFSET, + + .nr_irqs = 48, + .device_interrupt = pyxis_device_interrupt, + + .init_arch = pyxis_init_arch, + .init_irq = miata_init_irq, + .init_rtc = common_init_rtc, + .init_pci = miata_init_pci, + .kill_arch = miata_kill_arch, + .pci_map_irq = miata_map_irq, + .pci_swizzle = miata_swizzle, +}; +ALIAS_MV(miata) diff --git a/arch/alpha/kernel/sys_mikasa.c b/arch/alpha/kernel/sys_mikasa.c new file mode 100644 index 00000000..5e82dc1a --- /dev/null +++ b/arch/alpha/kernel/sys_mikasa.c @@ -0,0 +1,247 @@ +/* + * linux/arch/alpha/kernel/sys_mikasa.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code supporting the MIKASA (AlphaServer 1000). + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/bitops.h> + +#include <asm/ptrace.h> +#include <asm/mce.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/core_apecs.h> +#include <asm/core_cia.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +/* Note mask bit is true for ENABLED irqs. */ +static int cached_irq_mask; + +static inline void +mikasa_update_irq_hw(int mask) +{ + outw(mask, 0x536); +} + +static inline void +mikasa_enable_irq(struct irq_data *d) +{ + mikasa_update_irq_hw(cached_irq_mask |= 1 << (d->irq - 16)); +} + +static void +mikasa_disable_irq(struct irq_data *d) +{ + mikasa_update_irq_hw(cached_irq_mask &= ~(1 << (d->irq - 16))); +} + +static struct irq_chip mikasa_irq_type = { + .name = "MIKASA", + .irq_unmask = mikasa_enable_irq, + .irq_mask = mikasa_disable_irq, + .irq_mask_ack = mikasa_disable_irq, +}; + +static void +mikasa_device_interrupt(unsigned long vector) +{ + unsigned long pld; + unsigned int i; + + /* Read the interrupt summary registers */ + pld = (((~inw(0x534) & 0x0000ffffUL) << 16) + | (((unsigned long) inb(0xa0)) << 8) + | inb(0x20)); + + /* + * Now for every possible bit set, work through them and call + * the appropriate interrupt handler. + */ + while (pld) { + i = ffz(~pld); + pld &= pld - 1; /* clear least bit set */ + if (i < 16) { + isa_device_interrupt(vector); + } else { + handle_irq(i); + } + } +} + +static void __init +mikasa_init_irq(void) +{ + long i; + + if (alpha_using_srm) + alpha_mv.device_interrupt = srm_device_interrupt; + + mikasa_update_irq_hw(0); + + for (i = 16; i < 32; ++i) { + irq_set_chip_and_handler(i, &mikasa_irq_type, + handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + init_i8259a_irqs(); + common_init_isa_dma(); +} + + +/* + * PCI Fixup configuration. + * + * Summary @ 0x536: + * Bit Meaning + * 0 Interrupt Line A from slot 0 + * 1 Interrupt Line B from slot 0 + * 2 Interrupt Line C from slot 0 + * 3 Interrupt Line D from slot 0 + * 4 Interrupt Line A from slot 1 + * 5 Interrupt line B from slot 1 + * 6 Interrupt Line C from slot 1 + * 7 Interrupt Line D from slot 1 + * 8 Interrupt Line A from slot 2 + * 9 Interrupt Line B from slot 2 + *10 Interrupt Line C from slot 2 + *11 Interrupt Line D from slot 2 + *12 NCR 810 SCSI + *13 Power Supply Fail + *14 Temperature Warn + *15 Reserved + * + * The device to slot mapping looks like: + * + * Slot Device + * 6 NCR SCSI controller + * 7 Intel PCI-EISA bridge chip + * 11 PCI on board slot 0 + * 12 PCI on board slot 1 + * 13 PCI on board slot 2 + * + * + * This two layered interrupt approach means that we allocate IRQ 16 and + * above for PCI interrupts. The IRQ relates to which bit the interrupt + * comes in on. This makes interrupt processing much easier. + */ + +static int __init +mikasa_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[8][5] __initdata = { + /*INT INTA INTB INTC INTD */ + {16+12, 16+12, 16+12, 16+12, 16+12}, /* IdSel 17, SCSI */ + { -1, -1, -1, -1, -1}, /* IdSel 18, PCEB */ + { -1, -1, -1, -1, -1}, /* IdSel 19, ???? */ + { -1, -1, -1, -1, -1}, /* IdSel 20, ???? */ + { -1, -1, -1, -1, -1}, /* IdSel 21, ???? */ + { 16+0, 16+0, 16+1, 16+2, 16+3}, /* IdSel 22, slot 0 */ + { 16+4, 16+4, 16+5, 16+6, 16+7}, /* IdSel 23, slot 1 */ + { 16+8, 16+8, 16+9, 16+10, 16+11}, /* IdSel 24, slot 2 */ + }; + const long min_idsel = 6, max_idsel = 13, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + + +#if defined(CONFIG_ALPHA_GENERIC) || !defined(CONFIG_ALPHA_PRIMO) +static void +mikasa_apecs_machine_check(unsigned long vector, unsigned long la_ptr) +{ +#define MCHK_NO_DEVSEL 0x205U +#define MCHK_NO_TABT 0x204U + + struct el_common *mchk_header; + unsigned int code; + + mchk_header = (struct el_common *)la_ptr; + + /* Clear the error before any reporting. */ + mb(); + mb(); /* magic */ + draina(); + apecs_pci_clr_err(); + wrmces(0x7); + mb(); + + code = mchk_header->code; + process_mcheck_info(vector, la_ptr, "MIKASA APECS", + (mcheck_expected(0) + && (code == MCHK_NO_DEVSEL + || code == MCHK_NO_TABT))); +} +#endif + + +/* + * The System Vector + */ + +#if defined(CONFIG_ALPHA_GENERIC) || !defined(CONFIG_ALPHA_PRIMO) +struct alpha_machine_vector mikasa_mv __initmv = { + .vector_name = "Mikasa", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_APECS_IO, + .machine_check = mikasa_apecs_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 32, + .device_interrupt = mikasa_device_interrupt, + + .init_arch = apecs_init_arch, + .init_irq = mikasa_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .pci_map_irq = mikasa_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(mikasa) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_PRIMO) +struct alpha_machine_vector mikasa_primo_mv __initmv = { + .vector_name = "Mikasa-Primo", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_CIA_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = CIA_DEFAULT_MEM_BASE, + + .nr_irqs = 32, + .device_interrupt = mikasa_device_interrupt, + + .init_arch = cia_init_arch, + .init_irq = mikasa_init_irq, + .init_rtc = common_init_rtc, + .init_pci = cia_init_pci, + .kill_arch = cia_kill_arch, + .pci_map_irq = mikasa_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(mikasa_primo) +#endif diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c new file mode 100644 index 00000000..4d4c046f --- /dev/null +++ b/arch/alpha/kernel/sys_nautilus.c @@ -0,0 +1,279 @@ +/* + * linux/arch/alpha/kernel/sys_nautilus.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1998 Richard Henderson + * Copyright (C) 1999 Alpha Processor, Inc., + * (David Daniel, Stig Telfer, Soohoon Lee) + * + * Code supporting NAUTILUS systems. + * + * + * NAUTILUS has the following I/O features: + * + * a) Driven by AMD 751 aka IRONGATE (northbridge): + * 4 PCI slots + * 1 AGP slot + * + * b) Driven by ALI M1543C (southbridge) + * 2 ISA slots + * 2 IDE connectors + * 1 dual drive capable FDD controller + * 2 serial ports + * 1 ECP/EPP/SP parallel port + * 2 USB ports + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/reboot.h> +#include <linux/bootmem.h> +#include <linux/bitops.h> + +#include <asm/ptrace.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/pci.h> +#include <asm/pgtable.h> +#include <asm/core_irongate.h> +#include <asm/hwrpb.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "err_impl.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +static void __init +nautilus_init_irq(void) +{ + if (alpha_using_srm) { + alpha_mv.device_interrupt = srm_device_interrupt; + } + + init_i8259a_irqs(); + common_init_isa_dma(); +} + +static int __init +nautilus_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + /* Preserve the IRQ set up by the console. */ + + u8 irq; + /* UP1500: AGP INTA is actually routed to IRQ 5, not IRQ 10 as + console reports. Check the device id of AGP bridge to distinguish + UP1500 from UP1000/1100. Note: 'pin' is 2 due to bridge swizzle. */ + if (slot == 1 && pin == 2 && + dev->bus->self && dev->bus->self->device == 0x700f) + return 5; + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq); + return irq; +} + +void +nautilus_kill_arch(int mode) +{ + struct pci_bus *bus = pci_isa_hose->bus; + u32 pmuport; + int off; + + switch (mode) { + case LINUX_REBOOT_CMD_RESTART: + if (! alpha_using_srm) { + u8 t8; + pci_bus_read_config_byte(bus, 0x38, 0x43, &t8); + pci_bus_write_config_byte(bus, 0x38, 0x43, t8 | 0x80); + outb(1, 0x92); + outb(0, 0x92); + /* NOTREACHED */ + } + break; + + case LINUX_REBOOT_CMD_POWER_OFF: + /* Assume M1543C */ + off = 0x2000; /* SLP_TYPE = 0, SLP_EN = 1 */ + pci_bus_read_config_dword(bus, 0x88, 0x10, &pmuport); + if (!pmuport) { + /* M1535D/D+ */ + off = 0x3400; /* SLP_TYPE = 5, SLP_EN = 1 */ + pci_bus_read_config_dword(bus, 0x88, 0xe0, &pmuport); + } + pmuport &= 0xfffe; + outw(0xffff, pmuport); /* Clear pending events. */ + outw(off, pmuport + 4); + /* NOTREACHED */ + break; + } +} + +/* Perform analysis of a machine check that arrived from the system (NMI) */ + +static void +naut_sys_machine_check(unsigned long vector, unsigned long la_ptr, + struct pt_regs *regs) +{ + printk("PC %lx RA %lx\n", regs->pc, regs->r26); + irongate_pci_clr_err(); +} + +/* Machine checks can come from two sources - those on the CPU and those + in the system. They are analysed separately but all starts here. */ + +void +nautilus_machine_check(unsigned long vector, unsigned long la_ptr) +{ + char *mchk_class; + + /* Now for some analysis. Machine checks fall into two classes -- + those picked up by the system, and those picked up by the CPU. + Add to that the two levels of severity - correctable or not. */ + + if (vector == SCB_Q_SYSMCHK + && ((IRONGATE0->dramms & 0x300) == 0x300)) { + unsigned long nmi_ctl; + + /* Clear ALI NMI */ + nmi_ctl = inb(0x61); + nmi_ctl |= 0x0c; + outb(nmi_ctl, 0x61); + nmi_ctl &= ~0x0c; + outb(nmi_ctl, 0x61); + + /* Write again clears error bits. */ + IRONGATE0->stat_cmd = IRONGATE0->stat_cmd & ~0x100; + mb(); + IRONGATE0->stat_cmd; + + /* Write again clears error bits. */ + IRONGATE0->dramms = IRONGATE0->dramms; + mb(); + IRONGATE0->dramms; + + draina(); + wrmces(0x7); + mb(); + return; + } + + if (vector == SCB_Q_SYSERR) + mchk_class = "Correctable"; + else if (vector == SCB_Q_SYSMCHK) + mchk_class = "Fatal"; + else { + ev6_machine_check(vector, la_ptr); + return; + } + + printk(KERN_CRIT "NAUTILUS Machine check 0x%lx " + "[%s System Machine Check (NMI)]\n", + vector, mchk_class); + + naut_sys_machine_check(vector, la_ptr, get_irq_regs()); + + /* Tell the PALcode to clear the machine check */ + draina(); + wrmces(0x7); + mb(); +} + +extern void free_reserved_mem(void *, void *); +extern void pcibios_claim_one_bus(struct pci_bus *); + +static struct resource irongate_mem = { + .name = "Irongate PCI MEM", + .flags = IORESOURCE_MEM, +}; + +void __init +nautilus_init_pci(void) +{ + struct pci_controller *hose = hose_head; + struct pci_bus *bus; + struct pci_dev *irongate; + unsigned long bus_align, bus_size, pci_mem; + unsigned long memtop = max_low_pfn << PAGE_SHIFT; + + /* Scan our single hose. */ + bus = pci_scan_bus(0, alpha_mv.pci_ops, hose); + hose->bus = bus; + pcibios_claim_one_bus(bus); + + irongate = pci_get_bus_and_slot(0, 0); + bus->self = irongate; + bus->resource[1] = &irongate_mem; + + pci_bus_size_bridges(bus); + + /* IO port range. */ + bus->resource[0]->start = 0; + bus->resource[0]->end = 0xffff; + + /* Set up PCI memory range - limit is hardwired to 0xffffffff, + base must be at aligned to 16Mb. */ + bus_align = bus->resource[1]->start; + bus_size = bus->resource[1]->end + 1 - bus_align; + if (bus_align < 0x1000000UL) + bus_align = 0x1000000UL; + + pci_mem = (0x100000000UL - bus_size) & -bus_align; + + bus->resource[1]->start = pci_mem; + bus->resource[1]->end = 0xffffffffUL; + if (request_resource(&iomem_resource, bus->resource[1]) < 0) + printk(KERN_ERR "Failed to request MEM on hose 0\n"); + + if (pci_mem < memtop) + memtop = pci_mem; + if (memtop > alpha_mv.min_mem_address) { + free_reserved_mem(__va(alpha_mv.min_mem_address), + __va(memtop)); + printk("nautilus_init_pci: %ldk freed\n", + (memtop - alpha_mv.min_mem_address) >> 10); + } + + if ((IRONGATE0->dev_vendor >> 16) > 0x7006) /* Albacore? */ + IRONGATE0->pci_mem = pci_mem; + + pci_bus_assign_resources(bus); + + /* pci_common_swizzle() relies on bus->self being NULL + for the root bus, so just clear it. */ + bus->self = NULL; + pci_fixup_irqs(alpha_mv.pci_swizzle, alpha_mv.pci_map_irq); +} + +/* + * The System Vectors + */ + +struct alpha_machine_vector nautilus_mv __initmv = { + .vector_name = "Nautilus", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_IRONGATE_IO, + .machine_check = nautilus_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = IRONGATE_DEFAULT_MEM_BASE, + + .nr_irqs = 16, + .device_interrupt = isa_device_interrupt, + + .init_arch = irongate_init_arch, + .init_irq = nautilus_init_irq, + .init_rtc = common_init_rtc, + .init_pci = nautilus_init_pci, + .kill_arch = nautilus_kill_arch, + .pci_map_irq = nautilus_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(nautilus) diff --git a/arch/alpha/kernel/sys_noritake.c b/arch/alpha/kernel/sys_noritake.c new file mode 100644 index 00000000..063e594f --- /dev/null +++ b/arch/alpha/kernel/sys_noritake.c @@ -0,0 +1,336 @@ +/* + * linux/arch/alpha/kernel/sys_noritake.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code supporting the NORITAKE (AlphaServer 1000A), + * CORELLE (AlphaServer 800), and ALCOR Primo (AlphaStation 600A). + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/bitops.h> + +#include <asm/ptrace.h> +#include <asm/mce.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/core_apecs.h> +#include <asm/core_cia.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + +/* Note mask bit is true for ENABLED irqs. */ +static int cached_irq_mask; + +static inline void +noritake_update_irq_hw(int irq, int mask) +{ + int port = 0x54a; + if (irq >= 32) { + mask >>= 16; + port = 0x54c; + } + outw(mask, port); +} + +static void +noritake_enable_irq(struct irq_data *d) +{ + noritake_update_irq_hw(d->irq, cached_irq_mask |= 1 << (d->irq - 16)); +} + +static void +noritake_disable_irq(struct irq_data *d) +{ + noritake_update_irq_hw(d->irq, cached_irq_mask &= ~(1 << (d->irq - 16))); +} + +static struct irq_chip noritake_irq_type = { + .name = "NORITAKE", + .irq_unmask = noritake_enable_irq, + .irq_mask = noritake_disable_irq, + .irq_mask_ack = noritake_disable_irq, +}; + +static void +noritake_device_interrupt(unsigned long vector) +{ + unsigned long pld; + unsigned int i; + + /* Read the interrupt summary registers of NORITAKE */ + pld = (((unsigned long) inw(0x54c) << 32) + | ((unsigned long) inw(0x54a) << 16) + | ((unsigned long) inb(0xa0) << 8) + | inb(0x20)); + + /* + * Now for every possible bit set, work through them and call + * the appropriate interrupt handler. + */ + while (pld) { + i = ffz(~pld); + pld &= pld - 1; /* clear least bit set */ + if (i < 16) { + isa_device_interrupt(vector); + } else { + handle_irq(i); + } + } +} + +static void +noritake_srm_device_interrupt(unsigned long vector) +{ + int irq; + + irq = (vector - 0x800) >> 4; + + /* + * I really hate to do this, too, but the NORITAKE SRM console also + * reports PCI vectors *lower* than I expected from the bit numbers + * in the documentation. + * But I really don't want to change the fixup code for allocation + * of IRQs, nor the alpha_irq_mask maintenance stuff, both of which + * look nice and clean now. + * So, here's this additional grotty hack... :-( + */ + if (irq >= 16) + irq = irq + 1; + + handle_irq(irq); +} + +static void __init +noritake_init_irq(void) +{ + long i; + + if (alpha_using_srm) + alpha_mv.device_interrupt = noritake_srm_device_interrupt; + + outw(0, 0x54a); + outw(0, 0x54c); + + for (i = 16; i < 48; ++i) { + irq_set_chip_and_handler(i, &noritake_irq_type, + handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + init_i8259a_irqs(); + common_init_isa_dma(); +} + + +/* + * PCI Fixup configuration. + * + * Summary @ 0x542, summary register #1: + * Bit Meaning + * 0 All valid ints from summary regs 2 & 3 + * 1 QLOGIC ISP1020A SCSI + * 2 Interrupt Line A from slot 0 + * 3 Interrupt Line B from slot 0 + * 4 Interrupt Line A from slot 1 + * 5 Interrupt line B from slot 1 + * 6 Interrupt Line A from slot 2 + * 7 Interrupt Line B from slot 2 + * 8 Interrupt Line A from slot 3 + * 9 Interrupt Line B from slot 3 + *10 Interrupt Line A from slot 4 + *11 Interrupt Line B from slot 4 + *12 Interrupt Line A from slot 5 + *13 Interrupt Line B from slot 5 + *14 Interrupt Line A from slot 6 + *15 Interrupt Line B from slot 6 + * + * Summary @ 0x544, summary register #2: + * Bit Meaning + * 0 OR of all unmasked ints in SR #2 + * 1 OR of secondary bus ints + * 2 Interrupt Line C from slot 0 + * 3 Interrupt Line D from slot 0 + * 4 Interrupt Line C from slot 1 + * 5 Interrupt line D from slot 1 + * 6 Interrupt Line C from slot 2 + * 7 Interrupt Line D from slot 2 + * 8 Interrupt Line C from slot 3 + * 9 Interrupt Line D from slot 3 + *10 Interrupt Line C from slot 4 + *11 Interrupt Line D from slot 4 + *12 Interrupt Line C from slot 5 + *13 Interrupt Line D from slot 5 + *14 Interrupt Line C from slot 6 + *15 Interrupt Line D from slot 6 + * + * The device to slot mapping looks like: + * + * Slot Device + * 7 Intel PCI-EISA bridge chip + * 8 DEC PCI-PCI bridge chip + * 11 PCI on board slot 0 + * 12 PCI on board slot 1 + * 13 PCI on board slot 2 + * + * + * This two layered interrupt approach means that we allocate IRQ 16 and + * above for PCI interrupts. The IRQ relates to which bit the interrupt + * comes in on. This makes interrupt processing much easier. + */ + +static int __init +noritake_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[15][5] __initdata = { + /*INT INTA INTB INTC INTD */ + /* note: IDSELs 16, 17, and 25 are CORELLE only */ + { 16+1, 16+1, 16+1, 16+1, 16+1}, /* IdSel 16, QLOGIC */ + { -1, -1, -1, -1, -1}, /* IdSel 17, S3 Trio64 */ + { -1, -1, -1, -1, -1}, /* IdSel 18, PCEB */ + { -1, -1, -1, -1, -1}, /* IdSel 19, PPB */ + { -1, -1, -1, -1, -1}, /* IdSel 20, ???? */ + { -1, -1, -1, -1, -1}, /* IdSel 21, ???? */ + { 16+2, 16+2, 16+3, 32+2, 32+3}, /* IdSel 22, slot 0 */ + { 16+4, 16+4, 16+5, 32+4, 32+5}, /* IdSel 23, slot 1 */ + { 16+6, 16+6, 16+7, 32+6, 32+7}, /* IdSel 24, slot 2 */ + { 16+8, 16+8, 16+9, 32+8, 32+9}, /* IdSel 25, slot 3 */ + /* The following 5 are actually on PCI bus 1, which is + across the built-in bridge of the NORITAKE only. */ + { 16+1, 16+1, 16+1, 16+1, 16+1}, /* IdSel 16, QLOGIC */ + { 16+8, 16+8, 16+9, 32+8, 32+9}, /* IdSel 17, slot 3 */ + {16+10, 16+10, 16+11, 32+10, 32+11}, /* IdSel 18, slot 4 */ + {16+12, 16+12, 16+13, 32+12, 32+13}, /* IdSel 19, slot 5 */ + {16+14, 16+14, 16+15, 32+14, 32+15}, /* IdSel 20, slot 6 */ + }; + const long min_idsel = 5, max_idsel = 19, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + +static u8 __init +noritake_swizzle(struct pci_dev *dev, u8 *pinp) +{ + int slot, pin = *pinp; + + if (dev->bus->number == 0) { + slot = PCI_SLOT(dev->devfn); + } + /* Check for the built-in bridge */ + else if (PCI_SLOT(dev->bus->self->devfn) == 8) { + slot = PCI_SLOT(dev->devfn) + 15; /* WAG! */ + } + else + { + /* Must be a card-based bridge. */ + do { + if (PCI_SLOT(dev->bus->self->devfn) == 8) { + slot = PCI_SLOT(dev->devfn) + 15; + break; + } + pin = pci_swizzle_interrupt_pin(dev, pin); + + /* Move up the chain of bridges. */ + dev = dev->bus->self; + /* Slot of the next bridge. */ + slot = PCI_SLOT(dev->devfn); + } while (dev->bus->self); + } + *pinp = pin; + return slot; +} + +#if defined(CONFIG_ALPHA_GENERIC) || !defined(CONFIG_ALPHA_PRIMO) +static void +noritake_apecs_machine_check(unsigned long vector, unsigned long la_ptr) +{ +#define MCHK_NO_DEVSEL 0x205U +#define MCHK_NO_TABT 0x204U + + struct el_common *mchk_header; + unsigned int code; + + mchk_header = (struct el_common *)la_ptr; + + /* Clear the error before any reporting. */ + mb(); + mb(); /* magic */ + draina(); + apecs_pci_clr_err(); + wrmces(0x7); + mb(); + + code = mchk_header->code; + process_mcheck_info(vector, la_ptr, "NORITAKE APECS", + (mcheck_expected(0) + && (code == MCHK_NO_DEVSEL + || code == MCHK_NO_TABT))); +} +#endif + + +/* + * The System Vectors + */ + +#if defined(CONFIG_ALPHA_GENERIC) || !defined(CONFIG_ALPHA_PRIMO) +struct alpha_machine_vector noritake_mv __initmv = { + .vector_name = "Noritake", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_APECS_IO, + .machine_check = noritake_apecs_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = EISA_DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 48, + .device_interrupt = noritake_device_interrupt, + + .init_arch = apecs_init_arch, + .init_irq = noritake_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .pci_map_irq = noritake_map_irq, + .pci_swizzle = noritake_swizzle, +}; +ALIAS_MV(noritake) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_PRIMO) +struct alpha_machine_vector noritake_primo_mv __initmv = { + .vector_name = "Noritake-Primo", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_CIA_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = EISA_DEFAULT_IO_BASE, + .min_mem_address = CIA_DEFAULT_MEM_BASE, + + .nr_irqs = 48, + .device_interrupt = noritake_device_interrupt, + + .init_arch = cia_init_arch, + .init_irq = noritake_init_irq, + .init_rtc = common_init_rtc, + .init_pci = cia_init_pci, + .kill_arch = cia_kill_arch, + .pci_map_irq = noritake_map_irq, + .pci_swizzle = noritake_swizzle, +}; +ALIAS_MV(noritake_primo) +#endif diff --git a/arch/alpha/kernel/sys_rawhide.c b/arch/alpha/kernel/sys_rawhide.c new file mode 100644 index 00000000..dfd510ae --- /dev/null +++ b/arch/alpha/kernel/sys_rawhide.c @@ -0,0 +1,271 @@ +/* + * linux/arch/alpha/kernel/sys_rawhide.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code supporting the RAWHIDE. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> + +#include <asm/ptrace.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/core_mcpcia.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +/* + * HACK ALERT! only the boot cpu is used for interrupts. + */ + + +/* Note mask bit is true for ENABLED irqs. */ + +static unsigned int hose_irq_masks[4] = { + 0xff0000, 0xfe0000, 0xff0000, 0xff0000 +}; +static unsigned int cached_irq_masks[4]; +DEFINE_SPINLOCK(rawhide_irq_lock); + +static inline void +rawhide_update_irq_hw(int hose, int mask) +{ + *(vuip)MCPCIA_INT_MASK0(MCPCIA_HOSE2MID(hose)) = mask; + mb(); + *(vuip)MCPCIA_INT_MASK0(MCPCIA_HOSE2MID(hose)); +} + +#define hose_exists(h) \ + (((h) < MCPCIA_MAX_HOSES) && (cached_irq_masks[(h)] != 0)) + +static inline void +rawhide_enable_irq(struct irq_data *d) +{ + unsigned int mask, hose; + unsigned int irq = d->irq; + + irq -= 16; + hose = irq / 24; + if (!hose_exists(hose)) /* if hose non-existent, exit */ + return; + + irq -= hose * 24; + mask = 1 << irq; + + spin_lock(&rawhide_irq_lock); + mask |= cached_irq_masks[hose]; + cached_irq_masks[hose] = mask; + rawhide_update_irq_hw(hose, mask); + spin_unlock(&rawhide_irq_lock); +} + +static void +rawhide_disable_irq(struct irq_data *d) +{ + unsigned int mask, hose; + unsigned int irq = d->irq; + + irq -= 16; + hose = irq / 24; + if (!hose_exists(hose)) /* if hose non-existent, exit */ + return; + + irq -= hose * 24; + mask = ~(1 << irq) | hose_irq_masks[hose]; + + spin_lock(&rawhide_irq_lock); + mask &= cached_irq_masks[hose]; + cached_irq_masks[hose] = mask; + rawhide_update_irq_hw(hose, mask); + spin_unlock(&rawhide_irq_lock); +} + +static void +rawhide_mask_and_ack_irq(struct irq_data *d) +{ + unsigned int mask, mask1, hose; + unsigned int irq = d->irq; + + irq -= 16; + hose = irq / 24; + if (!hose_exists(hose)) /* if hose non-existent, exit */ + return; + + irq -= hose * 24; + mask1 = 1 << irq; + mask = ~mask1 | hose_irq_masks[hose]; + + spin_lock(&rawhide_irq_lock); + + mask &= cached_irq_masks[hose]; + cached_irq_masks[hose] = mask; + rawhide_update_irq_hw(hose, mask); + + /* Clear the interrupt. */ + *(vuip)MCPCIA_INT_REQ(MCPCIA_HOSE2MID(hose)) = mask1; + + spin_unlock(&rawhide_irq_lock); +} + +static struct irq_chip rawhide_irq_type = { + .name = "RAWHIDE", + .irq_unmask = rawhide_enable_irq, + .irq_mask = rawhide_disable_irq, + .irq_mask_ack = rawhide_mask_and_ack_irq, +}; + +static void +rawhide_srm_device_interrupt(unsigned long vector) +{ + int irq; + + irq = (vector - 0x800) >> 4; + + /* + * The RAWHIDE SRM console reports PCI interrupts with a vector + * 0x80 *higher* than one might expect, as PCI IRQ 0 (ie bit 0) + * shows up as IRQ 24, etc, etc. We adjust it down by 8 to have + * it line up with the actual bit numbers from the REQ registers, + * which is how we manage the interrupts/mask. Sigh... + * + * Also, PCI #1 interrupts are offset some more... :-( + */ + + if (irq == 52) { + /* SCSI on PCI1 is special. */ + irq = 72; + } + + /* Adjust by which hose it is from. */ + irq -= ((irq + 16) >> 2) & 0x38; + + handle_irq(irq); +} + +static void __init +rawhide_init_irq(void) +{ + struct pci_controller *hose; + long i; + + mcpcia_init_hoses(); + + /* Clear them all; only hoses that exist will be non-zero. */ + for (i = 0; i < MCPCIA_MAX_HOSES; i++) cached_irq_masks[i] = 0; + + for (hose = hose_head; hose; hose = hose->next) { + unsigned int h = hose->index; + unsigned int mask = hose_irq_masks[h]; + + cached_irq_masks[h] = mask; + *(vuip)MCPCIA_INT_MASK0(MCPCIA_HOSE2MID(h)) = mask; + *(vuip)MCPCIA_INT_MASK1(MCPCIA_HOSE2MID(h)) = 0; + } + + for (i = 16; i < 128; ++i) { + irq_set_chip_and_handler(i, &rawhide_irq_type, + handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + init_i8259a_irqs(); + common_init_isa_dma(); +} + +/* + * PCI Fixup configuration. + * + * Summary @ MCPCIA_PCI0_INT_REQ: + * Bit Meaning + * 0 Interrupt Line A from slot 2 PCI0 + * 1 Interrupt Line B from slot 2 PCI0 + * 2 Interrupt Line C from slot 2 PCI0 + * 3 Interrupt Line D from slot 2 PCI0 + * 4 Interrupt Line A from slot 3 PCI0 + * 5 Interrupt Line B from slot 3 PCI0 + * 6 Interrupt Line C from slot 3 PCI0 + * 7 Interrupt Line D from slot 3 PCI0 + * 8 Interrupt Line A from slot 4 PCI0 + * 9 Interrupt Line B from slot 4 PCI0 + * 10 Interrupt Line C from slot 4 PCI0 + * 11 Interrupt Line D from slot 4 PCI0 + * 12 Interrupt Line A from slot 5 PCI0 + * 13 Interrupt Line B from slot 5 PCI0 + * 14 Interrupt Line C from slot 5 PCI0 + * 15 Interrupt Line D from slot 5 PCI0 + * 16 EISA interrupt (PCI 0) or SCSI interrupt (PCI 1) + * 17-23 NA + * + * IdSel + * 1 EISA bridge (PCI bus 0 only) + * 2 PCI option slot 2 + * 3 PCI option slot 3 + * 4 PCI option slot 4 + * 5 PCI option slot 5 + * + */ + +static int __init +rawhide_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[5][5] __initdata = { + /*INT INTA INTB INTC INTD */ + { 16+16, 16+16, 16+16, 16+16, 16+16}, /* IdSel 1 SCSI PCI 1 */ + { 16+ 0, 16+ 0, 16+ 1, 16+ 2, 16+ 3}, /* IdSel 2 slot 2 */ + { 16+ 4, 16+ 4, 16+ 5, 16+ 6, 16+ 7}, /* IdSel 3 slot 3 */ + { 16+ 8, 16+ 8, 16+ 9, 16+10, 16+11}, /* IdSel 4 slot 4 */ + { 16+12, 16+12, 16+13, 16+14, 16+15} /* IdSel 5 slot 5 */ + }; + const long min_idsel = 1, max_idsel = 5, irqs_per_slot = 5; + + struct pci_controller *hose = dev->sysdata; + int irq = COMMON_TABLE_LOOKUP; + if (irq >= 0) + irq += 24 * hose->index; + return irq; +} + + +/* + * The System Vector + */ + +struct alpha_machine_vector rawhide_mv __initmv = { + .vector_name = "Rawhide", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_MCPCIA_IO, + .machine_check = mcpcia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = MCPCIA_DEFAULT_MEM_BASE, + .pci_dac_offset = MCPCIA_DAC_OFFSET, + + .nr_irqs = 128, + .device_interrupt = rawhide_srm_device_interrupt, + + .init_arch = mcpcia_init_arch, + .init_irq = rawhide_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .kill_arch = NULL, + .pci_map_irq = rawhide_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(rawhide) diff --git a/arch/alpha/kernel/sys_ruffian.c b/arch/alpha/kernel/sys_ruffian.c new file mode 100644 index 00000000..a3f48525 --- /dev/null +++ b/arch/alpha/kernel/sys_ruffian.c @@ -0,0 +1,239 @@ +/* + * linux/arch/alpha/kernel/sys_ruffian.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999, 2000 Richard Henderson + * + * Code supporting the RUFFIAN. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/ioport.h> +#include <linux/timex.h> +#include <linux/init.h> + +#include <asm/ptrace.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/core_cia.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +static void __init +ruffian_init_irq(void) +{ + /* Invert 6&7 for i82371 */ + *(vulp)PYXIS_INT_HILO = 0x000000c0UL; mb(); + *(vulp)PYXIS_INT_CNFG = 0x00002064UL; mb(); /* all clear */ + + outb(0x11,0xA0); + outb(0x08,0xA1); + outb(0x02,0xA1); + outb(0x01,0xA1); + outb(0xFF,0xA1); + + outb(0x11,0x20); + outb(0x00,0x21); + outb(0x04,0x21); + outb(0x01,0x21); + outb(0xFF,0x21); + + /* Finish writing the 82C59A PIC Operation Control Words */ + outb(0x20,0xA0); + outb(0x20,0x20); + + init_i8259a_irqs(); + + /* Not interested in the bogus interrupts (0,3,6), + NMI (1), HALT (2), flash (5), or 21142 (8). */ + init_pyxis_irqs(0x16f0000); + + common_init_isa_dma(); +} + +#define RUFFIAN_LATCH DIV_ROUND_CLOSEST(PIT_TICK_RATE, HZ) + +static void __init +ruffian_init_rtc(void) +{ + /* Ruffian does not have the RTC connected to the CPU timer + interrupt. Instead, it uses the PIT connected to IRQ 0. */ + + /* Setup interval timer. */ + outb(0x34, 0x43); /* binary, mode 2, LSB/MSB, ch 0 */ + outb(RUFFIAN_LATCH & 0xff, 0x40); /* LSB */ + outb(RUFFIAN_LATCH >> 8, 0x40); /* MSB */ + + outb(0xb6, 0x43); /* pit counter 2: speaker */ + outb(0x31, 0x42); + outb(0x13, 0x42); + + setup_irq(0, &timer_irqaction); +} + +static void +ruffian_kill_arch (int mode) +{ + cia_kill_arch(mode); +#if 0 + /* This only causes re-entry to ARCSBIOS */ + /* Perhaps this works for other PYXIS as well? */ + *(vuip) PYXIS_RESET = 0x0000dead; + mb(); +#endif +} + +/* + * Interrupt routing: + * + * Primary bus + * IdSel INTA INTB INTC INTD + * 21052 13 - - - - + * SIO 14 23 - - - + * 21143 15 44 - - - + * Slot 0 17 43 42 41 40 + * + * Secondary bus + * IdSel INTA INTB INTC INTD + * Slot 0 8 (18) 19 18 17 16 + * Slot 1 9 (19) 31 30 29 28 + * Slot 2 10 (20) 27 26 25 24 + * Slot 3 11 (21) 39 38 37 36 + * Slot 4 12 (22) 35 34 33 32 + * 53c875 13 (23) 20 - - - + * + */ + +static int __init +ruffian_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[11][5] __initdata = { + /*INT INTA INTB INTC INTD */ + {-1, -1, -1, -1, -1}, /* IdSel 13, 21052 */ + {-1, -1, -1, -1, -1}, /* IdSel 14, SIO */ + {44, 44, 44, 44, 44}, /* IdSel 15, 21143 */ + {-1, -1, -1, -1, -1}, /* IdSel 16, none */ + {43, 43, 42, 41, 40}, /* IdSel 17, 64-bit slot */ + /* the next 6 are actually on PCI bus 1, across the bridge */ + {19, 19, 18, 17, 16}, /* IdSel 8, slot 0 */ + {31, 31, 30, 29, 28}, /* IdSel 9, slot 1 */ + {27, 27, 26, 25, 24}, /* IdSel 10, slot 2 */ + {39, 39, 38, 37, 36}, /* IdSel 11, slot 3 */ + {35, 35, 34, 33, 32}, /* IdSel 12, slot 4 */ + {20, 20, 20, 20, 20}, /* IdSel 13, 53c875 */ + }; + const long min_idsel = 13, max_idsel = 23, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + +static u8 __init +ruffian_swizzle(struct pci_dev *dev, u8 *pinp) +{ + int slot, pin = *pinp; + + if (dev->bus->number == 0) { + slot = PCI_SLOT(dev->devfn); + } + /* Check for the built-in bridge. */ + else if (PCI_SLOT(dev->bus->self->devfn) == 13) { + slot = PCI_SLOT(dev->devfn) + 10; + } + else + { + /* Must be a card-based bridge. */ + do { + if (PCI_SLOT(dev->bus->self->devfn) == 13) { + slot = PCI_SLOT(dev->devfn) + 10; + break; + } + pin = pci_swizzle_interrupt_pin(dev, pin); + + /* Move up the chain of bridges. */ + dev = dev->bus->self; + /* Slot of the next bridge. */ + slot = PCI_SLOT(dev->devfn); + } while (dev->bus->self); + } + *pinp = pin; + return slot; +} + +#ifdef BUILDING_FOR_MILO +/* + * The DeskStation Ruffian motherboard firmware does not place + * the memory size in the PALimpure area. Therefore, we use + * the Bank Configuration Registers in PYXIS to obtain the size. + */ +static unsigned long __init +ruffian_get_bank_size(unsigned long offset) +{ + unsigned long bank_addr, bank, ret = 0; + + /* Valid offsets are: 0x800, 0x840 and 0x880 + since Ruffian only uses three banks. */ + bank_addr = (unsigned long)PYXIS_MCR + offset; + bank = *(vulp)bank_addr; + + /* Check BANK_ENABLE */ + if (bank & 0x01) { + static unsigned long size[] __initdata = { + 0x40000000UL, /* 0x00, 1G */ + 0x20000000UL, /* 0x02, 512M */ + 0x10000000UL, /* 0x04, 256M */ + 0x08000000UL, /* 0x06, 128M */ + 0x04000000UL, /* 0x08, 64M */ + 0x02000000UL, /* 0x0a, 32M */ + 0x01000000UL, /* 0x0c, 16M */ + 0x00800000UL, /* 0x0e, 8M */ + 0x80000000UL, /* 0x10, 2G */ + }; + + bank = (bank & 0x1e) >> 1; + if (bank < ARRAY_SIZE(size)) + ret = size[bank]; + } + + return ret; +} +#endif /* BUILDING_FOR_MILO */ + +/* + * The System Vector + */ + +struct alpha_machine_vector ruffian_mv __initmv = { + .vector_name = "Ruffian", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_PYXIS_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_RUFFIAN_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = PYXIS_DAC_OFFSET, + + .nr_irqs = 48, + .device_interrupt = pyxis_device_interrupt, + + .init_arch = pyxis_init_arch, + .init_irq = ruffian_init_irq, + .init_rtc = ruffian_init_rtc, + .init_pci = cia_init_pci, + .kill_arch = ruffian_kill_arch, + .pci_map_irq = ruffian_map_irq, + .pci_swizzle = ruffian_swizzle, +}; +ALIAS_MV(ruffian) diff --git a/arch/alpha/kernel/sys_rx164.c b/arch/alpha/kernel/sys_rx164.c new file mode 100644 index 00000000..08ee737d --- /dev/null +++ b/arch/alpha/kernel/sys_rx164.c @@ -0,0 +1,202 @@ +/* + * linux/arch/alpha/kernel/sys_rx164.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code supporting the RX164 (PCA56+POLARIS). + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/bitops.h> + +#include <asm/ptrace.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/core_polaris.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +/* Note mask bit is true for ENABLED irqs. */ +static unsigned long cached_irq_mask; + +static inline void +rx164_update_irq_hw(unsigned long mask) +{ + volatile unsigned int *irq_mask; + + irq_mask = (void *)(POLARIS_DENSE_CONFIG_BASE + 0x74); + *irq_mask = mask; + mb(); + *irq_mask; +} + +static inline void +rx164_enable_irq(struct irq_data *d) +{ + rx164_update_irq_hw(cached_irq_mask |= 1UL << (d->irq - 16)); +} + +static void +rx164_disable_irq(struct irq_data *d) +{ + rx164_update_irq_hw(cached_irq_mask &= ~(1UL << (d->irq - 16))); +} + +static struct irq_chip rx164_irq_type = { + .name = "RX164", + .irq_unmask = rx164_enable_irq, + .irq_mask = rx164_disable_irq, + .irq_mask_ack = rx164_disable_irq, +}; + +static void +rx164_device_interrupt(unsigned long vector) +{ + unsigned long pld; + volatile unsigned int *dirr; + long i; + + /* Read the interrupt summary register. On Polaris, this is + the DIRR register in PCI config space (offset 0x84). */ + dirr = (void *)(POLARIS_DENSE_CONFIG_BASE + 0x84); + pld = *dirr; + + /* + * Now for every possible bit set, work through them and call + * the appropriate interrupt handler. + */ + while (pld) { + i = ffz(~pld); + pld &= pld - 1; /* clear least bit set */ + if (i == 20) { + isa_no_iack_sc_device_interrupt(vector); + } else { + handle_irq(16+i); + } + } +} + +static void __init +rx164_init_irq(void) +{ + long i; + + rx164_update_irq_hw(0); + for (i = 16; i < 40; ++i) { + irq_set_chip_and_handler(i, &rx164_irq_type, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + init_i8259a_irqs(); + common_init_isa_dma(); + + setup_irq(16+20, &isa_cascade_irqaction); +} + + +/* + * The RX164 changed its interrupt routing between pass1 and pass2... + * + * PASS1: + * + * Slot IDSEL INTA INTB INTC INTD + * 0 6 5 10 15 20 + * 1 7 4 9 14 19 + * 2 5 3 8 13 18 + * 3 9 2 7 12 17 + * 4 10 1 6 11 16 + * + * PASS2: + * Slot IDSEL INTA INTB INTC INTD + * 0 5 1 7 12 17 + * 1 6 2 8 13 18 + * 2 8 3 9 14 19 + * 3 9 4 10 15 20 + * 4 10 5 11 16 6 + * + */ + +/* + * IdSel + * 5 32 bit PCI option slot 0 + * 6 64 bit PCI option slot 1 + * 7 PCI-ISA bridge + * 7 64 bit PCI option slot 2 + * 9 32 bit PCI option slot 3 + * 10 PCI-PCI bridge + * + */ + +static int __init +rx164_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ +#if 0 + static char irq_tab_pass1[6][5] __initdata = { + /*INT INTA INTB INTC INTD */ + { 16+3, 16+3, 16+8, 16+13, 16+18}, /* IdSel 5, slot 2 */ + { 16+5, 16+5, 16+10, 16+15, 16+20}, /* IdSel 6, slot 0 */ + { 16+4, 16+4, 16+9, 16+14, 16+19}, /* IdSel 7, slot 1 */ + { -1, -1, -1, -1, -1}, /* IdSel 8, PCI/ISA bridge */ + { 16+2, 16+2, 16+7, 16+12, 16+17}, /* IdSel 9, slot 3 */ + { 16+1, 16+1, 16+6, 16+11, 16+16}, /* IdSel 10, slot 4 */ + }; +#else + static char irq_tab[6][5] __initdata = { + /*INT INTA INTB INTC INTD */ + { 16+0, 16+0, 16+6, 16+11, 16+16}, /* IdSel 5, slot 0 */ + { 16+1, 16+1, 16+7, 16+12, 16+17}, /* IdSel 6, slot 1 */ + { -1, -1, -1, -1, -1}, /* IdSel 7, PCI/ISA bridge */ + { 16+2, 16+2, 16+8, 16+13, 16+18}, /* IdSel 8, slot 2 */ + { 16+3, 16+3, 16+9, 16+14, 16+19}, /* IdSel 9, slot 3 */ + { 16+4, 16+4, 16+10, 16+15, 16+5}, /* IdSel 10, PCI-PCI */ + }; +#endif + const long min_idsel = 5, max_idsel = 10, irqs_per_slot = 5; + + /* JRP - Need to figure out how to distinguish pass1 from pass2, + and use the correct table. */ + return COMMON_TABLE_LOOKUP; +} + + +/* + * The System Vector + */ + +struct alpha_machine_vector rx164_mv __initmv = { + .vector_name = "RX164", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_POLARIS_IO, + .machine_check = polaris_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + + .nr_irqs = 40, + .device_interrupt = rx164_device_interrupt, + + .init_arch = polaris_init_arch, + .init_irq = rx164_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .kill_arch = NULL, + .pci_map_irq = rx164_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(rx164) diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c new file mode 100644 index 00000000..8a0aa6d6 --- /dev/null +++ b/arch/alpha/kernel/sys_sable.c @@ -0,0 +1,635 @@ +/* + * linux/arch/alpha/kernel/sys_sable.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code supporting the Sable, Sable-Gamma, and Lynx systems. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> + +#include <asm/ptrace.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/core_t2.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + +DEFINE_SPINLOCK(sable_lynx_irq_lock); + +typedef struct irq_swizzle_struct +{ + char irq_to_mask[64]; + char mask_to_irq[64]; + + /* Note mask bit is true for DISABLED irqs. */ + unsigned long shadow_mask; + + void (*update_irq_hw)(unsigned long bit, unsigned long mask); + void (*ack_irq_hw)(unsigned long bit); + +} irq_swizzle_t; + +static irq_swizzle_t *sable_lynx_irq_swizzle; + +static void sable_lynx_init_irq(int nr_of_irqs); + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SABLE) + +/***********************************************************************/ +/* + * For SABLE, which is really baroque, we manage 40 IRQ's, but the + * hardware really only supports 24, not via normal ISA PIC, + * but cascaded custom 8259's, etc. + * 0-7 (char at 536) + * 8-15 (char at 53a) + * 16-23 (char at 53c) + * + * Summary Registers (536/53a/53c): + * + * Bit Meaning Kernel IRQ + *------------------------------------------ + * 0 PCI slot 0 34 + * 1 NCR810 (builtin) 33 + * 2 TULIP (builtin) 32 + * 3 mouse 12 + * 4 PCI slot 1 35 + * 5 PCI slot 2 36 + * 6 keyboard 1 + * 7 floppy 6 + * 8 COM2 3 + * 9 parallel port 7 + *10 EISA irq 3 - + *11 EISA irq 4 - + *12 EISA irq 5 5 + *13 EISA irq 6 - + *14 EISA irq 7 - + *15 COM1 4 + *16 EISA irq 9 9 + *17 EISA irq 10 10 + *18 EISA irq 11 11 + *19 EISA irq 12 - + *20 EISA irq 13 - + *21 EISA irq 14 14 + *22 NC 15 + *23 IIC - + */ + +static void +sable_update_irq_hw(unsigned long bit, unsigned long mask) +{ + int port = 0x537; + + if (bit >= 16) { + port = 0x53d; + mask >>= 16; + } else if (bit >= 8) { + port = 0x53b; + mask >>= 8; + } + + outb(mask, port); +} + +static void +sable_ack_irq_hw(unsigned long bit) +{ + int port, val1, val2; + + if (bit >= 16) { + port = 0x53c; + val1 = 0xE0 | (bit - 16); + val2 = 0xE0 | 4; + } else if (bit >= 8) { + port = 0x53a; + val1 = 0xE0 | (bit - 8); + val2 = 0xE0 | 3; + } else { + port = 0x536; + val1 = 0xE0 | (bit - 0); + val2 = 0xE0 | 1; + } + + outb(val1, port); /* ack the slave */ + outb(val2, 0x534); /* ack the master */ +} + +static irq_swizzle_t sable_irq_swizzle = { + { + -1, 6, -1, 8, 15, 12, 7, 9, /* pseudo PIC 0-7 */ + -1, 16, 17, 18, 3, -1, 21, 22, /* pseudo PIC 8-15 */ + -1, -1, -1, -1, -1, -1, -1, -1, /* pseudo EISA 0-7 */ + -1, -1, -1, -1, -1, -1, -1, -1, /* pseudo EISA 8-15 */ + 2, 1, 0, 4, 5, -1, -1, -1, /* pseudo PCI */ + -1, -1, -1, -1, -1, -1, -1, -1, /* */ + -1, -1, -1, -1, -1, -1, -1, -1, /* */ + -1, -1, -1, -1, -1, -1, -1, -1 /* */ + }, + { + 34, 33, 32, 12, 35, 36, 1, 6, /* mask 0-7 */ + 3, 7, -1, -1, 5, -1, -1, 4, /* mask 8-15 */ + 9, 10, 11, -1, -1, 14, 15, -1, /* mask 16-23 */ + -1, -1, -1, -1, -1, -1, -1, -1, /* */ + -1, -1, -1, -1, -1, -1, -1, -1, /* */ + -1, -1, -1, -1, -1, -1, -1, -1, /* */ + -1, -1, -1, -1, -1, -1, -1, -1, /* */ + -1, -1, -1, -1, -1, -1, -1, -1 /* */ + }, + -1, + sable_update_irq_hw, + sable_ack_irq_hw +}; + +static void __init +sable_init_irq(void) +{ + outb(-1, 0x537); /* slave 0 */ + outb(-1, 0x53b); /* slave 1 */ + outb(-1, 0x53d); /* slave 2 */ + outb(0x44, 0x535); /* enable cascades in master */ + + sable_lynx_irq_swizzle = &sable_irq_swizzle; + sable_lynx_init_irq(40); +} + +/* + * PCI Fixup configuration for ALPHA SABLE (2100). + * + * The device to slot mapping looks like: + * + * Slot Device + * 0 TULIP + * 1 SCSI + * 2 PCI-EISA bridge + * 3 none + * 4 none + * 5 none + * 6 PCI on board slot 0 + * 7 PCI on board slot 1 + * 8 PCI on board slot 2 + * + * + * This two layered interrupt approach means that we allocate IRQ 16 and + * above for PCI interrupts. The IRQ relates to which bit the interrupt + * comes in on. This makes interrupt processing much easier. + */ +/* + * NOTE: the IRQ assignments below are arbitrary, but need to be consistent + * with the values in the irq swizzling tables above. + */ + +static int __init +sable_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[9][5] __initdata = { + /*INT INTA INTB INTC INTD */ + { 32+0, 32+0, 32+0, 32+0, 32+0}, /* IdSel 0, TULIP */ + { 32+1, 32+1, 32+1, 32+1, 32+1}, /* IdSel 1, SCSI */ + { -1, -1, -1, -1, -1}, /* IdSel 2, SIO */ + { -1, -1, -1, -1, -1}, /* IdSel 3, none */ + { -1, -1, -1, -1, -1}, /* IdSel 4, none */ + { -1, -1, -1, -1, -1}, /* IdSel 5, none */ + { 32+2, 32+2, 32+2, 32+2, 32+2}, /* IdSel 6, slot 0 */ + { 32+3, 32+3, 32+3, 32+3, 32+3}, /* IdSel 7, slot 1 */ + { 32+4, 32+4, 32+4, 32+4, 32+4} /* IdSel 8, slot 2 */ + }; + long min_idsel = 0, max_idsel = 8, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} +#endif /* defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SABLE) */ + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_LYNX) + +/***********************************************************************/ +/* LYNX hardware specifics + */ +/* + * For LYNX, which is also baroque, we manage 64 IRQs, via a custom IC. + * + * Bit Meaning Kernel IRQ + *------------------------------------------ + * 0 + * 1 + * 2 + * 3 mouse 12 + * 4 + * 5 + * 6 keyboard 1 + * 7 floppy 6 + * 8 COM2 3 + * 9 parallel port 7 + *10 EISA irq 3 - + *11 EISA irq 4 - + *12 EISA irq 5 5 + *13 EISA irq 6 - + *14 EISA irq 7 - + *15 COM1 4 + *16 EISA irq 9 9 + *17 EISA irq 10 10 + *18 EISA irq 11 11 + *19 EISA irq 12 - + *20 + *21 EISA irq 14 14 + *22 EISA irq 15 15 + *23 IIC - + *24 VGA (builtin) - + *25 + *26 + *27 + *28 NCR810 (builtin) 28 + *29 + *30 + *31 + *32 PCI 0 slot 4 A primary bus 32 + *33 PCI 0 slot 4 B primary bus 33 + *34 PCI 0 slot 4 C primary bus 34 + *35 PCI 0 slot 4 D primary bus + *36 PCI 0 slot 5 A primary bus + *37 PCI 0 slot 5 B primary bus + *38 PCI 0 slot 5 C primary bus + *39 PCI 0 slot 5 D primary bus + *40 PCI 0 slot 6 A primary bus + *41 PCI 0 slot 6 B primary bus + *42 PCI 0 slot 6 C primary bus + *43 PCI 0 slot 6 D primary bus + *44 PCI 0 slot 7 A primary bus + *45 PCI 0 slot 7 B primary bus + *46 PCI 0 slot 7 C primary bus + *47 PCI 0 slot 7 D primary bus + *48 PCI 0 slot 0 A secondary bus + *49 PCI 0 slot 0 B secondary bus + *50 PCI 0 slot 0 C secondary bus + *51 PCI 0 slot 0 D secondary bus + *52 PCI 0 slot 1 A secondary bus + *53 PCI 0 slot 1 B secondary bus + *54 PCI 0 slot 1 C secondary bus + *55 PCI 0 slot 1 D secondary bus + *56 PCI 0 slot 2 A secondary bus + *57 PCI 0 slot 2 B secondary bus + *58 PCI 0 slot 2 C secondary bus + *59 PCI 0 slot 2 D secondary bus + *60 PCI 0 slot 3 A secondary bus + *61 PCI 0 slot 3 B secondary bus + *62 PCI 0 slot 3 C secondary bus + *63 PCI 0 slot 3 D secondary bus + */ + +static void +lynx_update_irq_hw(unsigned long bit, unsigned long mask) +{ + /* + * Write the AIR register on the T3/T4 with the + * address of the IC mask register (offset 0x40) + */ + *(vulp)T2_AIR = 0x40; + mb(); + *(vulp)T2_AIR; /* re-read to force write */ + mb(); + *(vulp)T2_DIR = mask; + mb(); + mb(); +} + +static void +lynx_ack_irq_hw(unsigned long bit) +{ + *(vulp)T2_VAR = (u_long) bit; + mb(); + mb(); +} + +static irq_swizzle_t lynx_irq_swizzle = { + { /* irq_to_mask */ + -1, 6, -1, 8, 15, 12, 7, 9, /* pseudo PIC 0-7 */ + -1, 16, 17, 18, 3, -1, 21, 22, /* pseudo PIC 8-15 */ + -1, -1, -1, -1, -1, -1, -1, -1, /* pseudo */ + -1, -1, -1, -1, 28, -1, -1, -1, /* pseudo */ + 32, 33, 34, 35, 36, 37, 38, 39, /* mask 32-39 */ + 40, 41, 42, 43, 44, 45, 46, 47, /* mask 40-47 */ + 48, 49, 50, 51, 52, 53, 54, 55, /* mask 48-55 */ + 56, 57, 58, 59, 60, 61, 62, 63 /* mask 56-63 */ + }, + { /* mask_to_irq */ + -1, -1, -1, 12, -1, -1, 1, 6, /* mask 0-7 */ + 3, 7, -1, -1, 5, -1, -1, 4, /* mask 8-15 */ + 9, 10, 11, -1, -1, 14, 15, -1, /* mask 16-23 */ + -1, -1, -1, -1, 28, -1, -1, -1, /* mask 24-31 */ + 32, 33, 34, 35, 36, 37, 38, 39, /* mask 32-39 */ + 40, 41, 42, 43, 44, 45, 46, 47, /* mask 40-47 */ + 48, 49, 50, 51, 52, 53, 54, 55, /* mask 48-55 */ + 56, 57, 58, 59, 60, 61, 62, 63 /* mask 56-63 */ + }, + -1, + lynx_update_irq_hw, + lynx_ack_irq_hw +}; + +static void __init +lynx_init_irq(void) +{ + sable_lynx_irq_swizzle = &lynx_irq_swizzle; + sable_lynx_init_irq(64); +} + +/* + * PCI Fixup configuration for ALPHA LYNX (2100A) + * + * The device to slot mapping looks like: + * + * Slot Device + * 0 none + * 1 none + * 2 PCI-EISA bridge + * 3 PCI-PCI bridge + * 4 NCR 810 (Demi-Lynx only) + * 5 none + * 6 PCI on board slot 4 + * 7 PCI on board slot 5 + * 8 PCI on board slot 6 + * 9 PCI on board slot 7 + * + * And behind the PPB we have: + * + * 11 PCI on board slot 0 + * 12 PCI on board slot 1 + * 13 PCI on board slot 2 + * 14 PCI on board slot 3 + */ +/* + * NOTE: the IRQ assignments below are arbitrary, but need to be consistent + * with the values in the irq swizzling tables above. + */ + +static int __init +lynx_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[19][5] __initdata = { + /*INT INTA INTB INTC INTD */ + { -1, -1, -1, -1, -1}, /* IdSel 13, PCEB */ + { -1, -1, -1, -1, -1}, /* IdSel 14, PPB */ + { 28, 28, 28, 28, 28}, /* IdSel 15, NCR demi */ + { -1, -1, -1, -1, -1}, /* IdSel 16, none */ + { 32, 32, 33, 34, 35}, /* IdSel 17, slot 4 */ + { 36, 36, 37, 38, 39}, /* IdSel 18, slot 5 */ + { 40, 40, 41, 42, 43}, /* IdSel 19, slot 6 */ + { 44, 44, 45, 46, 47}, /* IdSel 20, slot 7 */ + { -1, -1, -1, -1, -1}, /* IdSel 22, none */ + /* The following are actually behind the PPB. */ + { -1, -1, -1, -1, -1}, /* IdSel 16 none */ + { 28, 28, 28, 28, 28}, /* IdSel 17 NCR lynx */ + { -1, -1, -1, -1, -1}, /* IdSel 18 none */ + { -1, -1, -1, -1, -1}, /* IdSel 19 none */ + { -1, -1, -1, -1, -1}, /* IdSel 20 none */ + { -1, -1, -1, -1, -1}, /* IdSel 21 none */ + { 48, 48, 49, 50, 51}, /* IdSel 22 slot 0 */ + { 52, 52, 53, 54, 55}, /* IdSel 23 slot 1 */ + { 56, 56, 57, 58, 59}, /* IdSel 24 slot 2 */ + { 60, 60, 61, 62, 63} /* IdSel 25 slot 3 */ + }; + const long min_idsel = 2, max_idsel = 20, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + +static u8 __init +lynx_swizzle(struct pci_dev *dev, u8 *pinp) +{ + int slot, pin = *pinp; + + if (dev->bus->number == 0) { + slot = PCI_SLOT(dev->devfn); + } + /* Check for the built-in bridge */ + else if (PCI_SLOT(dev->bus->self->devfn) == 3) { + slot = PCI_SLOT(dev->devfn) + 11; + } + else + { + /* Must be a card-based bridge. */ + do { + if (PCI_SLOT(dev->bus->self->devfn) == 3) { + slot = PCI_SLOT(dev->devfn) + 11; + break; + } + pin = pci_swizzle_interrupt_pin(dev, pin); + + /* Move up the chain of bridges. */ + dev = dev->bus->self; + /* Slot of the next bridge. */ + slot = PCI_SLOT(dev->devfn); + } while (dev->bus->self); + } + *pinp = pin; + return slot; +} + +#endif /* defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_LYNX) */ + +/***********************************************************************/ +/* GENERIC irq routines */ + +static inline void +sable_lynx_enable_irq(struct irq_data *d) +{ + unsigned long bit, mask; + + bit = sable_lynx_irq_swizzle->irq_to_mask[d->irq]; + spin_lock(&sable_lynx_irq_lock); + mask = sable_lynx_irq_swizzle->shadow_mask &= ~(1UL << bit); + sable_lynx_irq_swizzle->update_irq_hw(bit, mask); + spin_unlock(&sable_lynx_irq_lock); +#if 0 + printk("%s: mask 0x%lx bit 0x%lx irq 0x%x\n", + __func__, mask, bit, irq); +#endif +} + +static void +sable_lynx_disable_irq(struct irq_data *d) +{ + unsigned long bit, mask; + + bit = sable_lynx_irq_swizzle->irq_to_mask[d->irq]; + spin_lock(&sable_lynx_irq_lock); + mask = sable_lynx_irq_swizzle->shadow_mask |= 1UL << bit; + sable_lynx_irq_swizzle->update_irq_hw(bit, mask); + spin_unlock(&sable_lynx_irq_lock); +#if 0 + printk("%s: mask 0x%lx bit 0x%lx irq 0x%x\n", + __func__, mask, bit, irq); +#endif +} + +static void +sable_lynx_mask_and_ack_irq(struct irq_data *d) +{ + unsigned long bit, mask; + + bit = sable_lynx_irq_swizzle->irq_to_mask[d->irq]; + spin_lock(&sable_lynx_irq_lock); + mask = sable_lynx_irq_swizzle->shadow_mask |= 1UL << bit; + sable_lynx_irq_swizzle->update_irq_hw(bit, mask); + sable_lynx_irq_swizzle->ack_irq_hw(bit); + spin_unlock(&sable_lynx_irq_lock); +} + +static struct irq_chip sable_lynx_irq_type = { + .name = "SABLE/LYNX", + .irq_unmask = sable_lynx_enable_irq, + .irq_mask = sable_lynx_disable_irq, + .irq_mask_ack = sable_lynx_mask_and_ack_irq, +}; + +static void +sable_lynx_srm_device_interrupt(unsigned long vector) +{ + /* Note that the vector reported by the SRM PALcode corresponds + to the interrupt mask bits, but we have to manage via the + so-called legacy IRQs for many common devices. */ + + int bit, irq; + + bit = (vector - 0x800) >> 4; + irq = sable_lynx_irq_swizzle->mask_to_irq[bit]; +#if 0 + printk("%s: vector 0x%lx bit 0x%x irq 0x%x\n", + __func__, vector, bit, irq); +#endif + handle_irq(irq); +} + +static void __init +sable_lynx_init_irq(int nr_of_irqs) +{ + long i; + + for (i = 0; i < nr_of_irqs; ++i) { + irq_set_chip_and_handler(i, &sable_lynx_irq_type, + handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + common_init_isa_dma(); +} + +static void __init +sable_lynx_init_pci(void) +{ + common_init_pci(); +} + +/*****************************************************************/ +/* + * The System Vectors + * + * In order that T2_HAE_ADDRESS should be a constant, we play + * these games with GAMMA_BIAS. + */ + +#if defined(CONFIG_ALPHA_GENERIC) || \ + (defined(CONFIG_ALPHA_SABLE) && !defined(CONFIG_ALPHA_GAMMA)) +#undef GAMMA_BIAS +#define GAMMA_BIAS 0 +struct alpha_machine_vector sable_mv __initmv = { + .vector_name = "Sable", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_T2_IO, + .machine_check = t2_machine_check, + .max_isa_dma_address = ALPHA_SABLE_MAX_ISA_DMA_ADDRESS, + .min_io_address = EISA_DEFAULT_IO_BASE, + .min_mem_address = T2_DEFAULT_MEM_BASE, + + .nr_irqs = 40, + .device_interrupt = sable_lynx_srm_device_interrupt, + + .init_arch = t2_init_arch, + .init_irq = sable_init_irq, + .init_rtc = common_init_rtc, + .init_pci = sable_lynx_init_pci, + .kill_arch = t2_kill_arch, + .pci_map_irq = sable_map_irq, + .pci_swizzle = common_swizzle, + + .sys = { .t2 = { + .gamma_bias = 0 + } } +}; +ALIAS_MV(sable) +#endif /* GENERIC || (SABLE && !GAMMA) */ + +#if defined(CONFIG_ALPHA_GENERIC) || \ + (defined(CONFIG_ALPHA_SABLE) && defined(CONFIG_ALPHA_GAMMA)) +#undef GAMMA_BIAS +#define GAMMA_BIAS _GAMMA_BIAS +struct alpha_machine_vector sable_gamma_mv __initmv = { + .vector_name = "Sable-Gamma", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_T2_IO, + .machine_check = t2_machine_check, + .max_isa_dma_address = ALPHA_SABLE_MAX_ISA_DMA_ADDRESS, + .min_io_address = EISA_DEFAULT_IO_BASE, + .min_mem_address = T2_DEFAULT_MEM_BASE, + + .nr_irqs = 40, + .device_interrupt = sable_lynx_srm_device_interrupt, + + .init_arch = t2_init_arch, + .init_irq = sable_init_irq, + .init_rtc = common_init_rtc, + .init_pci = sable_lynx_init_pci, + .kill_arch = t2_kill_arch, + .pci_map_irq = sable_map_irq, + .pci_swizzle = common_swizzle, + + .sys = { .t2 = { + .gamma_bias = _GAMMA_BIAS + } } +}; +ALIAS_MV(sable_gamma) +#endif /* GENERIC || (SABLE && GAMMA) */ + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_LYNX) +#undef GAMMA_BIAS +#define GAMMA_BIAS _GAMMA_BIAS +struct alpha_machine_vector lynx_mv __initmv = { + .vector_name = "Lynx", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_T2_IO, + .machine_check = t2_machine_check, + .max_isa_dma_address = ALPHA_SABLE_MAX_ISA_DMA_ADDRESS, + .min_io_address = EISA_DEFAULT_IO_BASE, + .min_mem_address = T2_DEFAULT_MEM_BASE, + + .nr_irqs = 64, + .device_interrupt = sable_lynx_srm_device_interrupt, + + .init_arch = t2_init_arch, + .init_irq = lynx_init_irq, + .init_rtc = common_init_rtc, + .init_pci = sable_lynx_init_pci, + .kill_arch = t2_kill_arch, + .pci_map_irq = lynx_map_irq, + .pci_swizzle = lynx_swizzle, + + .sys = { .t2 = { + .gamma_bias = _GAMMA_BIAS + } } +}; +ALIAS_MV(lynx) +#endif /* GENERIC || LYNX */ diff --git a/arch/alpha/kernel/sys_sio.c b/arch/alpha/kernel/sys_sio.c new file mode 100644 index 00000000..febd24eb --- /dev/null +++ b/arch/alpha/kernel/sys_sio.c @@ -0,0 +1,461 @@ +/* + * linux/arch/alpha/kernel/sys_sio.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code for all boards that route the PCI interrupts through the SIO + * PCI/ISA bridge. This includes Noname (AXPpci33), Multia (UDB), + * Kenetics's Platform 2000, Avanti (AlphaStation), XL, and AlphaBook1. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/screen_info.h> + +#include <asm/compiler.h> +#include <asm/ptrace.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/core_apecs.h> +#include <asm/core_lca.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" +#include "pc873xx.h" + +#if defined(ALPHA_RESTORE_SRM_SETUP) +/* Save LCA configuration data as the console had it set up. */ +struct +{ + unsigned int orig_route_tab; /* for SAVE/RESTORE */ +} saved_config __attribute((common)); +#endif + + +static void __init +sio_init_irq(void) +{ + if (alpha_using_srm) + alpha_mv.device_interrupt = srm_device_interrupt; + + init_i8259a_irqs(); + common_init_isa_dma(); +} + +static inline void __init +alphabook1_init_arch(void) +{ + /* The AlphaBook1 has LCD video fixed at 800x600, + 37 rows and 100 cols. */ + screen_info.orig_y = 37; + screen_info.orig_video_cols = 100; + screen_info.orig_video_lines = 37; + + lca_init_arch(); +} + + +/* + * sio_route_tab selects irq routing in PCI/ISA bridge so that: + * PIRQ0 -> irq 15 + * PIRQ1 -> irq 9 + * PIRQ2 -> irq 10 + * PIRQ3 -> irq 11 + * + * This probably ought to be configurable via MILO. For + * example, sound boards seem to like using IRQ 9. + * + * This is NOT how we should do it. PIRQ0-X should have + * their own IRQs, the way intel uses the IO-APIC IRQs. + */ + +static void __init +sio_pci_route(void) +{ + unsigned int orig_route_tab; + + /* First, ALWAYS read and print the original setting. */ + pci_bus_read_config_dword(pci_isa_hose->bus, PCI_DEVFN(7, 0), 0x60, + &orig_route_tab); + printk("%s: PIRQ original 0x%x new 0x%x\n", __func__, + orig_route_tab, alpha_mv.sys.sio.route_tab); + +#if defined(ALPHA_RESTORE_SRM_SETUP) + saved_config.orig_route_tab = orig_route_tab; +#endif + + /* Now override with desired setting. */ + pci_bus_write_config_dword(pci_isa_hose->bus, PCI_DEVFN(7, 0), 0x60, + alpha_mv.sys.sio.route_tab); +} + +static unsigned int __init +sio_collect_irq_levels(void) +{ + unsigned int level_bits = 0; + struct pci_dev *dev = NULL; + + /* Iterate through the devices, collecting IRQ levels. */ + for_each_pci_dev(dev) { + if ((dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) && + (dev->class >> 8 != PCI_CLASS_BRIDGE_PCMCIA)) + continue; + + if (dev->irq) + level_bits |= (1 << dev->irq); + } + return level_bits; +} + +static void __init +sio_fixup_irq_levels(unsigned int level_bits) +{ + unsigned int old_level_bits; + + /* + * Now, make all PCI interrupts level sensitive. Notice: + * these registers must be accessed byte-wise. inw()/outw() + * don't work. + * + * Make sure to turn off any level bits set for IRQs 9,10,11,15, + * so that the only bits getting set are for devices actually found. + * Note that we do preserve the remainder of the bits, which we hope + * will be set correctly by ARC/SRM. + * + * Note: we at least preserve any level-set bits on AlphaBook1 + */ + old_level_bits = inb(0x4d0) | (inb(0x4d1) << 8); + + level_bits |= (old_level_bits & 0x71ff); + + outb((level_bits >> 0) & 0xff, 0x4d0); + outb((level_bits >> 8) & 0xff, 0x4d1); +} + +static inline int __init +noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + /* + * The Noname board has 5 PCI slots with each of the 4 + * interrupt pins routed to different pins on the PCI/ISA + * bridge (PIRQ0-PIRQ3). The table below is based on + * information available at: + * + * http://ftp.digital.com/pub/DEC/axppci/ref_interrupts.txt + * + * I have no information on the Avanti interrupt routing, but + * the routing seems to be identical to the Noname except + * that the Avanti has an additional slot whose routing I'm + * unsure of. + * + * pirq_tab[0] is a fake entry to deal with old PCI boards + * that have the interrupt pin number hardwired to 0 (meaning + * that they use the default INTA line, if they are interrupt + * driven at all). + */ + static char irq_tab[][5] __initdata = { + /*INT A B C D */ + { 3, 3, 3, 3, 3}, /* idsel 6 (53c810) */ + {-1, -1, -1, -1, -1}, /* idsel 7 (SIO: PCI/ISA bridge) */ + { 2, 2, -1, -1, -1}, /* idsel 8 (Hack: slot closest ISA) */ + {-1, -1, -1, -1, -1}, /* idsel 9 (unused) */ + {-1, -1, -1, -1, -1}, /* idsel 10 (unused) */ + { 0, 0, 2, 1, 0}, /* idsel 11 KN25_PCI_SLOT0 */ + { 1, 1, 0, 2, 1}, /* idsel 12 KN25_PCI_SLOT1 */ + { 2, 2, 1, 0, 2}, /* idsel 13 KN25_PCI_SLOT2 */ + { 0, 0, 0, 0, 0}, /* idsel 14 AS255 TULIP */ + }; + const long min_idsel = 6, max_idsel = 14, irqs_per_slot = 5; + int irq = COMMON_TABLE_LOOKUP, tmp; + tmp = __kernel_extbl(alpha_mv.sys.sio.route_tab, irq); + return irq >= 0 ? tmp : -1; +} + +static inline int __init +p2k_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[][5] __initdata = { + /*INT A B C D */ + { 0, 0, -1, -1, -1}, /* idsel 6 (53c810) */ + {-1, -1, -1, -1, -1}, /* idsel 7 (SIO: PCI/ISA bridge) */ + { 1, 1, 2, 3, 0}, /* idsel 8 (slot A) */ + { 2, 2, 3, 0, 1}, /* idsel 9 (slot B) */ + {-1, -1, -1, -1, -1}, /* idsel 10 (unused) */ + {-1, -1, -1, -1, -1}, /* idsel 11 (unused) */ + { 3, 3, -1, -1, -1}, /* idsel 12 (CMD0646) */ + }; + const long min_idsel = 6, max_idsel = 12, irqs_per_slot = 5; + int irq = COMMON_TABLE_LOOKUP, tmp; + tmp = __kernel_extbl(alpha_mv.sys.sio.route_tab, irq); + return irq >= 0 ? tmp : -1; +} + +static inline void __init +noname_init_pci(void) +{ + common_init_pci(); + sio_pci_route(); + sio_fixup_irq_levels(sio_collect_irq_levels()); + + if (pc873xx_probe() == -1) { + printk(KERN_ERR "Probing for PC873xx Super IO chip failed.\n"); + } else { + printk(KERN_INFO "Found %s Super IO chip at 0x%x\n", + pc873xx_get_model(), pc873xx_get_base()); + + /* Enabling things in the Super IO chip doesn't actually + * configure and enable things, the legacy drivers still + * need to do the actual configuration and enabling. + * This only unblocks them. + */ + +#if !defined(CONFIG_ALPHA_AVANTI) + /* Don't bother on the Avanti family. + * None of them had on-board IDE. + */ + pc873xx_enable_ide(); +#endif + pc873xx_enable_epp19(); + } +} + +static inline void __init +alphabook1_init_pci(void) +{ + struct pci_dev *dev; + unsigned char orig, config; + + common_init_pci(); + sio_pci_route(); + + /* + * On the AlphaBook1, the PCMCIA chip (Cirrus 6729) + * is sensitive to PCI bus bursts, so we must DISABLE + * burst mode for the NCR 8xx SCSI... :-( + * + * Note that the NCR810 SCSI driver must preserve the + * setting of the bit in order for this to work. At the + * moment (2.0.29), ncr53c8xx.c does NOT do this, but + * 53c7,8xx.c DOES. + */ + + dev = NULL; + while ((dev = pci_get_device(PCI_VENDOR_ID_NCR, PCI_ANY_ID, dev))) { + if (dev->device == PCI_DEVICE_ID_NCR_53C810 + || dev->device == PCI_DEVICE_ID_NCR_53C815 + || dev->device == PCI_DEVICE_ID_NCR_53C820 + || dev->device == PCI_DEVICE_ID_NCR_53C825) { + unsigned long io_port; + unsigned char ctest4; + + io_port = dev->resource[0].start; + ctest4 = inb(io_port+0x21); + if (!(ctest4 & 0x80)) { + printk("AlphaBook1 NCR init: setting" + " burst disable\n"); + outb(ctest4 | 0x80, io_port+0x21); + } + } + } + + /* Do not set *ANY* level triggers for AlphaBook1. */ + sio_fixup_irq_levels(0); + + /* Make sure that register PR1 indicates 1Mb mem */ + outb(0x0f, 0x3ce); orig = inb(0x3cf); /* read PR5 */ + outb(0x0f, 0x3ce); outb(0x05, 0x3cf); /* unlock PR0-4 */ + outb(0x0b, 0x3ce); config = inb(0x3cf); /* read PR1 */ + if ((config & 0xc0) != 0xc0) { + printk("AlphaBook1 VGA init: setting 1Mb memory\n"); + config |= 0xc0; + outb(0x0b, 0x3ce); outb(config, 0x3cf); /* write PR1 */ + } + outb(0x0f, 0x3ce); outb(orig, 0x3cf); /* (re)lock PR0-4 */ +} + +void +sio_kill_arch(int mode) +{ +#if defined(ALPHA_RESTORE_SRM_SETUP) + /* Since we cannot read the PCI DMA Window CSRs, we + * cannot restore them here. + * + * However, we CAN read the PIRQ route register, so restore it + * now... + */ + pci_bus_write_config_dword(pci_isa_hose->bus, PCI_DEVFN(7, 0), 0x60, + saved_config.orig_route_tab); +#endif +} + + +/* + * The System Vectors + */ + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_BOOK1) +struct alpha_machine_vector alphabook1_mv __initmv = { + .vector_name = "AlphaBook1", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_LCA_IO, + .machine_check = lca_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 16, + .device_interrupt = isa_device_interrupt, + + .init_arch = alphabook1_init_arch, + .init_irq = sio_init_irq, + .init_rtc = common_init_rtc, + .init_pci = alphabook1_init_pci, + .kill_arch = sio_kill_arch, + .pci_map_irq = noname_map_irq, + .pci_swizzle = common_swizzle, + + .sys = { .sio = { + /* NCR810 SCSI is 14, PCMCIA controller is 15. */ + .route_tab = 0x0e0f0a0a, + }} +}; +ALIAS_MV(alphabook1) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_AVANTI) +struct alpha_machine_vector avanti_mv __initmv = { + .vector_name = "Avanti", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_APECS_IO, + .machine_check = apecs_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 16, + .device_interrupt = isa_device_interrupt, + + .init_arch = apecs_init_arch, + .init_irq = sio_init_irq, + .init_rtc = common_init_rtc, + .init_pci = noname_init_pci, + .kill_arch = sio_kill_arch, + .pci_map_irq = noname_map_irq, + .pci_swizzle = common_swizzle, + + .sys = { .sio = { + .route_tab = 0x0b0a050f, /* leave 14 for IDE, 9 for SND */ + }} +}; +ALIAS_MV(avanti) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_NONAME) +struct alpha_machine_vector noname_mv __initmv = { + .vector_name = "Noname", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_LCA_IO, + .machine_check = lca_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 16, + .device_interrupt = srm_device_interrupt, + + .init_arch = lca_init_arch, + .init_irq = sio_init_irq, + .init_rtc = common_init_rtc, + .init_pci = noname_init_pci, + .kill_arch = sio_kill_arch, + .pci_map_irq = noname_map_irq, + .pci_swizzle = common_swizzle, + + .sys = { .sio = { + /* For UDB, the only available PCI slot must not map to IRQ 9, + since that's the builtin MSS sound chip. That PCI slot + will map to PIRQ1 (for INTA at least), so we give it IRQ 15 + instead. + + Unfortunately we have to do this for NONAME as well, since + they are co-indicated when the platform type "Noname" is + selected... :-( */ + + .route_tab = 0x0b0a0f0d, + }} +}; +ALIAS_MV(noname) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_P2K) +struct alpha_machine_vector p2k_mv __initmv = { + .vector_name = "Platform2000", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_LCA_IO, + .machine_check = lca_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 16, + .device_interrupt = srm_device_interrupt, + + .init_arch = lca_init_arch, + .init_irq = sio_init_irq, + .init_rtc = common_init_rtc, + .init_pci = noname_init_pci, + .kill_arch = sio_kill_arch, + .pci_map_irq = p2k_map_irq, + .pci_swizzle = common_swizzle, + + .sys = { .sio = { + .route_tab = 0x0b0a090f, + }} +}; +ALIAS_MV(p2k) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_XL) +struct alpha_machine_vector xl_mv __initmv = { + .vector_name = "XL", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_APECS_IO, + .machine_check = apecs_machine_check, + .max_isa_dma_address = ALPHA_XL_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = XL_DEFAULT_MEM_BASE, + + .nr_irqs = 16, + .device_interrupt = isa_device_interrupt, + + .init_arch = apecs_init_arch, + .init_irq = sio_init_irq, + .init_rtc = common_init_rtc, + .init_pci = noname_init_pci, + .kill_arch = sio_kill_arch, + .pci_map_irq = noname_map_irq, + .pci_swizzle = common_swizzle, + + .sys = { .sio = { + .route_tab = 0x0b0a090f, + }} +}; +ALIAS_MV(xl) +#endif diff --git a/arch/alpha/kernel/sys_sx164.c b/arch/alpha/kernel/sys_sx164.c new file mode 100644 index 00000000..d063b360 --- /dev/null +++ b/arch/alpha/kernel/sys_sx164.c @@ -0,0 +1,178 @@ +/* + * linux/arch/alpha/kernel/sys_sx164.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999, 2000 Richard Henderson + * + * Code supporting the SX164 (PCA56+PYXIS). + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/bitops.h> + +#include <asm/ptrace.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/core_cia.h> +#include <asm/hwrpb.h> +#include <asm/tlbflush.h> +#include <asm/special_insns.h> + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +static void __init +sx164_init_irq(void) +{ + outb(0, DMA1_RESET_REG); + outb(0, DMA2_RESET_REG); + outb(DMA_MODE_CASCADE, DMA2_MODE_REG); + outb(0, DMA2_MASK_REG); + + if (alpha_using_srm) + alpha_mv.device_interrupt = srm_device_interrupt; + + init_i8259a_irqs(); + + /* Not interested in the bogus interrupts (0,3,4,5,40-47), + NMI (1), or HALT (2). */ + if (alpha_using_srm) + init_srm_irqs(40, 0x3f0000); + else + init_pyxis_irqs(0xff00003f0000UL); + + setup_irq(16+6, &timer_cascade_irqaction); +} + +/* + * PCI Fixup configuration. + * + * Summary @ PYXIS_INT_REQ: + * Bit Meaning + * 0 RSVD + * 1 NMI + * 2 Halt/Reset switch + * 3 MBZ + * 4 RAZ + * 5 RAZ + * 6 Interval timer (RTC) + * 7 PCI-ISA Bridge + * 8 Interrupt Line A from slot 3 + * 9 Interrupt Line A from slot 2 + *10 Interrupt Line A from slot 1 + *11 Interrupt Line A from slot 0 + *12 Interrupt Line B from slot 3 + *13 Interrupt Line B from slot 2 + *14 Interrupt Line B from slot 1 + *15 Interrupt line B from slot 0 + *16 Interrupt Line C from slot 3 + *17 Interrupt Line C from slot 2 + *18 Interrupt Line C from slot 1 + *19 Interrupt Line C from slot 0 + *20 Interrupt Line D from slot 3 + *21 Interrupt Line D from slot 2 + *22 Interrupt Line D from slot 1 + *23 Interrupt Line D from slot 0 + * + * IdSel + * 5 32 bit PCI option slot 2 + * 6 64 bit PCI option slot 0 + * 7 64 bit PCI option slot 1 + * 8 Cypress I/O + * 9 32 bit PCI option slot 3 + */ + +static int __init +sx164_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[5][5] __initdata = { + /*INT INTA INTB INTC INTD */ + { 16+ 9, 16+ 9, 16+13, 16+17, 16+21}, /* IdSel 5 slot 2 J17 */ + { 16+11, 16+11, 16+15, 16+19, 16+23}, /* IdSel 6 slot 0 J19 */ + { 16+10, 16+10, 16+14, 16+18, 16+22}, /* IdSel 7 slot 1 J18 */ + { -1, -1, -1, -1, -1}, /* IdSel 8 SIO */ + { 16+ 8, 16+ 8, 16+12, 16+16, 16+20} /* IdSel 9 slot 3 J15 */ + }; + const long min_idsel = 5, max_idsel = 9, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + +static void __init +sx164_init_pci(void) +{ + cia_init_pci(); + SMC669_Init(0); +} + +static void __init +sx164_init_arch(void) +{ + /* + * OSF palcode v1.23 forgets to enable PCA56 Motion Video + * Instructions. Let's enable it. + * We have to check palcode revision because CSERVE interface + * is subject to change without notice. For example, it + * has been changed completely since v1.16 (found in MILO + * distribution). -ink + */ + struct percpu_struct *cpu = (struct percpu_struct*) + ((char*)hwrpb + hwrpb->processor_offset); + + if (amask(AMASK_MAX) != 0 + && alpha_using_srm + && (cpu->pal_revision & 0xffff) <= 0x117) { + __asm__ __volatile__( + "lda $16,8($31)\n" + "call_pal 9\n" /* Allow PALRES insns in kernel mode */ + ".long 0x64000118\n\n" /* hw_mfpr $0,icsr */ + "ldah $16,(1<<(19-16))($31)\n" + "or $0,$16,$0\n" /* set MVE bit */ + ".long 0x74000118\n" /* hw_mtpr $0,icsr */ + "lda $16,9($31)\n" + "call_pal 9" /* Disable PALRES insns */ + : : : "$0", "$16"); + printk("PCA56 MVI set enabled\n"); + } + + pyxis_init_arch(); +} + +/* + * The System Vector + */ + +struct alpha_machine_vector sx164_mv __initmv = { + .vector_name = "SX164", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_PYXIS_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = PYXIS_DAC_OFFSET, + + .nr_irqs = 48, + .device_interrupt = pyxis_device_interrupt, + + .init_arch = sx164_init_arch, + .init_irq = sx164_init_irq, + .init_rtc = common_init_rtc, + .init_pci = sx164_init_pci, + .kill_arch = cia_kill_arch, + .pci_map_irq = sx164_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(sx164) diff --git a/arch/alpha/kernel/sys_takara.c b/arch/alpha/kernel/sys_takara.c new file mode 100644 index 00000000..dd0f1eae --- /dev/null +++ b/arch/alpha/kernel/sys_takara.c @@ -0,0 +1,288 @@ +/* + * linux/arch/alpha/kernel/sys_takara.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code supporting the TAKARA. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> + +#include <asm/ptrace.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/core_cia.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" +#include "pc873xx.h" + +/* Note mask bit is true for DISABLED irqs. */ +static unsigned long cached_irq_mask[2] = { -1, -1 }; + +static inline void +takara_update_irq_hw(unsigned long irq, unsigned long mask) +{ + int regaddr; + + mask = (irq >= 64 ? mask << 16 : mask >> ((irq - 16) & 0x30)); + regaddr = 0x510 + (((irq - 16) >> 2) & 0x0c); + outl(mask & 0xffff0000UL, regaddr); +} + +static inline void +takara_enable_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + unsigned long mask; + mask = (cached_irq_mask[irq >= 64] &= ~(1UL << (irq & 63))); + takara_update_irq_hw(irq, mask); +} + +static void +takara_disable_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + unsigned long mask; + mask = (cached_irq_mask[irq >= 64] |= 1UL << (irq & 63)); + takara_update_irq_hw(irq, mask); +} + +static struct irq_chip takara_irq_type = { + .name = "TAKARA", + .irq_unmask = takara_enable_irq, + .irq_mask = takara_disable_irq, + .irq_mask_ack = takara_disable_irq, +}; + +static void +takara_device_interrupt(unsigned long vector) +{ + unsigned intstatus; + + /* + * The PALcode will have passed us vectors 0x800 or 0x810, + * which are fairly arbitrary values and serve only to tell + * us whether an interrupt has come in on IRQ0 or IRQ1. If + * it's IRQ1 it's a PCI interrupt; if it's IRQ0, it's + * probably ISA, but PCI interrupts can come through IRQ0 + * as well if the interrupt controller isn't in accelerated + * mode. + * + * OTOH, the accelerator thing doesn't seem to be working + * overly well, so what we'll do instead is try directly + * examining the Master Interrupt Register to see if it's a + * PCI interrupt, and if _not_ then we'll pass it on to the + * ISA handler. + */ + + intstatus = inw(0x500) & 15; + if (intstatus) { + /* + * This is a PCI interrupt. Check each bit and + * despatch an interrupt if it's set. + */ + + if (intstatus & 8) handle_irq(16+3); + if (intstatus & 4) handle_irq(16+2); + if (intstatus & 2) handle_irq(16+1); + if (intstatus & 1) handle_irq(16+0); + } else { + isa_device_interrupt (vector); + } +} + +static void +takara_srm_device_interrupt(unsigned long vector) +{ + int irq = (vector - 0x800) >> 4; + handle_irq(irq); +} + +static void __init +takara_init_irq(void) +{ + long i; + + init_i8259a_irqs(); + + if (alpha_using_srm) { + alpha_mv.device_interrupt = takara_srm_device_interrupt; + } else { + unsigned int ctlreg = inl(0x500); + + /* Return to non-accelerated mode. */ + ctlreg &= ~0x8000; + outl(ctlreg, 0x500); + + /* Enable the PCI interrupt register. */ + ctlreg = 0x05107c00; + outl(ctlreg, 0x500); + } + + for (i = 16; i < 128; i += 16) + takara_update_irq_hw(i, -1); + + for (i = 16; i < 128; ++i) { + irq_set_chip_and_handler(i, &takara_irq_type, + handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + common_init_isa_dma(); +} + + +/* + * The Takara has PCI devices 1, 2, and 3 configured to slots 20, + * 19, and 18 respectively, in the default configuration. They can + * also be jumpered to slots 8, 7, and 6 respectively, which is fun + * because the SIO ISA bridge can also be slot 7. However, the SIO + * doesn't explicitly generate PCI-type interrupts, so we can + * assign it whatever the hell IRQ we like and it doesn't matter. + */ + +static int __init +takara_map_irq_srm(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[15][5] __initdata = { + { 16+3, 16+3, 16+3, 16+3, 16+3}, /* slot 6 == device 3 */ + { 16+2, 16+2, 16+2, 16+2, 16+2}, /* slot 7 == device 2 */ + { 16+1, 16+1, 16+1, 16+1, 16+1}, /* slot 8 == device 1 */ + { -1, -1, -1, -1, -1}, /* slot 9 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 10 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 11 == nothing */ + /* These are behind the bridges. */ + { 12, 12, 13, 14, 15}, /* slot 12 == nothing */ + { 8, 8, 9, 19, 11}, /* slot 13 == nothing */ + { 4, 4, 5, 6, 7}, /* slot 14 == nothing */ + { 0, 0, 1, 2, 3}, /* slot 15 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 16 == nothing */ + {64+ 0, 64+0, 64+1, 64+2, 64+3}, /* slot 17= device 4 */ + {48+ 0, 48+0, 48+1, 48+2, 48+3}, /* slot 18= device 3 */ + {32+ 0, 32+0, 32+1, 32+2, 32+3}, /* slot 19= device 2 */ + {16+ 0, 16+0, 16+1, 16+2, 16+3}, /* slot 20= device 1 */ + }; + const long min_idsel = 6, max_idsel = 20, irqs_per_slot = 5; + int irq = COMMON_TABLE_LOOKUP; + if (irq >= 0 && irq < 16) { + /* Guess that we are behind a bridge. */ + unsigned int busslot = PCI_SLOT(dev->bus->self->devfn); + irq += irq_tab[busslot-min_idsel][0]; + } + return irq; +} + +static int __init +takara_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[15][5] __initdata = { + { 16+3, 16+3, 16+3, 16+3, 16+3}, /* slot 6 == device 3 */ + { 16+2, 16+2, 16+2, 16+2, 16+2}, /* slot 7 == device 2 */ + { 16+1, 16+1, 16+1, 16+1, 16+1}, /* slot 8 == device 1 */ + { -1, -1, -1, -1, -1}, /* slot 9 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 10 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 11 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 12 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 13 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 14 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 15 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 16 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 17 == nothing */ + { 16+3, 16+3, 16+3, 16+3, 16+3}, /* slot 18 == device 3 */ + { 16+2, 16+2, 16+2, 16+2, 16+2}, /* slot 19 == device 2 */ + { 16+1, 16+1, 16+1, 16+1, 16+1}, /* slot 20 == device 1 */ + }; + const long min_idsel = 6, max_idsel = 20, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + +static u8 __init +takara_swizzle(struct pci_dev *dev, u8 *pinp) +{ + int slot = PCI_SLOT(dev->devfn); + int pin = *pinp; + unsigned int ctlreg = inl(0x500); + unsigned int busslot; + + if (!dev->bus->self) + return slot; + + busslot = PCI_SLOT(dev->bus->self->devfn); + /* Check for built-in bridges. */ + if (dev->bus->number != 0 + && busslot > 16 + && ((1<<(36-busslot)) & ctlreg)) { + if (pin == 1) + pin += (20 - busslot); + else { + printk(KERN_WARNING "takara_swizzle: can only " + "handle cards with INTA IRQ pin.\n"); + } + } else { + /* Must be a card-based bridge. */ + printk(KERN_WARNING "takara_swizzle: cannot handle " + "card-bridge behind builtin bridge yet.\n"); + } + + *pinp = pin; + return slot; +} + +static void __init +takara_init_pci(void) +{ + if (alpha_using_srm) + alpha_mv.pci_map_irq = takara_map_irq_srm; + + cia_init_pci(); + + if (pc873xx_probe() == -1) { + printk(KERN_ERR "Probing for PC873xx Super IO chip failed.\n"); + } else { + printk(KERN_INFO "Found %s Super IO chip at 0x%x\n", + pc873xx_get_model(), pc873xx_get_base()); + pc873xx_enable_ide(); + } +} + + +/* + * The System Vector + */ + +struct alpha_machine_vector takara_mv __initmv = { + .vector_name = "Takara", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_CIA_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = CIA_DEFAULT_MEM_BASE, + + .nr_irqs = 128, + .device_interrupt = takara_device_interrupt, + + .init_arch = cia_init_arch, + .init_irq = takara_init_irq, + .init_rtc = common_init_rtc, + .init_pci = takara_init_pci, + .kill_arch = cia_kill_arch, + .pci_map_irq = takara_map_irq, + .pci_swizzle = takara_swizzle, +}; +ALIAS_MV(takara) diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c new file mode 100644 index 00000000..2533db28 --- /dev/null +++ b/arch/alpha/kernel/sys_titan.c @@ -0,0 +1,419 @@ +/* + * linux/arch/alpha/kernel/sys_titan.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996, 1999 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * Copyright (C) 1999, 2000 Jeff Wiedemeier + * + * Code supporting TITAN systems (EV6+TITAN), currently: + * Privateer + * Falcon + * Granite + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/bitops.h> + +#include <asm/ptrace.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/core_titan.h> +#include <asm/hwrpb.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" +#include "err_impl.h" + + +/* + * Titan generic + */ + +/* + * Titan supports up to 4 CPUs + */ +static unsigned long titan_cpu_irq_affinity[4] = { ~0UL, ~0UL, ~0UL, ~0UL }; + +/* + * Mask is set (1) if enabled + */ +static unsigned long titan_cached_irq_mask; + +/* + * Need SMP-safe access to interrupt CSRs + */ +DEFINE_SPINLOCK(titan_irq_lock); + +static void +titan_update_irq_hw(unsigned long mask) +{ + register titan_cchip *cchip = TITAN_cchip; + unsigned long isa_enable = 1UL << 55; + register int bcpu = boot_cpuid; + +#ifdef CONFIG_SMP + cpumask_t cpm; + volatile unsigned long *dim0, *dim1, *dim2, *dim3; + unsigned long mask0, mask1, mask2, mask3, dummy; + + cpumask_copy(&cpm, cpu_present_mask); + mask &= ~isa_enable; + mask0 = mask & titan_cpu_irq_affinity[0]; + mask1 = mask & titan_cpu_irq_affinity[1]; + mask2 = mask & titan_cpu_irq_affinity[2]; + mask3 = mask & titan_cpu_irq_affinity[3]; + + if (bcpu == 0) mask0 |= isa_enable; + else if (bcpu == 1) mask1 |= isa_enable; + else if (bcpu == 2) mask2 |= isa_enable; + else mask3 |= isa_enable; + + dim0 = &cchip->dim0.csr; + dim1 = &cchip->dim1.csr; + dim2 = &cchip->dim2.csr; + dim3 = &cchip->dim3.csr; + if (!cpumask_test_cpu(0, &cpm)) dim0 = &dummy; + if (!cpumask_test_cpu(1, &cpm)) dim1 = &dummy; + if (!cpumask_test_cpu(2, &cpm)) dim2 = &dummy; + if (!cpumask_test_cpu(3, &cpm)) dim3 = &dummy; + + *dim0 = mask0; + *dim1 = mask1; + *dim2 = mask2; + *dim3 = mask3; + mb(); + *dim0; + *dim1; + *dim2; + *dim3; +#else + volatile unsigned long *dimB; + dimB = &cchip->dim0.csr; + if (bcpu == 1) dimB = &cchip->dim1.csr; + else if (bcpu == 2) dimB = &cchip->dim2.csr; + else if (bcpu == 3) dimB = &cchip->dim3.csr; + + *dimB = mask | isa_enable; + mb(); + *dimB; +#endif +} + +static inline void +titan_enable_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + spin_lock(&titan_irq_lock); + titan_cached_irq_mask |= 1UL << (irq - 16); + titan_update_irq_hw(titan_cached_irq_mask); + spin_unlock(&titan_irq_lock); +} + +static inline void +titan_disable_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + spin_lock(&titan_irq_lock); + titan_cached_irq_mask &= ~(1UL << (irq - 16)); + titan_update_irq_hw(titan_cached_irq_mask); + spin_unlock(&titan_irq_lock); +} + +static void +titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) +{ + int cpu; + + for (cpu = 0; cpu < 4; cpu++) { + if (cpumask_test_cpu(cpu, &affinity)) + titan_cpu_irq_affinity[cpu] |= 1UL << irq; + else + titan_cpu_irq_affinity[cpu] &= ~(1UL << irq); + } + +} + +static int +titan_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity, + bool force) +{ + unsigned int irq = d->irq; + spin_lock(&titan_irq_lock); + titan_cpu_set_irq_affinity(irq - 16, *affinity); + titan_update_irq_hw(titan_cached_irq_mask); + spin_unlock(&titan_irq_lock); + + return 0; +} + +static void +titan_device_interrupt(unsigned long vector) +{ + printk("titan_device_interrupt: NOT IMPLEMENTED YET!!\n"); +} + +static void +titan_srm_device_interrupt(unsigned long vector) +{ + int irq; + + irq = (vector - 0x800) >> 4; + handle_irq(irq); +} + + +static void __init +init_titan_irqs(struct irq_chip * ops, int imin, int imax) +{ + long i; + for (i = imin; i <= imax; ++i) { + irq_set_chip_and_handler(i, ops, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } +} + +static struct irq_chip titan_irq_type = { + .name = "TITAN", + .irq_unmask = titan_enable_irq, + .irq_mask = titan_disable_irq, + .irq_mask_ack = titan_disable_irq, + .irq_set_affinity = titan_set_irq_affinity, +}; + +static irqreturn_t +titan_intr_nop(int irq, void *dev_id) +{ + /* + * This is a NOP interrupt handler for the purposes of + * event counting -- just return. + */ + return IRQ_HANDLED; +} + +static void __init +titan_init_irq(void) +{ + if (alpha_using_srm && !alpha_mv.device_interrupt) + alpha_mv.device_interrupt = titan_srm_device_interrupt; + if (!alpha_mv.device_interrupt) + alpha_mv.device_interrupt = titan_device_interrupt; + + titan_update_irq_hw(0); + + init_titan_irqs(&titan_irq_type, 16, 63 + 16); +} + +static void __init +titan_legacy_init_irq(void) +{ + /* init the legacy dma controller */ + outb(0, DMA1_RESET_REG); + outb(0, DMA2_RESET_REG); + outb(DMA_MODE_CASCADE, DMA2_MODE_REG); + outb(0, DMA2_MASK_REG); + + /* init the legacy irq controller */ + init_i8259a_irqs(); + + /* init the titan irqs */ + titan_init_irq(); +} + +void +titan_dispatch_irqs(u64 mask) +{ + unsigned long vector; + + /* + * Mask down to those interrupts which are enable on this processor + */ + mask &= titan_cpu_irq_affinity[smp_processor_id()]; + + /* + * Dispatch all requested interrupts + */ + while (mask) { + /* convert to SRM vector... priority is <63> -> <0> */ + vector = 63 - __kernel_ctlz(mask); + mask &= ~(1UL << vector); /* clear it out */ + vector = 0x900 + (vector << 4); /* convert to SRM vector */ + + /* dispatch it */ + alpha_mv.device_interrupt(vector); + } +} + + +/* + * Titan Family + */ +static void __init +titan_request_irq(unsigned int irq, irq_handler_t handler, + unsigned long irqflags, const char *devname, + void *dev_id) +{ + int err; + err = request_irq(irq, handler, irqflags, devname, dev_id); + if (err) { + printk("titan_request_irq for IRQ %d returned %d; ignoring\n", + irq, err); + } +} + +static void __init +titan_late_init(void) +{ + /* + * Enable the system error interrupts. These interrupts are + * all reported to the kernel as machine checks, so the handler + * is a nop so it can be called to count the individual events. + */ + titan_request_irq(63+16, titan_intr_nop, IRQF_DISABLED, + "CChip Error", NULL); + titan_request_irq(62+16, titan_intr_nop, IRQF_DISABLED, + "PChip 0 H_Error", NULL); + titan_request_irq(61+16, titan_intr_nop, IRQF_DISABLED, + "PChip 1 H_Error", NULL); + titan_request_irq(60+16, titan_intr_nop, IRQF_DISABLED, + "PChip 0 C_Error", NULL); + titan_request_irq(59+16, titan_intr_nop, IRQF_DISABLED, + "PChip 1 C_Error", NULL); + + /* + * Register our error handlers. + */ + titan_register_error_handlers(); + + /* + * Check if the console left us any error logs. + */ + cdl_check_console_data_log(); + +} + +static int __devinit +titan_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + u8 intline; + int irq; + + /* Get the current intline. */ + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &intline); + irq = intline; + + /* Is it explicitly routed through ISA? */ + if ((irq & 0xF0) == 0xE0) + return irq; + + /* Offset by 16 to make room for ISA interrupts 0 - 15. */ + return irq + 16; +} + +static void __init +titan_init_pci(void) +{ + /* + * This isn't really the right place, but there's some init + * that needs to be done after everything is basically up. + */ + titan_late_init(); + + /* Indicate that we trust the console to configure things properly */ + pci_set_flags(PCI_PROBE_ONLY); + common_init_pci(); + SMC669_Init(0); + locate_and_init_vga(NULL); +} + + +/* + * Privateer + */ +static void __init +privateer_init_pci(void) +{ + /* + * Hook a couple of extra err interrupts that the + * common titan code won't. + */ + titan_request_irq(53+16, titan_intr_nop, IRQF_DISABLED, + "NMI", NULL); + titan_request_irq(50+16, titan_intr_nop, IRQF_DISABLED, + "Temperature Warning", NULL); + + /* + * Finish with the common version. + */ + return titan_init_pci(); +} + + +/* + * The System Vectors. + */ +struct alpha_machine_vector titan_mv __initmv = { + .vector_name = "TITAN", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_TITAN_IO, + .machine_check = titan_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = TITAN_DAC_OFFSET, + + .nr_irqs = 80, /* 64 + 16 */ + /* device_interrupt will be filled in by titan_init_irq */ + + .agp_info = titan_agp_info, + + .init_arch = titan_init_arch, + .init_irq = titan_legacy_init_irq, + .init_rtc = common_init_rtc, + .init_pci = titan_init_pci, + + .kill_arch = titan_kill_arch, + .pci_map_irq = titan_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(titan) + +struct alpha_machine_vector privateer_mv __initmv = { + .vector_name = "PRIVATEER", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_TITAN_IO, + .machine_check = privateer_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = TITAN_DAC_OFFSET, + + .nr_irqs = 80, /* 64 + 16 */ + /* device_interrupt will be filled in by titan_init_irq */ + + .agp_info = titan_agp_info, + + .init_arch = titan_init_arch, + .init_irq = titan_legacy_init_irq, + .init_rtc = common_init_rtc, + .init_pci = privateer_init_pci, + + .kill_arch = titan_kill_arch, + .pci_map_irq = titan_map_irq, + .pci_swizzle = common_swizzle, +}; +/* No alpha_mv alias for privateer since we compile it + in unconditionally with titan; setup_arch knows how to cope. */ diff --git a/arch/alpha/kernel/sys_wildfire.c b/arch/alpha/kernel/sys_wildfire.c new file mode 100644 index 00000000..ee187488 --- /dev/null +++ b/arch/alpha/kernel/sys_wildfire.c @@ -0,0 +1,349 @@ +/* + * linux/arch/alpha/kernel/sys_wildfire.c + * + * Wildfire support. + * + * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/bitops.h> + +#include <asm/ptrace.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/core_wildfire.h> +#include <asm/hwrpb.h> +#include <asm/tlbflush.h> + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + +static unsigned long cached_irq_mask[WILDFIRE_NR_IRQS/(sizeof(long)*8)]; + +DEFINE_SPINLOCK(wildfire_irq_lock); + +static int doing_init_irq_hw = 0; + +static void +wildfire_update_irq_hw(unsigned int irq) +{ + int qbbno = (irq >> 8) & (WILDFIRE_MAX_QBB - 1); + int pcano = (irq >> 6) & (WILDFIRE_PCA_PER_QBB - 1); + wildfire_pca *pca; + volatile unsigned long * enable0; + + if (!WILDFIRE_PCA_EXISTS(qbbno, pcano)) { + if (!doing_init_irq_hw) { + printk(KERN_ERR "wildfire_update_irq_hw:" + " got irq %d for non-existent PCA %d" + " on QBB %d.\n", + irq, pcano, qbbno); + } + return; + } + + pca = WILDFIRE_pca(qbbno, pcano); + enable0 = (unsigned long *) &pca->pca_int[0].enable; /* ??? */ + + *enable0 = cached_irq_mask[qbbno * WILDFIRE_PCA_PER_QBB + pcano]; + mb(); + *enable0; +} + +static void __init +wildfire_init_irq_hw(void) +{ +#if 0 + register wildfire_pca * pca = WILDFIRE_pca(0, 0); + volatile unsigned long * enable0, * enable1, * enable2, *enable3; + volatile unsigned long * target0, * target1, * target2, *target3; + + enable0 = (unsigned long *) &pca->pca_int[0].enable; + enable1 = (unsigned long *) &pca->pca_int[1].enable; + enable2 = (unsigned long *) &pca->pca_int[2].enable; + enable3 = (unsigned long *) &pca->pca_int[3].enable; + + target0 = (unsigned long *) &pca->pca_int[0].target; + target1 = (unsigned long *) &pca->pca_int[1].target; + target2 = (unsigned long *) &pca->pca_int[2].target; + target3 = (unsigned long *) &pca->pca_int[3].target; + + *enable0 = *enable1 = *enable2 = *enable3 = 0; + + *target0 = (1UL<<8) | WILDFIRE_QBB(0); + *target1 = *target2 = *target3 = 0; + + mb(); + + *enable0; *enable1; *enable2; *enable3; + *target0; *target1; *target2; *target3; + +#else + int i; + + doing_init_irq_hw = 1; + + /* Need to update only once for every possible PCA. */ + for (i = 0; i < WILDFIRE_NR_IRQS; i+=WILDFIRE_IRQ_PER_PCA) + wildfire_update_irq_hw(i); + + doing_init_irq_hw = 0; +#endif +} + +static void +wildfire_enable_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + + if (irq < 16) + i8259a_enable_irq(d); + + spin_lock(&wildfire_irq_lock); + set_bit(irq, &cached_irq_mask); + wildfire_update_irq_hw(irq); + spin_unlock(&wildfire_irq_lock); +} + +static void +wildfire_disable_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + + if (irq < 16) + i8259a_disable_irq(d); + + spin_lock(&wildfire_irq_lock); + clear_bit(irq, &cached_irq_mask); + wildfire_update_irq_hw(irq); + spin_unlock(&wildfire_irq_lock); +} + +static void +wildfire_mask_and_ack_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + + if (irq < 16) + i8259a_mask_and_ack_irq(d); + + spin_lock(&wildfire_irq_lock); + clear_bit(irq, &cached_irq_mask); + wildfire_update_irq_hw(irq); + spin_unlock(&wildfire_irq_lock); +} + +static struct irq_chip wildfire_irq_type = { + .name = "WILDFIRE", + .irq_unmask = wildfire_enable_irq, + .irq_mask = wildfire_disable_irq, + .irq_mask_ack = wildfire_mask_and_ack_irq, +}; + +static void __init +wildfire_init_irq_per_pca(int qbbno, int pcano) +{ + int i, irq_bias; + static struct irqaction isa_enable = { + .handler = no_action, + .name = "isa_enable", + }; + + irq_bias = qbbno * (WILDFIRE_PCA_PER_QBB * WILDFIRE_IRQ_PER_PCA) + + pcano * WILDFIRE_IRQ_PER_PCA; + +#if 0 + unsigned long io_bias; + + /* Only need the following for first PCI bus per PCA. */ + io_bias = WILDFIRE_IO(qbbno, pcano<<1) - WILDFIRE_IO_BIAS; + + outb(0, DMA1_RESET_REG + io_bias); + outb(0, DMA2_RESET_REG + io_bias); + outb(DMA_MODE_CASCADE, DMA2_MODE_REG + io_bias); + outb(0, DMA2_MASK_REG + io_bias); +#endif + +#if 0 + /* ??? Not sure how to do this, yet... */ + init_i8259a_irqs(); /* ??? */ +#endif + + for (i = 0; i < 16; ++i) { + if (i == 2) + continue; + irq_set_chip_and_handler(i + irq_bias, &wildfire_irq_type, + handle_level_irq); + irq_set_status_flags(i + irq_bias, IRQ_LEVEL); + } + + irq_set_chip_and_handler(36 + irq_bias, &wildfire_irq_type, + handle_level_irq); + irq_set_status_flags(36 + irq_bias, IRQ_LEVEL); + for (i = 40; i < 64; ++i) { + irq_set_chip_and_handler(i + irq_bias, &wildfire_irq_type, + handle_level_irq); + irq_set_status_flags(i + irq_bias, IRQ_LEVEL); + } + + setup_irq(32+irq_bias, &isa_enable); +} + +static void __init +wildfire_init_irq(void) +{ + int qbbno, pcano; + +#if 1 + wildfire_init_irq_hw(); + init_i8259a_irqs(); +#endif + + for (qbbno = 0; qbbno < WILDFIRE_MAX_QBB; qbbno++) { + if (WILDFIRE_QBB_EXISTS(qbbno)) { + for (pcano = 0; pcano < WILDFIRE_PCA_PER_QBB; pcano++) { + if (WILDFIRE_PCA_EXISTS(qbbno, pcano)) { + wildfire_init_irq_per_pca(qbbno, pcano); + } + } + } + } +} + +static void +wildfire_device_interrupt(unsigned long vector) +{ + int irq; + + irq = (vector - 0x800) >> 4; + + /* + * bits 10-8: source QBB ID + * bits 7-6: PCA + * bits 5-0: irq in PCA + */ + + handle_irq(irq); + return; +} + +/* + * PCI Fixup configuration. + * + * Summary per PCA (2 PCI or HIPPI buses): + * + * Bit Meaning + * 0-15 ISA + * + *32 ISA summary + *33 SMI + *34 NMI + *36 builtin QLogic SCSI (or slot 0 if no IO module) + *40 Interrupt Line A from slot 2 PCI0 + *41 Interrupt Line B from slot 2 PCI0 + *42 Interrupt Line C from slot 2 PCI0 + *43 Interrupt Line D from slot 2 PCI0 + *44 Interrupt Line A from slot 3 PCI0 + *45 Interrupt Line B from slot 3 PCI0 + *46 Interrupt Line C from slot 3 PCI0 + *47 Interrupt Line D from slot 3 PCI0 + * + *48 Interrupt Line A from slot 4 PCI1 + *49 Interrupt Line B from slot 4 PCI1 + *50 Interrupt Line C from slot 4 PCI1 + *51 Interrupt Line D from slot 4 PCI1 + *52 Interrupt Line A from slot 5 PCI1 + *53 Interrupt Line B from slot 5 PCI1 + *54 Interrupt Line C from slot 5 PCI1 + *55 Interrupt Line D from slot 5 PCI1 + *56 Interrupt Line A from slot 6 PCI1 + *57 Interrupt Line B from slot 6 PCI1 + *58 Interrupt Line C from slot 6 PCI1 + *50 Interrupt Line D from slot 6 PCI1 + *60 Interrupt Line A from slot 7 PCI1 + *61 Interrupt Line B from slot 7 PCI1 + *62 Interrupt Line C from slot 7 PCI1 + *63 Interrupt Line D from slot 7 PCI1 + * + * + * IdSel + * 0 Cypress Bridge I/O (ISA summary interrupt) + * 1 64 bit PCI 0 option slot 1 (SCSI QLogic builtin) + * 2 64 bit PCI 0 option slot 2 + * 3 64 bit PCI 0 option slot 3 + * 4 64 bit PCI 1 option slot 4 + * 5 64 bit PCI 1 option slot 5 + * 6 64 bit PCI 1 option slot 6 + * 7 64 bit PCI 1 option slot 7 + */ + +static int __init +wildfire_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[8][5] __initdata = { + /*INT INTA INTB INTC INTD */ + { -1, -1, -1, -1, -1}, /* IdSel 0 ISA Bridge */ + { 36, 36, 36+1, 36+2, 36+3}, /* IdSel 1 SCSI builtin */ + { 40, 40, 40+1, 40+2, 40+3}, /* IdSel 2 PCI 0 slot 2 */ + { 44, 44, 44+1, 44+2, 44+3}, /* IdSel 3 PCI 0 slot 3 */ + { 48, 48, 48+1, 48+2, 48+3}, /* IdSel 4 PCI 1 slot 4 */ + { 52, 52, 52+1, 52+2, 52+3}, /* IdSel 5 PCI 1 slot 5 */ + { 56, 56, 56+1, 56+2, 56+3}, /* IdSel 6 PCI 1 slot 6 */ + { 60, 60, 60+1, 60+2, 60+3}, /* IdSel 7 PCI 1 slot 7 */ + }; + long min_idsel = 0, max_idsel = 7, irqs_per_slot = 5; + + struct pci_controller *hose = dev->sysdata; + int irq = COMMON_TABLE_LOOKUP; + + if (irq > 0) { + int qbbno = hose->index >> 3; + int pcano = (hose->index >> 1) & 3; + irq += (qbbno << 8) + (pcano << 6); + } + return irq; +} + + +/* + * The System Vectors + */ + +struct alpha_machine_vector wildfire_mv __initmv = { + .vector_name = "WILDFIRE", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_WILDFIRE_IO, + .machine_check = wildfire_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + + .nr_irqs = WILDFIRE_NR_IRQS, + .device_interrupt = wildfire_device_interrupt, + + .init_arch = wildfire_init_arch, + .init_irq = wildfire_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .kill_arch = wildfire_kill_arch, + .pci_map_irq = wildfire_map_irq, + .pci_swizzle = common_swizzle, + + .pa_to_nid = wildfire_pa_to_nid, + .cpuid_to_nid = wildfire_cpuid_to_nid, + .node_mem_start = wildfire_node_mem_start, + .node_mem_size = wildfire_node_mem_size, +}; +ALIAS_MV(wildfire) diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S new file mode 100644 index 00000000..e534e1c5 --- /dev/null +++ b/arch/alpha/kernel/systbls.S @@ -0,0 +1,532 @@ +/* + * arch/alpha/kernel/systbls.S + * + * The system call table. + */ + +#include <asm/unistd.h> + + .data + .align 3 + .globl sys_call_table +sys_call_table: + .quad alpha_ni_syscall /* 0 */ + .quad sys_exit + .quad sys_fork + .quad sys_read + .quad sys_write + .quad alpha_ni_syscall /* 5 */ + .quad sys_close + .quad sys_osf_wait4 + .quad alpha_ni_syscall + .quad sys_link + .quad sys_unlink /* 10 */ + .quad alpha_ni_syscall + .quad sys_chdir + .quad sys_fchdir + .quad sys_mknod + .quad sys_chmod /* 15 */ + .quad sys_chown + .quad sys_osf_brk + .quad alpha_ni_syscall + .quad sys_lseek + .quad sys_getxpid /* 20 */ + .quad sys_osf_mount + .quad sys_umount + .quad sys_setuid + .quad sys_getxuid + .quad alpha_ni_syscall /* 25 */ + .quad sys_ptrace + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 30 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad sys_access + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 35 */ + .quad sys_sync + .quad sys_kill + .quad alpha_ni_syscall + .quad sys_setpgid + .quad alpha_ni_syscall /* 40 */ + .quad sys_dup + .quad sys_alpha_pipe + .quad sys_osf_set_program_attributes + .quad alpha_ni_syscall + .quad sys_open /* 45 */ + .quad alpha_ni_syscall + .quad sys_getxgid + .quad sys_osf_sigprocmask + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 50 */ + .quad sys_acct + .quad sys_sigpending + .quad alpha_ni_syscall + .quad sys_ioctl + .quad alpha_ni_syscall /* 55 */ + .quad alpha_ni_syscall + .quad sys_symlink + .quad sys_readlink + .quad sys_execve + .quad sys_umask /* 60 */ + .quad sys_chroot + .quad alpha_ni_syscall + .quad sys_getpgrp + .quad sys_getpagesize + .quad alpha_ni_syscall /* 65 */ + .quad sys_vfork + .quad sys_newstat + .quad sys_newlstat + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 70 */ + .quad sys_osf_mmap + .quad alpha_ni_syscall + .quad sys_munmap + .quad sys_mprotect + .quad sys_madvise /* 75 */ + .quad sys_vhangup + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad sys_getgroups + /* map BSD's setpgrp to sys_setpgid for binary compatibility: */ + .quad sys_setgroups /* 80 */ + .quad alpha_ni_syscall + .quad sys_setpgid + .quad sys_osf_setitimer + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 85 */ + .quad sys_osf_getitimer + .quad sys_gethostname + .quad sys_sethostname + .quad sys_getdtablesize + .quad sys_dup2 /* 90 */ + .quad sys_newfstat + .quad sys_fcntl + .quad sys_osf_select + .quad sys_poll + .quad sys_fsync /* 95 */ + .quad sys_setpriority + .quad sys_socket + .quad sys_connect + .quad sys_accept + .quad osf_getpriority /* 100 */ + .quad sys_send + .quad sys_recv + .quad sys_sigreturn + .quad sys_bind + .quad sys_setsockopt /* 105 */ + .quad sys_listen + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 110 */ + .quad sys_sigsuspend + .quad sys_osf_sigstack + .quad sys_recvmsg + .quad sys_sendmsg + .quad alpha_ni_syscall /* 115 */ + .quad sys_osf_gettimeofday + .quad sys_osf_getrusage + .quad sys_getsockopt + .quad alpha_ni_syscall +#ifdef CONFIG_OSF4_COMPAT + .quad sys_osf_readv /* 120 */ + .quad sys_osf_writev +#else + .quad sys_readv /* 120 */ + .quad sys_writev +#endif + .quad sys_osf_settimeofday + .quad sys_fchown + .quad sys_fchmod + .quad sys_recvfrom /* 125 */ + .quad sys_setreuid + .quad sys_setregid + .quad sys_rename + .quad sys_truncate + .quad sys_ftruncate /* 130 */ + .quad sys_flock + .quad sys_setgid + .quad sys_sendto + .quad sys_shutdown + .quad sys_socketpair /* 135 */ + .quad sys_mkdir + .quad sys_rmdir + .quad sys_osf_utimes + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 140 */ + .quad sys_getpeername + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad sys_getrlimit + .quad sys_setrlimit /* 145 */ + .quad alpha_ni_syscall + .quad sys_setsid + .quad sys_quotactl + .quad alpha_ni_syscall + .quad sys_getsockname /* 150 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 155 */ + .quad sys_osf_sigaction + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad sys_osf_getdirentries + .quad sys_osf_statfs /* 160 */ + .quad sys_osf_fstatfs + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad sys_osf_getdomainname /* 165 */ + .quad sys_setdomainname + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 170 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 175 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 180 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 185 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 190 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 195 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + /* The OSF swapon has two extra arguments, but we ignore them. */ + .quad sys_swapon + .quad sys_msgctl /* 200 */ + .quad sys_msgget + .quad sys_msgrcv + .quad sys_msgsnd + .quad sys_semctl + .quad sys_semget /* 205 */ + .quad sys_semop + .quad sys_osf_utsname + .quad sys_lchown + .quad sys_shmat + .quad sys_shmctl /* 210 */ + .quad sys_shmdt + .quad sys_shmget + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 215 */ + .quad alpha_ni_syscall + .quad sys_msync + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 220 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 225 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 230 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad sys_getpgid + .quad sys_getsid + .quad sys_sigaltstack /* 235 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 240 */ + .quad sys_osf_sysinfo + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad sys_osf_proplist_syscall + .quad alpha_ni_syscall /* 245 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 250 */ + .quad sys_osf_usleep_thread + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad sys_sysfs + .quad alpha_ni_syscall /* 255 */ + .quad sys_osf_getsysinfo + .quad sys_osf_setsysinfo + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 260 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 265 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 270 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 275 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 280 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 285 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 290 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 295 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall +/* linux-specific system calls start at 300 */ + .quad sys_bdflush /* 300 */ + .quad sys_sethae + .quad sys_mount + .quad sys_old_adjtimex + .quad sys_swapoff + .quad sys_getdents /* 305 */ + .quad sys_ni_syscall /* 306: old create_module */ + .quad sys_init_module + .quad sys_delete_module + .quad sys_ni_syscall /* 309: old get_kernel_syms */ + .quad sys_syslog /* 310 */ + .quad sys_reboot + .quad sys_clone + .quad sys_uselib + .quad sys_mlock + .quad sys_munlock /* 315 */ + .quad sys_mlockall + .quad sys_munlockall + .quad sys_sysinfo + .quad sys_sysctl + .quad sys_ni_syscall /* 320 */ + .quad sys_oldumount + .quad sys_swapon + .quad sys_times + .quad sys_personality + .quad sys_setfsuid /* 325 */ + .quad sys_setfsgid + .quad sys_ustat + .quad sys_statfs + .quad sys_fstatfs + .quad sys_sched_setparam /* 330 */ + .quad sys_sched_getparam + .quad sys_sched_setscheduler + .quad sys_sched_getscheduler + .quad sys_sched_yield + .quad sys_sched_get_priority_max /* 335 */ + .quad sys_sched_get_priority_min + .quad sys_sched_rr_get_interval + .quad sys_ni_syscall /* sys_afs_syscall */ + .quad sys_newuname + .quad sys_nanosleep /* 340 */ + .quad sys_mremap + .quad sys_ni_syscall /* old nfsservctl */ + .quad sys_setresuid + .quad sys_getresuid + .quad sys_pciconfig_read /* 345 */ + .quad sys_pciconfig_write + .quad sys_ni_syscall /* 347: old query_module */ + .quad sys_prctl + .quad sys_pread64 + .quad sys_pwrite64 /* 350 */ + .quad sys_rt_sigreturn + .quad sys_rt_sigaction + .quad sys_rt_sigprocmask + .quad sys_rt_sigpending + .quad sys_rt_sigtimedwait /* 355 */ + .quad sys_rt_sigqueueinfo + .quad sys_rt_sigsuspend + .quad sys_select + .quad sys_gettimeofday + .quad sys_settimeofday /* 360 */ + .quad sys_getitimer + .quad sys_setitimer + .quad sys_utimes + .quad sys_getrusage + .quad sys_wait4 /* 365 */ + .quad sys_adjtimex + .quad sys_getcwd + .quad sys_capget + .quad sys_capset + .quad sys_sendfile64 /* 370 */ + .quad sys_setresgid + .quad sys_getresgid + .quad sys_ni_syscall /* sys_dipc */ + .quad sys_pivot_root + .quad sys_mincore /* 375 */ + .quad sys_pciconfig_iobase + .quad sys_getdents64 + .quad sys_gettid + .quad sys_readahead + .quad sys_ni_syscall /* 380 */ + .quad sys_tkill + .quad sys_setxattr + .quad sys_lsetxattr + .quad sys_fsetxattr + .quad sys_getxattr /* 385 */ + .quad sys_lgetxattr + .quad sys_fgetxattr + .quad sys_listxattr + .quad sys_llistxattr + .quad sys_flistxattr /* 390 */ + .quad sys_removexattr + .quad sys_lremovexattr + .quad sys_fremovexattr + .quad sys_futex + .quad sys_sched_setaffinity /* 395 */ + .quad sys_sched_getaffinity + .quad sys_ni_syscall /* 397, tux */ + .quad sys_io_setup + .quad sys_io_destroy + .quad sys_io_getevents /* 400 */ + .quad sys_io_submit + .quad sys_io_cancel + .quad sys_ni_syscall /* 403, sys_alloc_hugepages */ + .quad sys_ni_syscall /* 404, sys_free_hugepages */ + .quad sys_exit_group /* 405 */ + .quad sys_lookup_dcookie + .quad sys_epoll_create + .quad sys_epoll_ctl + .quad sys_epoll_wait + .quad sys_remap_file_pages /* 410 */ + .quad sys_set_tid_address + .quad sys_restart_syscall + .quad sys_fadvise64 + .quad sys_timer_create + .quad sys_timer_settime /* 415 */ + .quad sys_timer_gettime + .quad sys_timer_getoverrun + .quad sys_timer_delete + .quad sys_clock_settime + .quad sys_clock_gettime /* 420 */ + .quad sys_clock_getres + .quad sys_clock_nanosleep + .quad sys_semtimedop + .quad sys_tgkill + .quad sys_stat64 /* 425 */ + .quad sys_lstat64 + .quad sys_fstat64 + .quad sys_ni_syscall /* sys_vserver */ + .quad sys_ni_syscall /* sys_mbind */ + .quad sys_ni_syscall /* sys_get_mempolicy */ + .quad sys_ni_syscall /* sys_set_mempolicy */ + .quad sys_mq_open + .quad sys_mq_unlink + .quad sys_mq_timedsend + .quad sys_mq_timedreceive /* 435 */ + .quad sys_mq_notify + .quad sys_mq_getsetattr + .quad sys_waitid + .quad sys_add_key + .quad sys_request_key /* 440 */ + .quad sys_keyctl + .quad sys_ioprio_set + .quad sys_ioprio_get + .quad sys_inotify_init + .quad sys_inotify_add_watch /* 445 */ + .quad sys_inotify_rm_watch + .quad sys_fdatasync + .quad sys_kexec_load + .quad sys_migrate_pages + .quad sys_openat /* 450 */ + .quad sys_mkdirat + .quad sys_mknodat + .quad sys_fchownat + .quad sys_futimesat + .quad sys_fstatat64 /* 455 */ + .quad sys_unlinkat + .quad sys_renameat + .quad sys_linkat + .quad sys_symlinkat + .quad sys_readlinkat /* 460 */ + .quad sys_fchmodat + .quad sys_faccessat + .quad sys_pselect6 + .quad sys_ppoll + .quad sys_unshare /* 465 */ + .quad sys_set_robust_list + .quad sys_get_robust_list + .quad sys_splice + .quad sys_sync_file_range + .quad sys_tee /* 470 */ + .quad sys_vmsplice + .quad sys_move_pages + .quad sys_getcpu + .quad sys_epoll_pwait + .quad sys_utimensat /* 475 */ + .quad sys_signalfd + .quad sys_ni_syscall /* sys_timerfd */ + .quad sys_eventfd + .quad sys_recvmmsg + .quad sys_fallocate /* 480 */ + .quad sys_timerfd_create + .quad sys_timerfd_settime + .quad sys_timerfd_gettime + .quad sys_signalfd4 + .quad sys_eventfd2 /* 485 */ + .quad sys_epoll_create1 + .quad sys_dup3 + .quad sys_pipe2 + .quad sys_inotify_init1 + .quad sys_preadv /* 490 */ + .quad sys_pwritev + .quad sys_rt_tgsigqueueinfo + .quad sys_perf_event_open + .quad sys_fanotify_init + .quad sys_fanotify_mark /* 495 */ + .quad sys_prlimit64 + .quad sys_name_to_handle_at + .quad sys_open_by_handle_at + .quad sys_clock_adjtime + .quad sys_syncfs /* 500 */ + .quad sys_setns + .quad sys_accept4 + .quad sys_sendmmsg + + .size sys_call_table, . - sys_call_table + .type sys_call_table, @object + +/* Remember to update everything, kids. */ +.ifne (. - sys_call_table) - (NR_SYSCALLS * 8) +.err +.endif diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c new file mode 100644 index 00000000..e336694c --- /dev/null +++ b/arch/alpha/kernel/time.c @@ -0,0 +1,522 @@ +/* + * linux/arch/alpha/kernel/time.c + * + * Copyright (C) 1991, 1992, 1995, 1999, 2000 Linus Torvalds + * + * This file contains the PC-specific time handling details: + * reading the RTC at bootup, etc.. + * 1994-07-02 Alan Modra + * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime + * 1995-03-26 Markus Kuhn + * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887 + * precision CMOS clock update + * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 + * "A Kernel Model for Precision Timekeeping" by Dave Mills + * 1997-01-09 Adrian Sun + * use interval timer if CONFIG_RTC=y + * 1997-10-29 John Bowman (bowman@math.ualberta.ca) + * fixed tick loss calculation in timer_interrupt + * (round system clock to nearest tick instead of truncating) + * fixed algorithm in time_init for getting time from CMOS clock + * 1999-04-16 Thorsten Kranzkowski (dl8bcu@gmx.net) + * fixed algorithm in do_gettimeofday() for calculating the precise time + * from processor cycle counter (now taking lost_ticks into account) + * 2000-08-13 Jan-Benedict Glaw <jbglaw@lug-owl.de> + * Fixed time_init to be aware of epoches != 1900. This prevents + * booting up in 2048 for me;) Code is stolen from rtc.c. + * 2003-06-03 R. Scott Bailey <scott.bailey@eds.com> + * Tighten sanity in time_init from 1% (10,000 PPM) to 250 PPM + */ +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/delay.h> +#include <linux/ioport.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/bcd.h> +#include <linux/profile.h> +#include <linux/irq_work.h> + +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/hwrpb.h> +#include <asm/rtc.h> + +#include <linux/mc146818rtc.h> +#include <linux/time.h> +#include <linux/timex.h> +#include <linux/clocksource.h> + +#include "proto.h" +#include "irq_impl.h" + +static int set_rtc_mmss(unsigned long); + +DEFINE_SPINLOCK(rtc_lock); +EXPORT_SYMBOL(rtc_lock); + +#define TICK_SIZE (tick_nsec / 1000) + +/* + * Shift amount by which scaled_ticks_per_cycle is scaled. Shifting + * by 48 gives us 16 bits for HZ while keeping the accuracy good even + * for large CPU clock rates. + */ +#define FIX_SHIFT 48 + +/* lump static variables together for more efficient access: */ +static struct { + /* cycle counter last time it got invoked */ + __u32 last_time; + /* ticks/cycle * 2^48 */ + unsigned long scaled_ticks_per_cycle; + /* partial unused tick */ + unsigned long partial_tick; +} state; + +unsigned long est_cycle_freq; + +#ifdef CONFIG_IRQ_WORK + +DEFINE_PER_CPU(u8, irq_work_pending); + +#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 +#define test_irq_work_pending() __get_cpu_var(irq_work_pending) +#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 + +void arch_irq_work_raise(void) +{ + set_irq_work_pending_flag(); +} + +#else /* CONFIG_IRQ_WORK */ + +#define test_irq_work_pending() 0 +#define clear_irq_work_pending() + +#endif /* CONFIG_IRQ_WORK */ + + +static inline __u32 rpcc(void) +{ + __u32 result; + asm volatile ("rpcc %0" : "=r"(result)); + return result; +} + +int update_persistent_clock(struct timespec now) +{ + return set_rtc_mmss(now.tv_sec); +} + +void read_persistent_clock(struct timespec *ts) +{ + unsigned int year, mon, day, hour, min, sec, epoch; + + sec = CMOS_READ(RTC_SECONDS); + min = CMOS_READ(RTC_MINUTES); + hour = CMOS_READ(RTC_HOURS); + day = CMOS_READ(RTC_DAY_OF_MONTH); + mon = CMOS_READ(RTC_MONTH); + year = CMOS_READ(RTC_YEAR); + + if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + sec = bcd2bin(sec); + min = bcd2bin(min); + hour = bcd2bin(hour); + day = bcd2bin(day); + mon = bcd2bin(mon); + year = bcd2bin(year); + } + + /* PC-like is standard; used for year >= 70 */ + epoch = 1900; + if (year < 20) + epoch = 2000; + else if (year >= 20 && year < 48) + /* NT epoch */ + epoch = 1980; + else if (year >= 48 && year < 70) + /* Digital UNIX epoch */ + epoch = 1952; + + printk(KERN_INFO "Using epoch = %d\n", epoch); + + if ((year += epoch) < 1970) + year += 100; + + ts->tv_sec = mktime(year, mon, day, hour, min, sec); + ts->tv_nsec = 0; +} + + + +/* + * timer_interrupt() needs to keep up the real-time clock, + * as well as call the "xtime_update()" routine every clocktick + */ +irqreturn_t timer_interrupt(int irq, void *dev) +{ + unsigned long delta; + __u32 now; + long nticks; + +#ifndef CONFIG_SMP + /* Not SMP, do kernel PC profiling here. */ + profile_tick(CPU_PROFILING); +#endif + + /* + * Calculate how many ticks have passed since the last update, + * including any previous partial leftover. Save any resulting + * fraction for the next pass. + */ + now = rpcc(); + delta = now - state.last_time; + state.last_time = now; + delta = delta * state.scaled_ticks_per_cycle + state.partial_tick; + state.partial_tick = delta & ((1UL << FIX_SHIFT) - 1); + nticks = delta >> FIX_SHIFT; + + if (nticks) + xtime_update(nticks); + + if (test_irq_work_pending()) { + clear_irq_work_pending(); + irq_work_run(); + } + +#ifndef CONFIG_SMP + while (nticks--) + update_process_times(user_mode(get_irq_regs())); +#endif + + return IRQ_HANDLED; +} + +void __init +common_init_rtc(void) +{ + unsigned char x; + + /* Reset periodic interrupt frequency. */ + x = CMOS_READ(RTC_FREQ_SELECT) & 0x3f; + /* Test includes known working values on various platforms + where 0x26 is wrong; we refuse to change those. */ + if (x != 0x26 && x != 0x25 && x != 0x19 && x != 0x06) { + printk("Setting RTC_FREQ to 1024 Hz (%x)\n", x); + CMOS_WRITE(0x26, RTC_FREQ_SELECT); + } + + /* Turn on periodic interrupts. */ + x = CMOS_READ(RTC_CONTROL); + if (!(x & RTC_PIE)) { + printk("Turning on RTC interrupts.\n"); + x |= RTC_PIE; + x &= ~(RTC_AIE | RTC_UIE); + CMOS_WRITE(x, RTC_CONTROL); + } + (void) CMOS_READ(RTC_INTR_FLAGS); + + outb(0x36, 0x43); /* pit counter 0: system timer */ + outb(0x00, 0x40); + outb(0x00, 0x40); + + outb(0xb6, 0x43); /* pit counter 2: speaker */ + outb(0x31, 0x42); + outb(0x13, 0x42); + + init_rtc_irq(); +} + +unsigned int common_get_rtc_time(struct rtc_time *time) +{ + return __get_rtc_time(time); +} + +int common_set_rtc_time(struct rtc_time *time) +{ + return __set_rtc_time(time); +} + +/* Validate a computed cycle counter result against the known bounds for + the given processor core. There's too much brokenness in the way of + timing hardware for any one method to work everywhere. :-( + + Return 0 if the result cannot be trusted, otherwise return the argument. */ + +static unsigned long __init +validate_cc_value(unsigned long cc) +{ + static struct bounds { + unsigned int min, max; + } cpu_hz[] __initdata = { + [EV3_CPU] = { 50000000, 200000000 }, /* guess */ + [EV4_CPU] = { 100000000, 300000000 }, + [LCA4_CPU] = { 100000000, 300000000 }, /* guess */ + [EV45_CPU] = { 200000000, 300000000 }, + [EV5_CPU] = { 250000000, 433000000 }, + [EV56_CPU] = { 333000000, 667000000 }, + [PCA56_CPU] = { 400000000, 600000000 }, /* guess */ + [PCA57_CPU] = { 500000000, 600000000 }, /* guess */ + [EV6_CPU] = { 466000000, 600000000 }, + [EV67_CPU] = { 600000000, 750000000 }, + [EV68AL_CPU] = { 750000000, 940000000 }, + [EV68CB_CPU] = { 1000000000, 1333333333 }, + /* None of the following are shipping as of 2001-11-01. */ + [EV68CX_CPU] = { 1000000000, 1700000000 }, /* guess */ + [EV69_CPU] = { 1000000000, 1700000000 }, /* guess */ + [EV7_CPU] = { 800000000, 1400000000 }, /* guess */ + [EV79_CPU] = { 1000000000, 2000000000 }, /* guess */ + }; + + /* Allow for some drift in the crystal. 10MHz is more than enough. */ + const unsigned int deviation = 10000000; + + struct percpu_struct *cpu; + unsigned int index; + + cpu = (struct percpu_struct *)((char*)hwrpb + hwrpb->processor_offset); + index = cpu->type & 0xffffffff; + + /* If index out of bounds, no way to validate. */ + if (index >= ARRAY_SIZE(cpu_hz)) + return cc; + + /* If index contains no data, no way to validate. */ + if (cpu_hz[index].max == 0) + return cc; + + if (cc < cpu_hz[index].min - deviation + || cc > cpu_hz[index].max + deviation) + return 0; + + return cc; +} + + +/* + * Calibrate CPU clock using legacy 8254 timer/counter. Stolen from + * arch/i386/time.c. + */ + +#define CALIBRATE_LATCH 0xffff +#define TIMEOUT_COUNT 0x100000 + +static unsigned long __init +calibrate_cc_with_pit(void) +{ + int cc, count = 0; + + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Now let's take care of CTC channel 2 + * + * Set the Gate high, program CTC channel 2 for mode 0, + * (interrupt on terminal count mode), binary count, + * load 5 * LATCH count, (LSB and MSB) to begin countdown. + */ + outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */ + outb(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */ + outb(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */ + + cc = rpcc(); + do { + count++; + } while ((inb(0x61) & 0x20) == 0 && count < TIMEOUT_COUNT); + cc = rpcc() - cc; + + /* Error: ECTCNEVERSET or ECPUTOOFAST. */ + if (count <= 1 || count == TIMEOUT_COUNT) + return 0; + + return ((long)cc * PIT_TICK_RATE) / (CALIBRATE_LATCH + 1); +} + +/* The Linux interpretation of the CMOS clock register contents: + When the Update-In-Progress (UIP) flag goes from 1 to 0, the + RTC registers show the second which has precisely just started. + Let's hope other operating systems interpret the RTC the same way. */ + +static unsigned long __init +rpcc_after_update_in_progress(void) +{ + do { } while (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)); + do { } while (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP); + + return rpcc(); +} + +#ifndef CONFIG_SMP +/* Until and unless we figure out how to get cpu cycle counters + in sync and keep them there, we can't use the rpcc. */ +static cycle_t read_rpcc(struct clocksource *cs) +{ + cycle_t ret = (cycle_t)rpcc(); + return ret; +} + +static struct clocksource clocksource_rpcc = { + .name = "rpcc", + .rating = 300, + .read = read_rpcc, + .mask = CLOCKSOURCE_MASK(32), + .flags = CLOCK_SOURCE_IS_CONTINUOUS +}; + +static inline void register_rpcc_clocksource(long cycle_freq) +{ + clocksource_register_hz(&clocksource_rpcc, cycle_freq); +} +#else /* !CONFIG_SMP */ +static inline void register_rpcc_clocksource(long cycle_freq) +{ +} +#endif /* !CONFIG_SMP */ + +void __init +time_init(void) +{ + unsigned int cc1, cc2; + unsigned long cycle_freq, tolerance; + long diff; + + /* Calibrate CPU clock -- attempt #1. */ + if (!est_cycle_freq) + est_cycle_freq = validate_cc_value(calibrate_cc_with_pit()); + + cc1 = rpcc(); + + /* Calibrate CPU clock -- attempt #2. */ + if (!est_cycle_freq) { + cc1 = rpcc_after_update_in_progress(); + cc2 = rpcc_after_update_in_progress(); + est_cycle_freq = validate_cc_value(cc2 - cc1); + cc1 = cc2; + } + + cycle_freq = hwrpb->cycle_freq; + if (est_cycle_freq) { + /* If the given value is within 250 PPM of what we calculated, + accept it. Otherwise, use what we found. */ + tolerance = cycle_freq / 4000; + diff = cycle_freq - est_cycle_freq; + if (diff < 0) + diff = -diff; + if ((unsigned long)diff > tolerance) { + cycle_freq = est_cycle_freq; + printk("HWRPB cycle frequency bogus. " + "Estimated %lu Hz\n", cycle_freq); + } else { + est_cycle_freq = 0; + } + } else if (! validate_cc_value (cycle_freq)) { + printk("HWRPB cycle frequency bogus, " + "and unable to estimate a proper value!\n"); + } + + /* From John Bowman <bowman@math.ualberta.ca>: allow the values + to settle, as the Update-In-Progress bit going low isn't good + enough on some hardware. 2ms is our guess; we haven't found + bogomips yet, but this is close on a 500Mhz box. */ + __delay(1000000); + + + if (HZ > (1<<16)) { + extern void __you_loose (void); + __you_loose(); + } + + register_rpcc_clocksource(cycle_freq); + + state.last_time = cc1; + state.scaled_ticks_per_cycle + = ((unsigned long) HZ << FIX_SHIFT) / cycle_freq; + state.partial_tick = 0L; + + /* Startup the timer source. */ + alpha_mv.init_rtc(); +} + +/* + * In order to set the CMOS clock precisely, set_rtc_mmss has to be + * called 500 ms after the second nowtime has started, because when + * nowtime is written into the registers of the CMOS clock, it will + * jump to the next second precisely 500 ms later. Check the Motorola + * MC146818A or Dallas DS12887 data sheet for details. + * + * BUG: This routine does not handle hour overflow properly; it just + * sets the minutes. Usually you won't notice until after reboot! + */ + + +static int +set_rtc_mmss(unsigned long nowtime) +{ + int retval = 0; + int real_seconds, real_minutes, cmos_minutes; + unsigned char save_control, save_freq_select; + + /* irq are locally disabled here */ + spin_lock(&rtc_lock); + /* Tell the clock it's being set */ + save_control = CMOS_READ(RTC_CONTROL); + CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); + + /* Stop and reset prescaler */ + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); + CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + + cmos_minutes = CMOS_READ(RTC_MINUTES); + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + cmos_minutes = bcd2bin(cmos_minutes); + + /* + * since we're only adjusting minutes and seconds, + * don't interfere with hour overflow. This avoids + * messing with unknown time zones but requires your + * RTC not to be off by more than 15 minutes + */ + real_seconds = nowtime % 60; + real_minutes = nowtime / 60; + if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1) { + /* correct for half hour time zone */ + real_minutes += 30; + } + real_minutes %= 60; + + if (abs(real_minutes - cmos_minutes) < 30) { + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + real_seconds = bin2bcd(real_seconds); + real_minutes = bin2bcd(real_minutes); + } + CMOS_WRITE(real_seconds,RTC_SECONDS); + CMOS_WRITE(real_minutes,RTC_MINUTES); + } else { + printk_once(KERN_NOTICE + "set_rtc_mmss: can't update from %d to %d\n", + cmos_minutes, real_minutes); + retval = -1; + } + + /* The following flags have to be released exactly in this order, + * otherwise the DS12887 (popular MC146818A clone with integrated + * battery and quartz) will not reset the oscillator and will not + * update precisely 500 ms later. You won't find this mentioned in + * the Dallas Semiconductor data sheets, but who believes data + * sheets anyway ... -- Markus Kuhn + */ + CMOS_WRITE(save_control, RTC_CONTROL); + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + spin_unlock(&rtc_lock); + + return retval; +} diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c new file mode 100644 index 00000000..80d987c0 --- /dev/null +++ b/arch/alpha/kernel/traps.c @@ -0,0 +1,1087 @@ +/* + * arch/alpha/kernel/traps.c + * + * (C) Copyright 1994 Linus Torvalds + */ + +/* + * This file initializes the trap entry points + */ + +#include <linux/jiffies.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/tty.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kallsyms.h> +#include <linux/ratelimit.h> + +#include <asm/gentrap.h> +#include <asm/uaccess.h> +#include <asm/unaligned.h> +#include <asm/sysinfo.h> +#include <asm/hwrpb.h> +#include <asm/mmu_context.h> +#include <asm/special_insns.h> + +#include "proto.h" + +/* Work-around for some SRMs which mishandle opDEC faults. */ + +static int opDEC_fix; + +static void __cpuinit +opDEC_check(void) +{ + __asm__ __volatile__ ( + /* Load the address of... */ + " br $16, 1f\n" + /* A stub instruction fault handler. Just add 4 to the + pc and continue. */ + " ldq $16, 8($sp)\n" + " addq $16, 4, $16\n" + " stq $16, 8($sp)\n" + " call_pal %[rti]\n" + /* Install the instruction fault handler. */ + "1: lda $17, 3\n" + " call_pal %[wrent]\n" + /* With that in place, the fault from the round-to-minf fp + insn will arrive either at the "lda 4" insn (bad) or one + past that (good). This places the correct fixup in %0. */ + " lda %[fix], 0\n" + " cvttq/svm $f31,$f31\n" + " lda %[fix], 4" + : [fix] "=r" (opDEC_fix) + : [rti] "n" (PAL_rti), [wrent] "n" (PAL_wrent) + : "$0", "$1", "$16", "$17", "$22", "$23", "$24", "$25"); + + if (opDEC_fix) + printk("opDEC fixup enabled.\n"); +} + +void +dik_show_regs(struct pt_regs *regs, unsigned long *r9_15) +{ + printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx %s\n", + regs->pc, regs->r26, regs->ps, print_tainted()); + print_symbol("pc is at %s\n", regs->pc); + print_symbol("ra is at %s\n", regs->r26 ); + printk("v0 = %016lx t0 = %016lx t1 = %016lx\n", + regs->r0, regs->r1, regs->r2); + printk("t2 = %016lx t3 = %016lx t4 = %016lx\n", + regs->r3, regs->r4, regs->r5); + printk("t5 = %016lx t6 = %016lx t7 = %016lx\n", + regs->r6, regs->r7, regs->r8); + + if (r9_15) { + printk("s0 = %016lx s1 = %016lx s2 = %016lx\n", + r9_15[9], r9_15[10], r9_15[11]); + printk("s3 = %016lx s4 = %016lx s5 = %016lx\n", + r9_15[12], r9_15[13], r9_15[14]); + printk("s6 = %016lx\n", r9_15[15]); + } + + printk("a0 = %016lx a1 = %016lx a2 = %016lx\n", + regs->r16, regs->r17, regs->r18); + printk("a3 = %016lx a4 = %016lx a5 = %016lx\n", + regs->r19, regs->r20, regs->r21); + printk("t8 = %016lx t9 = %016lx t10= %016lx\n", + regs->r22, regs->r23, regs->r24); + printk("t11= %016lx pv = %016lx at = %016lx\n", + regs->r25, regs->r27, regs->r28); + printk("gp = %016lx sp = %p\n", regs->gp, regs+1); +#if 0 +__halt(); +#endif +} + +#if 0 +static char * ireg_name[] = {"v0", "t0", "t1", "t2", "t3", "t4", "t5", "t6", + "t7", "s0", "s1", "s2", "s3", "s4", "s5", "s6", + "a0", "a1", "a2", "a3", "a4", "a5", "t8", "t9", + "t10", "t11", "ra", "pv", "at", "gp", "sp", "zero"}; +#endif + +static void +dik_show_code(unsigned int *pc) +{ + long i; + + printk("Code:"); + for (i = -6; i < 2; i++) { + unsigned int insn; + if (__get_user(insn, (unsigned int __user *)pc + i)) + break; + printk("%c%08x%c", i ? ' ' : '<', insn, i ? ' ' : '>'); + } + printk("\n"); +} + +static void +dik_show_trace(unsigned long *sp) +{ + long i = 0; + printk("Trace:\n"); + while (0x1ff8 & (unsigned long) sp) { + extern char _stext[], _etext[]; + unsigned long tmp = *sp; + sp++; + if (tmp < (unsigned long) &_stext) + continue; + if (tmp >= (unsigned long) &_etext) + continue; + printk("[<%lx>]", tmp); + print_symbol(" %s", tmp); + printk("\n"); + if (i > 40) { + printk(" ..."); + break; + } + } + printk("\n"); +} + +static int kstack_depth_to_print = 24; + +void show_stack(struct task_struct *task, unsigned long *sp) +{ + unsigned long *stack; + int i; + + /* + * debugging aid: "show_stack(NULL);" prints the + * back trace for this cpu. + */ + if(sp==NULL) + sp=(unsigned long*)&sp; + + stack = sp; + for(i=0; i < kstack_depth_to_print; i++) { + if (((long) stack & (THREAD_SIZE-1)) == 0) + break; + if (i && ((i % 4) == 0)) + printk("\n "); + printk("%016lx ", *stack++); + } + printk("\n"); + dik_show_trace(sp); +} + +void dump_stack(void) +{ + show_stack(NULL, NULL); +} + +EXPORT_SYMBOL(dump_stack); + +void +die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15) +{ + if (regs->ps & 8) + return; +#ifdef CONFIG_SMP + printk("CPU %d ", hard_smp_processor_id()); +#endif + printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err); + dik_show_regs(regs, r9_15); + add_taint(TAINT_DIE); + dik_show_trace((unsigned long *)(regs+1)); + dik_show_code((unsigned int *)regs->pc); + + if (test_and_set_thread_flag (TIF_DIE_IF_KERNEL)) { + printk("die_if_kernel recursion detected.\n"); + local_irq_enable(); + while (1); + } + do_exit(SIGSEGV); +} + +#ifndef CONFIG_MATHEMU +static long dummy_emul(void) { return 0; } +long (*alpha_fp_emul_imprecise)(struct pt_regs *regs, unsigned long writemask) + = (void *)dummy_emul; +long (*alpha_fp_emul) (unsigned long pc) + = (void *)dummy_emul; +#else +long alpha_fp_emul_imprecise(struct pt_regs *regs, unsigned long writemask); +long alpha_fp_emul (unsigned long pc); +#endif + +asmlinkage void +do_entArith(unsigned long summary, unsigned long write_mask, + struct pt_regs *regs) +{ + long si_code = FPE_FLTINV; + siginfo_t info; + + if (summary & 1) { + /* Software-completion summary bit is set, so try to + emulate the instruction. If the processor supports + precise exceptions, we don't have to search. */ + if (!amask(AMASK_PRECISE_TRAP)) + si_code = alpha_fp_emul(regs->pc - 4); + else + si_code = alpha_fp_emul_imprecise(regs, write_mask); + if (si_code == 0) + return; + } + die_if_kernel("Arithmetic fault", regs, 0, NULL); + + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = (void __user *) regs->pc; + send_sig_info(SIGFPE, &info, current); +} + +asmlinkage void +do_entIF(unsigned long type, struct pt_regs *regs) +{ + siginfo_t info; + int signo, code; + + if ((regs->ps & ~IPL_MAX) == 0) { + if (type == 1) { + const unsigned int *data + = (const unsigned int *) regs->pc; + printk("Kernel bug at %s:%d\n", + (const char *)(data[1] | (long)data[2] << 32), + data[0]); + } + die_if_kernel((type == 1 ? "Kernel Bug" : "Instruction fault"), + regs, type, NULL); + } + + switch (type) { + case 0: /* breakpoint */ + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + info.si_trapno = 0; + info.si_addr = (void __user *) regs->pc; + + if (ptrace_cancel_bpt(current)) { + regs->pc -= 4; /* make pc point to former bpt */ + } + + send_sig_info(SIGTRAP, &info, current); + return; + + case 1: /* bugcheck */ + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = __SI_FAULT; + info.si_addr = (void __user *) regs->pc; + info.si_trapno = 0; + send_sig_info(SIGTRAP, &info, current); + return; + + case 2: /* gentrap */ + info.si_addr = (void __user *) regs->pc; + info.si_trapno = regs->r16; + switch ((long) regs->r16) { + case GEN_INTOVF: + signo = SIGFPE; + code = FPE_INTOVF; + break; + case GEN_INTDIV: + signo = SIGFPE; + code = FPE_INTDIV; + break; + case GEN_FLTOVF: + signo = SIGFPE; + code = FPE_FLTOVF; + break; + case GEN_FLTDIV: + signo = SIGFPE; + code = FPE_FLTDIV; + break; + case GEN_FLTUND: + signo = SIGFPE; + code = FPE_FLTUND; + break; + case GEN_FLTINV: + signo = SIGFPE; + code = FPE_FLTINV; + break; + case GEN_FLTINE: + signo = SIGFPE; + code = FPE_FLTRES; + break; + case GEN_ROPRAND: + signo = SIGFPE; + code = __SI_FAULT; + break; + + case GEN_DECOVF: + case GEN_DECDIV: + case GEN_DECINV: + case GEN_ASSERTERR: + case GEN_NULPTRERR: + case GEN_STKOVF: + case GEN_STRLENERR: + case GEN_SUBSTRERR: + case GEN_RANGERR: + case GEN_SUBRNG: + case GEN_SUBRNG1: + case GEN_SUBRNG2: + case GEN_SUBRNG3: + case GEN_SUBRNG4: + case GEN_SUBRNG5: + case GEN_SUBRNG6: + case GEN_SUBRNG7: + default: + signo = SIGTRAP; + code = __SI_FAULT; + break; + } + + info.si_signo = signo; + info.si_errno = 0; + info.si_code = code; + info.si_addr = (void __user *) regs->pc; + send_sig_info(signo, &info, current); + return; + + case 4: /* opDEC */ + if (implver() == IMPLVER_EV4) { + long si_code; + + /* The some versions of SRM do not handle + the opDEC properly - they return the PC of the + opDEC fault, not the instruction after as the + Alpha architecture requires. Here we fix it up. + We do this by intentionally causing an opDEC + fault during the boot sequence and testing if + we get the correct PC. If not, we set a flag + to correct it every time through. */ + regs->pc += opDEC_fix; + + /* EV4 does not implement anything except normal + rounding. Everything else will come here as + an illegal instruction. Emulate them. */ + si_code = alpha_fp_emul(regs->pc - 4); + if (si_code == 0) + return; + if (si_code > 0) { + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = (void __user *) regs->pc; + send_sig_info(SIGFPE, &info, current); + return; + } + } + break; + + case 3: /* FEN fault */ + /* Irritating users can call PAL_clrfen to disable the + FPU for the process. The kernel will then trap in + do_switch_stack and undo_switch_stack when we try + to save and restore the FP registers. + + Given that GCC by default generates code that uses the + FP registers, PAL_clrfen is not useful except for DoS + attacks. So turn the bleeding FPU back on and be done + with it. */ + current_thread_info()->pcb.flags |= 1; + __reload_thread(¤t_thread_info()->pcb); + return; + + case 5: /* illoc */ + default: /* unexpected instruction-fault type */ + ; + } + + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_ILLOPC; + info.si_addr = (void __user *) regs->pc; + send_sig_info(SIGILL, &info, current); +} + +/* There is an ifdef in the PALcode in MILO that enables a + "kernel debugging entry point" as an unprivileged call_pal. + + We don't want to have anything to do with it, but unfortunately + several versions of MILO included in distributions have it enabled, + and if we don't put something on the entry point we'll oops. */ + +asmlinkage void +do_entDbg(struct pt_regs *regs) +{ + siginfo_t info; + + die_if_kernel("Instruction fault", regs, 0, NULL); + + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_ILLOPC; + info.si_addr = (void __user *) regs->pc; + force_sig_info(SIGILL, &info, current); +} + + +/* + * entUna has a different register layout to be reasonably simple. It + * needs access to all the integer registers (the kernel doesn't use + * fp-regs), and it needs to have them in order for simpler access. + * + * Due to the non-standard register layout (and because we don't want + * to handle floating-point regs), user-mode unaligned accesses are + * handled separately by do_entUnaUser below. + * + * Oh, btw, we don't handle the "gp" register correctly, but if we fault + * on a gp-register unaligned load/store, something is _very_ wrong + * in the kernel anyway.. + */ +struct allregs { + unsigned long regs[32]; + unsigned long ps, pc, gp, a0, a1, a2; +}; + +struct unaligned_stat { + unsigned long count, va, pc; +} unaligned[2]; + + +/* Macro for exception fixup code to access integer registers. */ +#define una_reg(r) (_regs[(r) >= 16 && (r) <= 18 ? (r)+19 : (r)]) + + +asmlinkage void +do_entUna(void * va, unsigned long opcode, unsigned long reg, + struct allregs *regs) +{ + long error, tmp1, tmp2, tmp3, tmp4; + unsigned long pc = regs->pc - 4; + unsigned long *_regs = regs->regs; + const struct exception_table_entry *fixup; + + unaligned[0].count++; + unaligned[0].va = (unsigned long) va; + unaligned[0].pc = pc; + + /* We don't want to use the generic get/put unaligned macros as + we want to trap exceptions. Only if we actually get an + exception will we decide whether we should have caught it. */ + + switch (opcode) { + case 0x0c: /* ldwu */ + __asm__ __volatile__( + "1: ldq_u %1,0(%3)\n" + "2: ldq_u %2,1(%3)\n" + " extwl %1,%3,%1\n" + " extwh %2,%3,%2\n" + "3:\n" + ".section __ex_table,\"a\"\n" + " .long 1b - .\n" + " lda %1,3b-1b(%0)\n" + " .long 2b - .\n" + " lda %2,3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto got_exception; + una_reg(reg) = tmp1|tmp2; + return; + + case 0x28: /* ldl */ + __asm__ __volatile__( + "1: ldq_u %1,0(%3)\n" + "2: ldq_u %2,3(%3)\n" + " extll %1,%3,%1\n" + " extlh %2,%3,%2\n" + "3:\n" + ".section __ex_table,\"a\"\n" + " .long 1b - .\n" + " lda %1,3b-1b(%0)\n" + " .long 2b - .\n" + " lda %2,3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto got_exception; + una_reg(reg) = (int)(tmp1|tmp2); + return; + + case 0x29: /* ldq */ + __asm__ __volatile__( + "1: ldq_u %1,0(%3)\n" + "2: ldq_u %2,7(%3)\n" + " extql %1,%3,%1\n" + " extqh %2,%3,%2\n" + "3:\n" + ".section __ex_table,\"a\"\n" + " .long 1b - .\n" + " lda %1,3b-1b(%0)\n" + " .long 2b - .\n" + " lda %2,3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto got_exception; + una_reg(reg) = tmp1|tmp2; + return; + + /* Note that the store sequences do not indicate that they change + memory because it _should_ be affecting nothing in this context. + (Otherwise we have other, much larger, problems.) */ + case 0x0d: /* stw */ + __asm__ __volatile__( + "1: ldq_u %2,1(%5)\n" + "2: ldq_u %1,0(%5)\n" + " inswh %6,%5,%4\n" + " inswl %6,%5,%3\n" + " mskwh %2,%5,%2\n" + " mskwl %1,%5,%1\n" + " or %2,%4,%2\n" + " or %1,%3,%1\n" + "3: stq_u %2,1(%5)\n" + "4: stq_u %1,0(%5)\n" + "5:\n" + ".section __ex_table,\"a\"\n" + " .long 1b - .\n" + " lda %2,5b-1b(%0)\n" + " .long 2b - .\n" + " lda %1,5b-2b(%0)\n" + " .long 3b - .\n" + " lda $31,5b-3b(%0)\n" + " .long 4b - .\n" + " lda $31,5b-4b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), + "=&r"(tmp3), "=&r"(tmp4) + : "r"(va), "r"(una_reg(reg)), "0"(0)); + if (error) + goto got_exception; + return; + + case 0x2c: /* stl */ + __asm__ __volatile__( + "1: ldq_u %2,3(%5)\n" + "2: ldq_u %1,0(%5)\n" + " inslh %6,%5,%4\n" + " insll %6,%5,%3\n" + " msklh %2,%5,%2\n" + " mskll %1,%5,%1\n" + " or %2,%4,%2\n" + " or %1,%3,%1\n" + "3: stq_u %2,3(%5)\n" + "4: stq_u %1,0(%5)\n" + "5:\n" + ".section __ex_table,\"a\"\n" + " .long 1b - .\n" + " lda %2,5b-1b(%0)\n" + " .long 2b - .\n" + " lda %1,5b-2b(%0)\n" + " .long 3b - .\n" + " lda $31,5b-3b(%0)\n" + " .long 4b - .\n" + " lda $31,5b-4b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), + "=&r"(tmp3), "=&r"(tmp4) + : "r"(va), "r"(una_reg(reg)), "0"(0)); + if (error) + goto got_exception; + return; + + case 0x2d: /* stq */ + __asm__ __volatile__( + "1: ldq_u %2,7(%5)\n" + "2: ldq_u %1,0(%5)\n" + " insqh %6,%5,%4\n" + " insql %6,%5,%3\n" + " mskqh %2,%5,%2\n" + " mskql %1,%5,%1\n" + " or %2,%4,%2\n" + " or %1,%3,%1\n" + "3: stq_u %2,7(%5)\n" + "4: stq_u %1,0(%5)\n" + "5:\n" + ".section __ex_table,\"a\"\n\t" + " .long 1b - .\n" + " lda %2,5b-1b(%0)\n" + " .long 2b - .\n" + " lda %1,5b-2b(%0)\n" + " .long 3b - .\n" + " lda $31,5b-3b(%0)\n" + " .long 4b - .\n" + " lda $31,5b-4b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), + "=&r"(tmp3), "=&r"(tmp4) + : "r"(va), "r"(una_reg(reg)), "0"(0)); + if (error) + goto got_exception; + return; + } + + printk("Bad unaligned kernel access at %016lx: %p %lx %lu\n", + pc, va, opcode, reg); + do_exit(SIGSEGV); + +got_exception: + /* Ok, we caught the exception, but we don't want it. Is there + someone to pass it along to? */ + if ((fixup = search_exception_tables(pc)) != 0) { + unsigned long newpc; + newpc = fixup_exception(una_reg, fixup, pc); + + printk("Forwarding unaligned exception at %lx (%lx)\n", + pc, newpc); + + regs->pc = newpc; + return; + } + + /* + * Yikes! No one to forward the exception to. + * Since the registers are in a weird format, dump them ourselves. + */ + + printk("%s(%d): unhandled unaligned exception\n", + current->comm, task_pid_nr(current)); + + printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx\n", + pc, una_reg(26), regs->ps); + printk("r0 = %016lx r1 = %016lx r2 = %016lx\n", + una_reg(0), una_reg(1), una_reg(2)); + printk("r3 = %016lx r4 = %016lx r5 = %016lx\n", + una_reg(3), una_reg(4), una_reg(5)); + printk("r6 = %016lx r7 = %016lx r8 = %016lx\n", + una_reg(6), una_reg(7), una_reg(8)); + printk("r9 = %016lx r10= %016lx r11= %016lx\n", + una_reg(9), una_reg(10), una_reg(11)); + printk("r12= %016lx r13= %016lx r14= %016lx\n", + una_reg(12), una_reg(13), una_reg(14)); + printk("r15= %016lx\n", una_reg(15)); + printk("r16= %016lx r17= %016lx r18= %016lx\n", + una_reg(16), una_reg(17), una_reg(18)); + printk("r19= %016lx r20= %016lx r21= %016lx\n", + una_reg(19), una_reg(20), una_reg(21)); + printk("r22= %016lx r23= %016lx r24= %016lx\n", + una_reg(22), una_reg(23), una_reg(24)); + printk("r25= %016lx r27= %016lx r28= %016lx\n", + una_reg(25), una_reg(27), una_reg(28)); + printk("gp = %016lx sp = %p\n", regs->gp, regs+1); + + dik_show_code((unsigned int *)pc); + dik_show_trace((unsigned long *)(regs+1)); + + if (test_and_set_thread_flag (TIF_DIE_IF_KERNEL)) { + printk("die_if_kernel recursion detected.\n"); + local_irq_enable(); + while (1); + } + do_exit(SIGSEGV); +} + +/* + * Convert an s-floating point value in memory format to the + * corresponding value in register format. The exponent + * needs to be remapped to preserve non-finite values + * (infinities, not-a-numbers, denormals). + */ +static inline unsigned long +s_mem_to_reg (unsigned long s_mem) +{ + unsigned long frac = (s_mem >> 0) & 0x7fffff; + unsigned long sign = (s_mem >> 31) & 0x1; + unsigned long exp_msb = (s_mem >> 30) & 0x1; + unsigned long exp_low = (s_mem >> 23) & 0x7f; + unsigned long exp; + + exp = (exp_msb << 10) | exp_low; /* common case */ + if (exp_msb) { + if (exp_low == 0x7f) { + exp = 0x7ff; + } + } else { + if (exp_low == 0x00) { + exp = 0x000; + } else { + exp |= (0x7 << 7); + } + } + return (sign << 63) | (exp << 52) | (frac << 29); +} + +/* + * Convert an s-floating point value in register format to the + * corresponding value in memory format. + */ +static inline unsigned long +s_reg_to_mem (unsigned long s_reg) +{ + return ((s_reg >> 62) << 30) | ((s_reg << 5) >> 34); +} + +/* + * Handle user-level unaligned fault. Handling user-level unaligned + * faults is *extremely* slow and produces nasty messages. A user + * program *should* fix unaligned faults ASAP. + * + * Notice that we have (almost) the regular kernel stack layout here, + * so finding the appropriate registers is a little more difficult + * than in the kernel case. + * + * Finally, we handle regular integer load/stores only. In + * particular, load-linked/store-conditionally and floating point + * load/stores are not supported. The former make no sense with + * unaligned faults (they are guaranteed to fail) and I don't think + * the latter will occur in any decent program. + * + * Sigh. We *do* have to handle some FP operations, because GCC will + * uses them as temporary storage for integer memory to memory copies. + * However, we need to deal with stt/ldt and sts/lds only. + */ + +#define OP_INT_MASK ( 1L << 0x28 | 1L << 0x2c /* ldl stl */ \ + | 1L << 0x29 | 1L << 0x2d /* ldq stq */ \ + | 1L << 0x0c | 1L << 0x0d /* ldwu stw */ \ + | 1L << 0x0a | 1L << 0x0e ) /* ldbu stb */ + +#define OP_WRITE_MASK ( 1L << 0x26 | 1L << 0x27 /* sts stt */ \ + | 1L << 0x2c | 1L << 0x2d /* stl stq */ \ + | 1L << 0x0d | 1L << 0x0e ) /* stw stb */ + +#define R(x) ((size_t) &((struct pt_regs *)0)->x) + +static int unauser_reg_offsets[32] = { + R(r0), R(r1), R(r2), R(r3), R(r4), R(r5), R(r6), R(r7), R(r8), + /* r9 ... r15 are stored in front of regs. */ + -56, -48, -40, -32, -24, -16, -8, + R(r16), R(r17), R(r18), + R(r19), R(r20), R(r21), R(r22), R(r23), R(r24), R(r25), R(r26), + R(r27), R(r28), R(gp), + 0, 0 +}; + +#undef R + +asmlinkage void +do_entUnaUser(void __user * va, unsigned long opcode, + unsigned long reg, struct pt_regs *regs) +{ + static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5); + + unsigned long tmp1, tmp2, tmp3, tmp4; + unsigned long fake_reg, *reg_addr = &fake_reg; + siginfo_t info; + long error; + + /* Check the UAC bits to decide what the user wants us to do + with the unaliged access. */ + + if (!test_thread_flag (TIF_UAC_NOPRINT)) { + if (__ratelimit(&ratelimit)) { + printk("%s(%d): unaligned trap at %016lx: %p %lx %ld\n", + current->comm, task_pid_nr(current), + regs->pc - 4, va, opcode, reg); + } + } + if (test_thread_flag (TIF_UAC_SIGBUS)) + goto give_sigbus; + /* Not sure why you'd want to use this, but... */ + if (test_thread_flag (TIF_UAC_NOFIX)) + return; + + /* Don't bother reading ds in the access check since we already + know that this came from the user. Also rely on the fact that + the page at TASK_SIZE is unmapped and so can't be touched anyway. */ + if (!__access_ok((unsigned long)va, 0, USER_DS)) + goto give_sigsegv; + + ++unaligned[1].count; + unaligned[1].va = (unsigned long)va; + unaligned[1].pc = regs->pc - 4; + + if ((1L << opcode) & OP_INT_MASK) { + /* it's an integer load/store */ + if (reg < 30) { + reg_addr = (unsigned long *) + ((char *)regs + unauser_reg_offsets[reg]); + } else if (reg == 30) { + /* usp in PAL regs */ + fake_reg = rdusp(); + } else { + /* zero "register" */ + fake_reg = 0; + } + } + + /* We don't want to use the generic get/put unaligned macros as + we want to trap exceptions. Only if we actually get an + exception will we decide whether we should have caught it. */ + + switch (opcode) { + case 0x0c: /* ldwu */ + __asm__ __volatile__( + "1: ldq_u %1,0(%3)\n" + "2: ldq_u %2,1(%3)\n" + " extwl %1,%3,%1\n" + " extwh %2,%3,%2\n" + "3:\n" + ".section __ex_table,\"a\"\n" + " .long 1b - .\n" + " lda %1,3b-1b(%0)\n" + " .long 2b - .\n" + " lda %2,3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto give_sigsegv; + *reg_addr = tmp1|tmp2; + break; + + case 0x22: /* lds */ + __asm__ __volatile__( + "1: ldq_u %1,0(%3)\n" + "2: ldq_u %2,3(%3)\n" + " extll %1,%3,%1\n" + " extlh %2,%3,%2\n" + "3:\n" + ".section __ex_table,\"a\"\n" + " .long 1b - .\n" + " lda %1,3b-1b(%0)\n" + " .long 2b - .\n" + " lda %2,3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto give_sigsegv; + alpha_write_fp_reg(reg, s_mem_to_reg((int)(tmp1|tmp2))); + return; + + case 0x23: /* ldt */ + __asm__ __volatile__( + "1: ldq_u %1,0(%3)\n" + "2: ldq_u %2,7(%3)\n" + " extql %1,%3,%1\n" + " extqh %2,%3,%2\n" + "3:\n" + ".section __ex_table,\"a\"\n" + " .long 1b - .\n" + " lda %1,3b-1b(%0)\n" + " .long 2b - .\n" + " lda %2,3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto give_sigsegv; + alpha_write_fp_reg(reg, tmp1|tmp2); + return; + + case 0x28: /* ldl */ + __asm__ __volatile__( + "1: ldq_u %1,0(%3)\n" + "2: ldq_u %2,3(%3)\n" + " extll %1,%3,%1\n" + " extlh %2,%3,%2\n" + "3:\n" + ".section __ex_table,\"a\"\n" + " .long 1b - .\n" + " lda %1,3b-1b(%0)\n" + " .long 2b - .\n" + " lda %2,3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto give_sigsegv; + *reg_addr = (int)(tmp1|tmp2); + break; + + case 0x29: /* ldq */ + __asm__ __volatile__( + "1: ldq_u %1,0(%3)\n" + "2: ldq_u %2,7(%3)\n" + " extql %1,%3,%1\n" + " extqh %2,%3,%2\n" + "3:\n" + ".section __ex_table,\"a\"\n" + " .long 1b - .\n" + " lda %1,3b-1b(%0)\n" + " .long 2b - .\n" + " lda %2,3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto give_sigsegv; + *reg_addr = tmp1|tmp2; + break; + + /* Note that the store sequences do not indicate that they change + memory because it _should_ be affecting nothing in this context. + (Otherwise we have other, much larger, problems.) */ + case 0x0d: /* stw */ + __asm__ __volatile__( + "1: ldq_u %2,1(%5)\n" + "2: ldq_u %1,0(%5)\n" + " inswh %6,%5,%4\n" + " inswl %6,%5,%3\n" + " mskwh %2,%5,%2\n" + " mskwl %1,%5,%1\n" + " or %2,%4,%2\n" + " or %1,%3,%1\n" + "3: stq_u %2,1(%5)\n" + "4: stq_u %1,0(%5)\n" + "5:\n" + ".section __ex_table,\"a\"\n" + " .long 1b - .\n" + " lda %2,5b-1b(%0)\n" + " .long 2b - .\n" + " lda %1,5b-2b(%0)\n" + " .long 3b - .\n" + " lda $31,5b-3b(%0)\n" + " .long 4b - .\n" + " lda $31,5b-4b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), + "=&r"(tmp3), "=&r"(tmp4) + : "r"(va), "r"(*reg_addr), "0"(0)); + if (error) + goto give_sigsegv; + return; + + case 0x26: /* sts */ + fake_reg = s_reg_to_mem(alpha_read_fp_reg(reg)); + /* FALLTHRU */ + + case 0x2c: /* stl */ + __asm__ __volatile__( + "1: ldq_u %2,3(%5)\n" + "2: ldq_u %1,0(%5)\n" + " inslh %6,%5,%4\n" + " insll %6,%5,%3\n" + " msklh %2,%5,%2\n" + " mskll %1,%5,%1\n" + " or %2,%4,%2\n" + " or %1,%3,%1\n" + "3: stq_u %2,3(%5)\n" + "4: stq_u %1,0(%5)\n" + "5:\n" + ".section __ex_table,\"a\"\n" + " .long 1b - .\n" + " lda %2,5b-1b(%0)\n" + " .long 2b - .\n" + " lda %1,5b-2b(%0)\n" + " .long 3b - .\n" + " lda $31,5b-3b(%0)\n" + " .long 4b - .\n" + " lda $31,5b-4b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), + "=&r"(tmp3), "=&r"(tmp4) + : "r"(va), "r"(*reg_addr), "0"(0)); + if (error) + goto give_sigsegv; + return; + + case 0x27: /* stt */ + fake_reg = alpha_read_fp_reg(reg); + /* FALLTHRU */ + + case 0x2d: /* stq */ + __asm__ __volatile__( + "1: ldq_u %2,7(%5)\n" + "2: ldq_u %1,0(%5)\n" + " insqh %6,%5,%4\n" + " insql %6,%5,%3\n" + " mskqh %2,%5,%2\n" + " mskql %1,%5,%1\n" + " or %2,%4,%2\n" + " or %1,%3,%1\n" + "3: stq_u %2,7(%5)\n" + "4: stq_u %1,0(%5)\n" + "5:\n" + ".section __ex_table,\"a\"\n\t" + " .long 1b - .\n" + " lda %2,5b-1b(%0)\n" + " .long 2b - .\n" + " lda %1,5b-2b(%0)\n" + " .long 3b - .\n" + " lda $31,5b-3b(%0)\n" + " .long 4b - .\n" + " lda $31,5b-4b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), + "=&r"(tmp3), "=&r"(tmp4) + : "r"(va), "r"(*reg_addr), "0"(0)); + if (error) + goto give_sigsegv; + return; + + default: + /* What instruction were you trying to use, exactly? */ + goto give_sigbus; + } + + /* Only integer loads should get here; everyone else returns early. */ + if (reg == 30) + wrusp(fake_reg); + return; + +give_sigsegv: + regs->pc -= 4; /* make pc point to faulting insn */ + info.si_signo = SIGSEGV; + info.si_errno = 0; + + /* We need to replicate some of the logic in mm/fault.c, + since we don't have access to the fault code in the + exception handling return path. */ + if (!__access_ok((unsigned long)va, 0, USER_DS)) + info.si_code = SEGV_ACCERR; + else { + struct mm_struct *mm = current->mm; + down_read(&mm->mmap_sem); + if (find_vma(mm, (unsigned long)va)) + info.si_code = SEGV_ACCERR; + else + info.si_code = SEGV_MAPERR; + up_read(&mm->mmap_sem); + } + info.si_addr = va; + send_sig_info(SIGSEGV, &info, current); + return; + +give_sigbus: + regs->pc -= 4; + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRALN; + info.si_addr = va; + send_sig_info(SIGBUS, &info, current); + return; +} + +void __cpuinit +trap_init(void) +{ + /* Tell PAL-code what global pointer we want in the kernel. */ + register unsigned long gptr __asm__("$29"); + wrkgp(gptr); + + /* Hack for Multia (UDB) and JENSEN: some of their SRMs have + a bug in the handling of the opDEC fault. Fix it up if so. */ + if (implver() == IMPLVER_EV4) + opDEC_check(); + + wrent(entArith, 1); + wrent(entMM, 2); + wrent(entIF, 3); + wrent(entUna, 4); + wrent(entSys, 5); + wrent(entDbg, 6); +} diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S new file mode 100644 index 00000000..647b84c1 --- /dev/null +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -0,0 +1,77 @@ +#include <asm-generic/vmlinux.lds.h> +#include <asm/thread_info.h> +#include <asm/cache.h> +#include <asm/page.h> +#include <asm/setup.h> + +OUTPUT_FORMAT("elf64-alpha") +OUTPUT_ARCH(alpha) +ENTRY(__start) +PHDRS { kernel PT_LOAD; note PT_NOTE; } +jiffies = jiffies_64; +SECTIONS +{ +#ifdef CONFIG_ALPHA_LEGACY_START_ADDRESS + . = 0xfffffc0000310000; +#else + . = 0xfffffc0001010000; +#endif + + _text = .; /* Text and read-only data */ + .text : { + HEAD_TEXT + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + *(.fixup) + *(.gnu.warning) + } :kernel + swapper_pg_dir = SWAPPER_PGD; + _etext = .; /* End of text section */ + + NOTES :kernel :note + .dummy : { + *(.dummy) + } :kernel + + RODATA + EXCEPTION_TABLE(16) + + /* Will be freed after init */ + __init_begin = ALIGN(PAGE_SIZE); + INIT_TEXT_SECTION(PAGE_SIZE) + INIT_DATA_SECTION(16) + PERCPU_SECTION(L1_CACHE_BYTES) + /* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page + needed for the THREAD_SIZE aligned init_task gets freed after init */ + . = ALIGN(THREAD_SIZE); + __init_end = .; + /* Freed after init ends here */ + + _sdata = .; /* Start of rw data section */ + _data = .; + RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + + .got : { + *(.got) + } + .sdata : { + *(.sdata) + } + _edata = .; /* End of data section */ + + BSS_SECTION(0, 0, 0) + _end = .; + + .mdebug 0 : { + *(.mdebug) + } + .note 0 : { + *(.note) + } + + STABS_DEBUG + DWARF_DEBUG + + DISCARDS +} diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile new file mode 100644 index 00000000..c0a83ab6 --- /dev/null +++ b/arch/alpha/lib/Makefile @@ -0,0 +1,58 @@ +# +# Makefile for alpha-specific library files.. +# + +asflags-y := $(KBUILD_CFLAGS) +ccflags-y := -Werror + +# Many of these routines have implementations tuned for ev6. +# Choose them iff we're targeting ev6 specifically. +ev6-$(CONFIG_ALPHA_EV6) := ev6- + +# Several make use of the cttz instruction introduced in ev67. +ev67-$(CONFIG_ALPHA_EV67) := ev67- + +lib-y = __divqu.o __remqu.o __divlu.o __remlu.o \ + udelay.o \ + $(ev6-y)memset.o \ + $(ev6-y)memcpy.o \ + memmove.o \ + checksum.o \ + csum_partial_copy.o \ + $(ev67-y)strlen.o \ + $(ev67-y)strcat.o \ + strcpy.o \ + $(ev67-y)strncat.o \ + strncpy.o \ + $(ev6-y)stxcpy.o \ + $(ev6-y)stxncpy.o \ + $(ev67-y)strchr.o \ + $(ev67-y)strrchr.o \ + $(ev6-y)memchr.o \ + $(ev6-y)copy_user.o \ + $(ev6-y)clear_user.o \ + $(ev6-y)strncpy_from_user.o \ + $(ev67-y)strlen_user.o \ + $(ev6-y)csum_ipv6_magic.o \ + $(ev6-y)clear_page.o \ + $(ev6-y)copy_page.o \ + fpreg.o \ + callback_srm.o srm_puts.o srm_printk.o \ + fls.o + +lib-$(CONFIG_SMP) += dec_and_lock.o + +# The division routines are built from single source, with different defines. +AFLAGS___divqu.o = -DDIV +AFLAGS___remqu.o = -DREM +AFLAGS___divlu.o = -DDIV -DINTSIZE +AFLAGS___remlu.o = -DREM -DINTSIZE + +$(obj)/__divqu.o: $(obj)/$(ev6-y)divide.S + $(cmd_as_o_S) +$(obj)/__remqu.o: $(obj)/$(ev6-y)divide.S + $(cmd_as_o_S) +$(obj)/__divlu.o: $(obj)/$(ev6-y)divide.S + $(cmd_as_o_S) +$(obj)/__remlu.o: $(obj)/$(ev6-y)divide.S + $(cmd_as_o_S) diff --git a/arch/alpha/lib/callback_srm.S b/arch/alpha/lib/callback_srm.S new file mode 100644 index 00000000..8804bec2 --- /dev/null +++ b/arch/alpha/lib/callback_srm.S @@ -0,0 +1,103 @@ +/* + * arch/alpha/lib/callback_srm.S + */ + +#include <asm/console.h> + +.text +#define HWRPB_CRB_OFFSET 0xc0 + +#if defined(CONFIG_ALPHA_SRM) || defined(CONFIG_ALPHA_GENERIC) +.align 4 +srm_dispatch: +#if defined(CONFIG_ALPHA_GENERIC) + ldl $4,alpha_using_srm + beq $4,nosrm +#endif + ldq $0,hwrpb # gp is set up by CALLBACK macro. + ldl $25,0($25) # Pick up the wrapper data. + mov $20,$21 # Shift arguments right. + mov $19,$20 + ldq $1,HWRPB_CRB_OFFSET($0) + mov $18,$19 + mov $17,$18 + mov $16,$17 + addq $0,$1,$2 # CRB address + ldq $27,0($2) # DISPATCH procedure descriptor (VMS call std) + extwl $25,0,$16 # SRM callback function code + ldq $3,8($27) # call address + extwl $25,2,$25 # argument information (VMS calling std) + jmp ($3) # Return directly to caller of wrapper. + +.align 4 +.globl srm_fixup +.ent srm_fixup +srm_fixup: + ldgp $29,0($27) +#if defined(CONFIG_ALPHA_GENERIC) + ldl $4,alpha_using_srm + beq $4,nosrm +#endif + ldq $0,hwrpb + ldq $1,HWRPB_CRB_OFFSET($0) + addq $0,$1,$2 # CRB address + ldq $27,16($2) # VA of FIXUP procedure descriptor + ldq $3,8($27) # call address + lda $25,2($31) # two integer arguments + jmp ($3) # Return directly to caller of srm_fixup. +.end srm_fixup + +#if defined(CONFIG_ALPHA_GENERIC) +.align 3 +nosrm: + lda $0,-1($31) + ret +#endif + +#define CALLBACK(NAME, CODE, ARG_CNT) \ +.align 4; .globl callback_##NAME; .ent callback_##NAME; callback_##NAME##: \ +ldgp $29,0($27); br $25,srm_dispatch; .word CODE, ARG_CNT; .end callback_##NAME + +#else /* defined(CONFIG_ALPHA_SRM) || defined(CONFIG_ALPHA_GENERIC) */ + +#define CALLBACK(NAME, CODE, ARG_CNT) \ +.align 3; .globl callback_##NAME; .ent callback_##NAME; callback_##NAME##: \ +lda $0,-1($31); ret; .end callback_##NAME + +.align 3 +.globl srm_fixup +.ent srm_fixup +srm_fixup: + lda $0,-1($31) + ret +.end srm_fixup +#endif /* defined(CONFIG_ALPHA_SRM) || defined(CONFIG_ALPHA_GENERIC) */ + +CALLBACK(puts, CCB_PUTS, 4) +CALLBACK(open, CCB_OPEN, 3) +CALLBACK(close, CCB_CLOSE, 2) +CALLBACK(read, CCB_READ, 5) +CALLBACK(open_console, CCB_OPEN_CONSOLE, 1) +CALLBACK(close_console, CCB_CLOSE_CONSOLE, 1) +CALLBACK(getenv, CCB_GET_ENV, 4) +CALLBACK(setenv, CCB_SET_ENV, 4) +CALLBACK(getc, CCB_GETC, 2) +CALLBACK(reset_term, CCB_RESET_TERM, 2) +CALLBACK(term_int, CCB_SET_TERM_INT, 3) +CALLBACK(term_ctl, CCB_SET_TERM_CTL, 3) +CALLBACK(process_keycode, CCB_PROCESS_KEYCODE, 3) +CALLBACK(ioctl, CCB_IOCTL, 6) +CALLBACK(write, CCB_WRITE, 5) +CALLBACK(reset_env, CCB_RESET_ENV, 4) +CALLBACK(save_env, CCB_SAVE_ENV, 1) +CALLBACK(pswitch, CCB_PSWITCH, 3) +CALLBACK(bios_emul, CCB_BIOS_EMUL, 5) + +.data +__alpha_using_srm: # For use by bootpheader + .long 7 # value is not 1 for link debugging + .weak alpha_using_srm; alpha_using_srm = __alpha_using_srm +__callback_init_done: # For use by bootpheader + .long 7 # value is not 1 for link debugging + .weak callback_init_done; callback_init_done = __callback_init_done + diff --git a/arch/alpha/lib/checksum.c b/arch/alpha/lib/checksum.c new file mode 100644 index 00000000..199f6efa --- /dev/null +++ b/arch/alpha/lib/checksum.c @@ -0,0 +1,184 @@ +/* + * arch/alpha/lib/checksum.c + * + * This file contains network checksum routines that are better done + * in an architecture-specific manner due to speed.. + * Comments in other versions indicate that the algorithms are from RFC1071 + * + * accelerated versions (and 21264 assembly versions ) contributed by + * Rick Gorton <rick.gorton@alpha-processor.com> + */ + +#include <linux/module.h> +#include <linux/string.h> + +#include <asm/byteorder.h> + +static inline unsigned short from64to16(unsigned long x) +{ + /* Using extract instructions is a bit more efficient + than the original shift/bitmask version. */ + + union { + unsigned long ul; + unsigned int ui[2]; + unsigned short us[4]; + } in_v, tmp_v, out_v; + + in_v.ul = x; + tmp_v.ul = (unsigned long) in_v.ui[0] + (unsigned long) in_v.ui[1]; + + /* Since the bits of tmp_v.sh[3] are going to always be zero, + we don't have to bother to add that in. */ + out_v.ul = (unsigned long) tmp_v.us[0] + (unsigned long) tmp_v.us[1] + + (unsigned long) tmp_v.us[2]; + + /* Similarly, out_v.us[2] is always zero for the final add. */ + return out_v.us[0] + out_v.us[1]; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented. + */ +__sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + return (__force __sum16)~from64to16( + (__force u64)saddr + (__force u64)daddr + + (__force u64)sum + ((len + proto) << 8)); +} + +__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + unsigned long result; + + result = (__force u64)saddr + (__force u64)daddr + + (__force u64)sum + ((len + proto) << 8); + + /* Fold down to 32-bits so we don't lose in the typedef-less + network stack. */ + /* 64 to 33 */ + result = (result & 0xffffffff) + (result >> 32); + /* 33 to 32 */ + result = (result & 0xffffffff) + (result >> 32); + return (__force __wsum)result; +} +EXPORT_SYMBOL(csum_tcpudp_nofold); + +/* + * Do a 64-bit checksum on an arbitrary memory area.. + * + * This isn't a great routine, but it's not _horrible_ either. The + * inner loop could be unrolled a bit further, and there are better + * ways to do the carry, but this is reasonable. + */ +static inline unsigned long do_csum(const unsigned char * buff, int len) +{ + int odd, count; + unsigned long result = 0; + + if (len <= 0) + goto out; + odd = 1 & (unsigned long) buff; + if (odd) { + result = *buff << 8; + len--; + buff++; + } + count = len >> 1; /* nr of 16-bit words.. */ + if (count) { + if (2 & (unsigned long) buff) { + result += *(unsigned short *) buff; + count--; + len -= 2; + buff += 2; + } + count >>= 1; /* nr of 32-bit words.. */ + if (count) { + if (4 & (unsigned long) buff) { + result += *(unsigned int *) buff; + count--; + len -= 4; + buff += 4; + } + count >>= 1; /* nr of 64-bit words.. */ + if (count) { + unsigned long carry = 0; + do { + unsigned long w = *(unsigned long *) buff; + count--; + buff += 8; + result += carry; + result += w; + carry = (w > result); + } while (count); + result += carry; + result = (result & 0xffffffff) + (result >> 32); + } + if (len & 4) { + result += *(unsigned int *) buff; + buff += 4; + } + } + if (len & 2) { + result += *(unsigned short *) buff; + buff += 2; + } + } + if (len & 1) + result += *buff; + result = from64to16(result); + if (odd) + result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); +out: + return result; +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + */ +__sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + return (__force __sum16)~do_csum(iph,ihl*4); +} + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +__wsum csum_partial(const void *buff, int len, __wsum sum) +{ + unsigned long result = do_csum(buff, len); + + /* add in old sum, and carry.. */ + result += (__force u32)sum; + /* 32+c bits -> 32 bits */ + result = (result & 0xffffffff) + (result >> 32); + return (__force __wsum)result; +} + +EXPORT_SYMBOL(csum_partial); + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +__sum16 ip_compute_csum(const void *buff, int len) +{ + return (__force __sum16)~from64to16(do_csum(buff,len)); +} diff --git a/arch/alpha/lib/clear_page.S b/arch/alpha/lib/clear_page.S new file mode 100644 index 00000000..a221ae26 --- /dev/null +++ b/arch/alpha/lib/clear_page.S @@ -0,0 +1,39 @@ +/* + * arch/alpha/lib/clear_page.S + * + * Zero an entire page. + */ + + .text + .align 4 + .global clear_page + .ent clear_page +clear_page: + .prologue 0 + + lda $0,128 + nop + unop + nop + +1: stq $31,0($16) + stq $31,8($16) + stq $31,16($16) + stq $31,24($16) + + stq $31,32($16) + stq $31,40($16) + stq $31,48($16) + subq $0,1,$0 + + stq $31,56($16) + addq $16,64,$16 + unop + bne $0,1b + + ret + nop + unop + nop + + .end clear_page diff --git a/arch/alpha/lib/clear_user.S b/arch/alpha/lib/clear_user.S new file mode 100644 index 00000000..8860316c --- /dev/null +++ b/arch/alpha/lib/clear_user.S @@ -0,0 +1,113 @@ +/* + * arch/alpha/lib/clear_user.S + * Contributed by Richard Henderson <rth@tamu.edu> + * + * Zero user space, handling exceptions as we go. + * + * We have to make sure that $0 is always up-to-date and contains the + * right "bytes left to zero" value (and that it is updated only _after_ + * a successful copy). There is also some rather minor exception setup + * stuff. + * + * NOTE! This is not directly C-callable, because the calling semantics + * are different: + * + * Inputs: + * length in $0 + * destination address in $6 + * exception pointer in $7 + * return address in $28 (exceptions expect it there) + * + * Outputs: + * bytes left to copy in $0 + * + * Clobbers: + * $1,$2,$3,$4,$5,$6 + */ + +/* Allow an exception for an insn; exit if we get one. */ +#define EX(x,y...) \ + 99: x,##y; \ + .section __ex_table,"a"; \ + .long 99b - .; \ + lda $31, $exception-99b($31); \ + .previous + + .set noat + .set noreorder + .align 4 + + .globl __do_clear_user + .ent __do_clear_user + .frame $30, 0, $28 + .prologue 0 + +$loop: + and $1, 3, $4 # e0 : + beq $4, 1f # .. e1 : + +0: EX( stq_u $31, 0($6) ) # e0 : zero one word + subq $0, 8, $0 # .. e1 : + subq $4, 1, $4 # e0 : + addq $6, 8, $6 # .. e1 : + bne $4, 0b # e1 : + unop # : + +1: bic $1, 3, $1 # e0 : + beq $1, $tail # .. e1 : + +2: EX( stq_u $31, 0($6) ) # e0 : zero four words + subq $0, 8, $0 # .. e1 : + EX( stq_u $31, 8($6) ) # e0 : + subq $0, 8, $0 # .. e1 : + EX( stq_u $31, 16($6) ) # e0 : + subq $0, 8, $0 # .. e1 : + EX( stq_u $31, 24($6) ) # e0 : + subq $0, 8, $0 # .. e1 : + subq $1, 4, $1 # e0 : + addq $6, 32, $6 # .. e1 : + bne $1, 2b # e1 : + +$tail: + bne $2, 1f # e1 : is there a tail to do? + ret $31, ($28), 1 # .. e1 : + +1: EX( ldq_u $5, 0($6) ) # e0 : + clr $0 # .. e1 : + nop # e1 : + mskqh $5, $0, $5 # e0 : + EX( stq_u $5, 0($6) ) # e0 : + ret $31, ($28), 1 # .. e1 : + +__do_clear_user: + and $6, 7, $4 # e0 : find dest misalignment + beq $0, $zerolength # .. e1 : + addq $0, $4, $1 # e0 : bias counter + and $1, 7, $2 # e1 : number of bytes in tail + srl $1, 3, $1 # e0 : + beq $4, $loop # .. e1 : + + EX( ldq_u $5, 0($6) ) # e0 : load dst word to mask back in + beq $1, $oneword # .. e1 : sub-word store? + + mskql $5, $6, $5 # e0 : take care of misaligned head + addq $6, 8, $6 # .. e1 : + EX( stq_u $5, -8($6) ) # e0 : + addq $0, $4, $0 # .. e1 : bytes left -= 8 - misalignment + subq $1, 1, $1 # e0 : + subq $0, 8, $0 # .. e1 : + br $loop # e1 : + unop # : + +$oneword: + mskql $5, $6, $4 # e0 : + mskqh $5, $2, $5 # e0 : + or $5, $4, $5 # e1 : + EX( stq_u $5, 0($6) ) # e0 : + clr $0 # .. e1 : + +$zerolength: +$exception: + ret $31, ($28), 1 # .. e1 : + + .end __do_clear_user diff --git a/arch/alpha/lib/copy_page.S b/arch/alpha/lib/copy_page.S new file mode 100644 index 00000000..9f3b9745 --- /dev/null +++ b/arch/alpha/lib/copy_page.S @@ -0,0 +1,49 @@ +/* + * arch/alpha/lib/copy_page.S + * + * Copy an entire page. + */ + + .text + .align 4 + .global copy_page + .ent copy_page +copy_page: + .prologue 0 + + lda $18,128 + nop + unop + nop + +1: ldq $0,0($17) + ldq $1,8($17) + ldq $2,16($17) + ldq $3,24($17) + + ldq $4,32($17) + ldq $5,40($17) + ldq $6,48($17) + ldq $7,56($17) + + stq $0,0($16) + subq $18,1,$18 + stq $1,8($16) + addq $17,64,$17 + + stq $2,16($16) + stq $3,24($16) + stq $4,32($16) + stq $5,40($16) + + stq $6,48($16) + stq $7,56($16) + addq $16,64,$16 + bne $18, 1b + + ret + nop + unop + nop + + .end copy_page diff --git a/arch/alpha/lib/copy_user.S b/arch/alpha/lib/copy_user.S new file mode 100644 index 00000000..6f3fab9e --- /dev/null +++ b/arch/alpha/lib/copy_user.S @@ -0,0 +1,145 @@ +/* + * arch/alpha/lib/copy_user.S + * + * Copy to/from user space, handling exceptions as we go.. This + * isn't exactly pretty. + * + * This is essentially the same as "memcpy()", but with a few twists. + * Notably, we have to make sure that $0 is always up-to-date and + * contains the right "bytes left to copy" value (and that it is updated + * only _after_ a successful copy). There is also some rather minor + * exception setup stuff.. + * + * NOTE! This is not directly C-callable, because the calling semantics are + * different: + * + * Inputs: + * length in $0 + * destination address in $6 + * source address in $7 + * return address in $28 + * + * Outputs: + * bytes left to copy in $0 + * + * Clobbers: + * $1,$2,$3,$4,$5,$6,$7 + */ + +/* Allow an exception for an insn; exit if we get one. */ +#define EXI(x,y...) \ + 99: x,##y; \ + .section __ex_table,"a"; \ + .long 99b - .; \ + lda $31, $exitin-99b($31); \ + .previous + +#define EXO(x,y...) \ + 99: x,##y; \ + .section __ex_table,"a"; \ + .long 99b - .; \ + lda $31, $exitout-99b($31); \ + .previous + + .set noat + .align 4 + .globl __copy_user + .ent __copy_user +__copy_user: + .prologue 0 + and $6,7,$3 + beq $0,$35 + beq $3,$36 + subq $3,8,$3 + .align 4 +$37: + EXI( ldq_u $1,0($7) ) + EXO( ldq_u $2,0($6) ) + extbl $1,$7,$1 + mskbl $2,$6,$2 + insbl $1,$6,$1 + addq $3,1,$3 + bis $1,$2,$1 + EXO( stq_u $1,0($6) ) + subq $0,1,$0 + addq $6,1,$6 + addq $7,1,$7 + beq $0,$41 + bne $3,$37 +$36: + and $7,7,$1 + bic $0,7,$4 + beq $1,$43 + beq $4,$48 + EXI( ldq_u $3,0($7) ) + .align 4 +$50: + EXI( ldq_u $2,8($7) ) + subq $4,8,$4 + extql $3,$7,$3 + extqh $2,$7,$1 + bis $3,$1,$1 + EXO( stq $1,0($6) ) + addq $7,8,$7 + subq $0,8,$0 + addq $6,8,$6 + bis $2,$2,$3 + bne $4,$50 +$48: + beq $0,$41 + .align 4 +$57: + EXI( ldq_u $1,0($7) ) + EXO( ldq_u $2,0($6) ) + extbl $1,$7,$1 + mskbl $2,$6,$2 + insbl $1,$6,$1 + bis $1,$2,$1 + EXO( stq_u $1,0($6) ) + subq $0,1,$0 + addq $6,1,$6 + addq $7,1,$7 + bne $0,$57 + br $31,$41 + .align 4 +$43: + beq $4,$65 + .align 4 +$66: + EXI( ldq $1,0($7) ) + subq $4,8,$4 + EXO( stq $1,0($6) ) + addq $7,8,$7 + subq $0,8,$0 + addq $6,8,$6 + bne $4,$66 +$65: + beq $0,$41 + EXI( ldq $2,0($7) ) + EXO( ldq $1,0($6) ) + mskql $2,$0,$2 + mskqh $1,$0,$1 + bis $2,$1,$2 + EXO( stq $2,0($6) ) + bis $31,$31,$0 +$41: +$35: +$exitout: + ret $31,($28),1 + +$exitin: + /* A stupid byte-by-byte zeroing of the rest of the output + buffer. This cures security holes by never leaving + random kernel data around to be copied elsewhere. */ + + mov $0,$1 +$101: + EXO ( ldq_u $2,0($6) ) + subq $1,1,$1 + mskbl $2,$6,$2 + EXO ( stq_u $2,0($6) ) + addq $6,1,$6 + bgt $1,$101 + ret $31,($28),1 + + .end __copy_user diff --git a/arch/alpha/lib/csum_ipv6_magic.S b/arch/alpha/lib/csum_ipv6_magic.S new file mode 100644 index 00000000..2c2acb96 --- /dev/null +++ b/arch/alpha/lib/csum_ipv6_magic.S @@ -0,0 +1,115 @@ +/* + * arch/alpha/lib/csum_ipv6_magic.S + * Contributed by Richard Henderson <rth@tamu.edu> + * + * unsigned short csum_ipv6_magic(struct in6_addr *saddr, + * struct in6_addr *daddr, + * __u32 len, + * unsigned short proto, + * unsigned int csum); + * + * Misalignment handling (which costs 16 instructions / 8 cycles) + * added by Ivan Kokshaysky <ink@jurassic.park.msu.ru> + */ + + .globl csum_ipv6_magic + .align 4 + .ent csum_ipv6_magic + .frame $30,0,$26,0 +csum_ipv6_magic: + .prologue 0 + + ldq_u $0,0($16) # e0 : load src & dst addr words + zapnot $20,15,$20 # .. e1 : zero extend incoming csum + extqh $18,1,$4 # e0 : byte swap len & proto while we wait + ldq_u $21,7($16) # .. e1 : handle misalignment + + extbl $18,1,$5 # e0 : + ldq_u $1,8($16) # .. e1 : + extbl $18,2,$6 # e0 : + ldq_u $22,15($16) # .. e1 : + + extbl $18,3,$18 # e0 : + ldq_u $2,0($17) # .. e1 : + sra $4,32,$4 # e0 : + ldq_u $23,7($17) # .. e1 : + + extql $0,$16,$0 # e0 : + ldq_u $3,8($17) # .. e1 : + extqh $21,$16,$21 # e0 : + ldq_u $24,15($17) # .. e1 : + + sll $5,16,$5 # e0 : + or $0,$21,$0 # .. e1 : 1st src word complete + extql $1,$16,$1 # e0 : + addq $20,$0,$20 # .. e1 : begin summing the words + + extqh $22,$16,$22 # e0 : + cmpult $20,$0,$0 # .. e1 : + sll $6,8,$6 # e0 : + or $1,$22,$1 # .. e1 : 2nd src word complete + + extql $2,$17,$2 # e0 : + or $4,$18,$18 # .. e1 : + extqh $23,$17,$23 # e0 : + or $5,$6,$5 # .. e1 : + + extql $3,$17,$3 # e0 : + or $2,$23,$2 # .. e1 : 1st dst word complete + extqh $24,$17,$24 # e0 : + or $18,$5,$18 # .. e1 : len complete + + extwh $19,7,$7 # e0 : + or $3,$24,$3 # .. e1 : 2nd dst word complete + extbl $19,1,$19 # e0 : + addq $20,$1,$20 # .. e1 : + + or $19,$7,$19 # e0 : + cmpult $20,$1,$1 # .. e1 : + sll $19,48,$19 # e0 : + nop # .. e0 : + + sra $19,32,$19 # e0 : proto complete + addq $20,$2,$20 # .. e1 : + cmpult $20,$2,$2 # e0 : + addq $20,$3,$20 # .. e1 : + + cmpult $20,$3,$3 # e0 : + addq $20,$18,$20 # .. e1 : + cmpult $20,$18,$18 # e0 : + addq $20,$19,$20 # .. e1 : + + cmpult $20,$19,$19 # e0 : + addq $0,$1,$0 # .. e1 : merge the carries back into the csum + addq $2,$3,$2 # e0 : + addq $18,$19,$18 # .. e1 : + + addq $0,$2,$0 # e0 : + addq $20,$18,$20 # .. e1 : + addq $0,$20,$0 # e0 : + unop # : + + extwl $0,2,$2 # e0 : begin folding the 64-bit value + zapnot $0,3,$3 # .. e1 : + extwl $0,4,$1 # e0 : + addq $2,$3,$3 # .. e1 : + + extwl $0,6,$0 # e0 : + addq $3,$1,$3 # .. e1 : + addq $0,$3,$0 # e0 : + unop # : + + extwl $0,2,$1 # e0 : fold 18-bit value + zapnot $0,3,$0 # .. e1 : + addq $0,$1,$0 # e0 : + unop # : + + extwl $0,2,$1 # e0 : fold 17-bit value + zapnot $0,3,$0 # .. e1 : + addq $0,$1,$0 # e0 : + not $0,$0 # .. e1 : and complement. + + zapnot $0,3,$0 # e0 : + ret # .. e1 : + + .end csum_ipv6_magic diff --git a/arch/alpha/lib/csum_partial_copy.c b/arch/alpha/lib/csum_partial_copy.c new file mode 100644 index 00000000..40736da9 --- /dev/null +++ b/arch/alpha/lib/csum_partial_copy.c @@ -0,0 +1,378 @@ +/* + * csum_partial_copy - do IP checksumming and copy + * + * (C) Copyright 1996 Linus Torvalds + * accelerated versions (and 21264 assembly versions ) contributed by + * Rick Gorton <rick.gorton@alpha-processor.com> + * + * Don't look at this too closely - you'll go mad. The things + * we do for performance.. + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <asm/uaccess.h> + + +#define ldq_u(x,y) \ +__asm__ __volatile__("ldq_u %0,%1":"=r" (x):"m" (*(const unsigned long *)(y))) + +#define stq_u(x,y) \ +__asm__ __volatile__("stq_u %1,%0":"=m" (*(unsigned long *)(y)):"r" (x)) + +#define extql(x,y,z) \ +__asm__ __volatile__("extql %1,%2,%0":"=r" (z):"r" (x),"r" (y)) + +#define extqh(x,y,z) \ +__asm__ __volatile__("extqh %1,%2,%0":"=r" (z):"r" (x),"r" (y)) + +#define mskql(x,y,z) \ +__asm__ __volatile__("mskql %1,%2,%0":"=r" (z):"r" (x),"r" (y)) + +#define mskqh(x,y,z) \ +__asm__ __volatile__("mskqh %1,%2,%0":"=r" (z):"r" (x),"r" (y)) + +#define insql(x,y,z) \ +__asm__ __volatile__("insql %1,%2,%0":"=r" (z):"r" (x),"r" (y)) + +#define insqh(x,y,z) \ +__asm__ __volatile__("insqh %1,%2,%0":"=r" (z):"r" (x),"r" (y)) + + +#define __get_user_u(x,ptr) \ +({ \ + long __guu_err; \ + __asm__ __volatile__( \ + "1: ldq_u %0,%2\n" \ + "2:\n" \ + ".section __ex_table,\"a\"\n" \ + " .long 1b - .\n" \ + " lda %0,2b-1b(%1)\n" \ + ".previous" \ + : "=r"(x), "=r"(__guu_err) \ + : "m"(__m(ptr)), "1"(0)); \ + __guu_err; \ +}) + +#define __put_user_u(x,ptr) \ +({ \ + long __puu_err; \ + __asm__ __volatile__( \ + "1: stq_u %2,%1\n" \ + "2:\n" \ + ".section __ex_table,\"a\"\n" \ + " .long 1b - ." \ + " lda $31,2b-1b(%0)\n" \ + ".previous" \ + : "=r"(__puu_err) \ + : "m"(__m(addr)), "rJ"(x), "0"(0)); \ + __puu_err; \ +}) + + +static inline unsigned short from64to16(unsigned long x) +{ + /* Using extract instructions is a bit more efficient + than the original shift/bitmask version. */ + + union { + unsigned long ul; + unsigned int ui[2]; + unsigned short us[4]; + } in_v, tmp_v, out_v; + + in_v.ul = x; + tmp_v.ul = (unsigned long) in_v.ui[0] + (unsigned long) in_v.ui[1]; + + /* Since the bits of tmp_v.sh[3] are going to always be zero, + we don't have to bother to add that in. */ + out_v.ul = (unsigned long) tmp_v.us[0] + (unsigned long) tmp_v.us[1] + + (unsigned long) tmp_v.us[2]; + + /* Similarly, out_v.us[2] is always zero for the final add. */ + return out_v.us[0] + out_v.us[1]; +} + + + +/* + * Ok. This isn't fun, but this is the EASY case. + */ +static inline unsigned long +csum_partial_cfu_aligned(const unsigned long __user *src, unsigned long *dst, + long len, unsigned long checksum, + int *errp) +{ + unsigned long carry = 0; + int err = 0; + + while (len >= 0) { + unsigned long word; + err |= __get_user(word, src); + checksum += carry; + src++; + checksum += word; + len -= 8; + carry = checksum < word; + *dst = word; + dst++; + } + len += 8; + checksum += carry; + if (len) { + unsigned long word, tmp; + err |= __get_user(word, src); + tmp = *dst; + mskql(word, len, word); + checksum += word; + mskqh(tmp, len, tmp); + carry = checksum < word; + *dst = word | tmp; + checksum += carry; + } + if (err) *errp = err; + return checksum; +} + +/* + * This is even less fun, but this is still reasonably + * easy. + */ +static inline unsigned long +csum_partial_cfu_dest_aligned(const unsigned long __user *src, + unsigned long *dst, + unsigned long soff, + long len, unsigned long checksum, + int *errp) +{ + unsigned long first; + unsigned long word, carry; + unsigned long lastsrc = 7+len+(unsigned long)src; + int err = 0; + + err |= __get_user_u(first,src); + carry = 0; + while (len >= 0) { + unsigned long second; + + err |= __get_user_u(second, src+1); + extql(first, soff, word); + len -= 8; + src++; + extqh(second, soff, first); + checksum += carry; + word |= first; + first = second; + checksum += word; + *dst = word; + dst++; + carry = checksum < word; + } + len += 8; + checksum += carry; + if (len) { + unsigned long tmp; + unsigned long second; + err |= __get_user_u(second, lastsrc); + tmp = *dst; + extql(first, soff, word); + extqh(second, soff, first); + word |= first; + mskql(word, len, word); + checksum += word; + mskqh(tmp, len, tmp); + carry = checksum < word; + *dst = word | tmp; + checksum += carry; + } + if (err) *errp = err; + return checksum; +} + +/* + * This is slightly less fun than the above.. + */ +static inline unsigned long +csum_partial_cfu_src_aligned(const unsigned long __user *src, + unsigned long *dst, + unsigned long doff, + long len, unsigned long checksum, + unsigned long partial_dest, + int *errp) +{ + unsigned long carry = 0; + unsigned long word; + unsigned long second_dest; + int err = 0; + + mskql(partial_dest, doff, partial_dest); + while (len >= 0) { + err |= __get_user(word, src); + len -= 8; + insql(word, doff, second_dest); + checksum += carry; + stq_u(partial_dest | second_dest, dst); + src++; + checksum += word; + insqh(word, doff, partial_dest); + carry = checksum < word; + dst++; + } + len += 8; + if (len) { + checksum += carry; + err |= __get_user(word, src); + mskql(word, len, word); + len -= 8; + checksum += word; + insql(word, doff, second_dest); + len += doff; + carry = checksum < word; + partial_dest |= second_dest; + if (len >= 0) { + stq_u(partial_dest, dst); + if (!len) goto out; + dst++; + insqh(word, doff, partial_dest); + } + doff = len; + } + ldq_u(second_dest, dst); + mskqh(second_dest, doff, second_dest); + stq_u(partial_dest | second_dest, dst); +out: + checksum += carry; + if (err) *errp = err; + return checksum; +} + +/* + * This is so totally un-fun that it's frightening. Don't + * look at this too closely, you'll go blind. + */ +static inline unsigned long +csum_partial_cfu_unaligned(const unsigned long __user * src, + unsigned long * dst, + unsigned long soff, unsigned long doff, + long len, unsigned long checksum, + unsigned long partial_dest, + int *errp) +{ + unsigned long carry = 0; + unsigned long first; + unsigned long lastsrc; + int err = 0; + + err |= __get_user_u(first, src); + lastsrc = 7+len+(unsigned long)src; + mskql(partial_dest, doff, partial_dest); + while (len >= 0) { + unsigned long second, word; + unsigned long second_dest; + + err |= __get_user_u(second, src+1); + extql(first, soff, word); + checksum += carry; + len -= 8; + extqh(second, soff, first); + src++; + word |= first; + first = second; + insql(word, doff, second_dest); + checksum += word; + stq_u(partial_dest | second_dest, dst); + carry = checksum < word; + insqh(word, doff, partial_dest); + dst++; + } + len += doff; + checksum += carry; + if (len >= 0) { + unsigned long second, word; + unsigned long second_dest; + + err |= __get_user_u(second, lastsrc); + extql(first, soff, word); + extqh(second, soff, first); + word |= first; + first = second; + mskql(word, len-doff, word); + checksum += word; + insql(word, doff, second_dest); + carry = checksum < word; + stq_u(partial_dest | second_dest, dst); + if (len) { + ldq_u(second_dest, dst+1); + insqh(word, doff, partial_dest); + mskqh(second_dest, len, second_dest); + stq_u(partial_dest | second_dest, dst+1); + } + checksum += carry; + } else { + unsigned long second, word; + unsigned long second_dest; + + err |= __get_user_u(second, lastsrc); + extql(first, soff, word); + extqh(second, soff, first); + word |= first; + ldq_u(second_dest, dst); + mskql(word, len-doff, word); + checksum += word; + mskqh(second_dest, len, second_dest); + carry = checksum < word; + insql(word, doff, word); + stq_u(partial_dest | word | second_dest, dst); + checksum += carry; + } + if (err) *errp = err; + return checksum; +} + +__wsum +csum_partial_copy_from_user(const void __user *src, void *dst, int len, + __wsum sum, int *errp) +{ + unsigned long checksum = (__force u32) sum; + unsigned long soff = 7 & (unsigned long) src; + unsigned long doff = 7 & (unsigned long) dst; + + if (len) { + if (!doff) { + if (!soff) + checksum = csum_partial_cfu_aligned( + (const unsigned long __user *) src, + (unsigned long *) dst, + len-8, checksum, errp); + else + checksum = csum_partial_cfu_dest_aligned( + (const unsigned long __user *) src, + (unsigned long *) dst, + soff, len-8, checksum, errp); + } else { + unsigned long partial_dest; + ldq_u(partial_dest, dst); + if (!soff) + checksum = csum_partial_cfu_src_aligned( + (const unsigned long __user *) src, + (unsigned long *) dst, + doff, len-8, checksum, + partial_dest, errp); + else + checksum = csum_partial_cfu_unaligned( + (const unsigned long __user *) src, + (unsigned long *) dst, + soff, doff, len-8, checksum, + partial_dest, errp); + } + checksum = from64to16 (checksum); + } + return (__force __wsum)checksum; +} + +__wsum +csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum) +{ + return csum_partial_copy_from_user((__force const void __user *)src, + dst, len, sum, NULL); +} diff --git a/arch/alpha/lib/dbg_current.S b/arch/alpha/lib/dbg_current.S new file mode 100644 index 00000000..e6d07101 --- /dev/null +++ b/arch/alpha/lib/dbg_current.S @@ -0,0 +1,29 @@ +/* + * arch/alpha/lib/dbg_current.S + * Contributed by Richard Henderson (rth@cygnus.com) + * + * Trap if we find current not correct. + */ + +#include <asm/pal.h> + + .text + .set noat + + .globl _mcount + .ent _mcount +_mcount: + .frame $30, 0, $28, 0 + .prologue 0 + + lda $0, -0x4000($30) + cmpult $8, $30, $1 + cmpule $0, $30, $2 + and $1, $2, $3 + bne $3, 1f + + call_pal PAL_bugchk + +1: ret $31, ($28), 1 + + .end _mcount diff --git a/arch/alpha/lib/dbg_stackcheck.S b/arch/alpha/lib/dbg_stackcheck.S new file mode 100644 index 00000000..78f6b924 --- /dev/null +++ b/arch/alpha/lib/dbg_stackcheck.S @@ -0,0 +1,27 @@ +/* + * arch/alpha/lib/dbg_stackcheck.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Verify that we have not overflowed the stack. Oops if we have. + */ + +#include <asm/asm-offsets.h> + + .text + .set noat + + .align 3 + .globl _mcount + .ent _mcount +_mcount: + .frame $30, 0, $28, 0 + .prologue 0 + + lda $0, TASK_SIZE($8) + cmpult $30, $0, $0 + bne $0, 1f + ret ($28) +1: stq $31, -8($31) # oops me, damn it. + br 1b + + .end _mcount diff --git a/arch/alpha/lib/dbg_stackkill.S b/arch/alpha/lib/dbg_stackkill.S new file mode 100644 index 00000000..c1e40a1a --- /dev/null +++ b/arch/alpha/lib/dbg_stackkill.S @@ -0,0 +1,35 @@ +/* + * arch/alpha/lib/dbg_stackkill.S + * Contributed by Richard Henderson (rth@cygnus.com) + * + * Clobber the balance of the kernel stack, hoping to catch + * uninitialized local variables in the act. + */ + +#include <asm/asm-offsets.h> + + .text + .set noat + + .align 5 + .globl _mcount + .ent _mcount +_mcount: + .frame $30, 0, $28, 0 + .prologue 0 + + ldi $0, 0xdeadbeef + lda $2, -STACK_SIZE + sll $0, 32, $1 + and $30, $2, $2 + or $0, $1, $0 + lda $2, TASK_SIZE($2) + cmpult $2, $30, $1 + beq $1, 2f +1: stq $0, 0($2) + addq $2, 8, $2 + cmpult $2, $30, $1 + bne $1, 1b +2: ret ($28) + + .end _mcount diff --git a/arch/alpha/lib/dec_and_lock.c b/arch/alpha/lib/dec_and_lock.c new file mode 100644 index 00000000..f9f5fe83 --- /dev/null +++ b/arch/alpha/lib/dec_and_lock.c @@ -0,0 +1,41 @@ +/* + * arch/alpha/lib/dec_and_lock.c + * + * ll/sc version of atomic_dec_and_lock() + * + */ + +#include <linux/spinlock.h> +#include <linux/atomic.h> + + asm (".text \n\ + .global _atomic_dec_and_lock \n\ + .ent _atomic_dec_and_lock \n\ + .align 4 \n\ +_atomic_dec_and_lock: \n\ + .prologue 0 \n\ +1: ldl_l $1, 0($16) \n\ + subl $1, 1, $1 \n\ + beq $1, 2f \n\ + stl_c $1, 0($16) \n\ + beq $1, 4f \n\ + mb \n\ + clr $0 \n\ + ret \n\ +2: br $29, 3f \n\ +3: ldgp $29, 0($29) \n\ + br $atomic_dec_and_lock_1..ng \n\ + .subsection 2 \n\ +4: br 1b \n\ + .previous \n\ + .end _atomic_dec_and_lock"); + +static int __used atomic_dec_and_lock_1(atomic_t *atomic, spinlock_t *lock) +{ + /* Slow path */ + spin_lock(lock); + if (atomic_dec_and_test(atomic)) + return 1; + spin_unlock(lock); + return 0; +} diff --git a/arch/alpha/lib/divide.S b/arch/alpha/lib/divide.S new file mode 100644 index 00000000..2d1a0484 --- /dev/null +++ b/arch/alpha/lib/divide.S @@ -0,0 +1,195 @@ +/* + * arch/alpha/lib/divide.S + * + * (C) 1995 Linus Torvalds + * + * Alpha division.. + */ + +/* + * The alpha chip doesn't provide hardware division, so we have to do it + * by hand. The compiler expects the functions + * + * __divqu: 64-bit unsigned long divide + * __remqu: 64-bit unsigned long remainder + * __divqs/__remqs: signed 64-bit + * __divlu/__remlu: unsigned 32-bit + * __divls/__remls: signed 32-bit + * + * These are not normal C functions: instead of the normal + * calling sequence, these expect their arguments in registers + * $24 and $25, and return the result in $27. Register $28 may + * be clobbered (assembly temporary), anything else must be saved. + * + * In short: painful. + * + * This is a rather simple bit-at-a-time algorithm: it's very good + * at dividing random 64-bit numbers, but the more usual case where + * the divisor is small is handled better by the DEC algorithm + * using lookup tables. This uses much less memory, though, and is + * nicer on the cache.. Besides, I don't know the copyright status + * of the DEC code. + */ + +/* + * My temporaries: + * $0 - current bit + * $1 - shifted divisor + * $2 - modulus/quotient + * + * $23 - return address + * $24 - dividend + * $25 - divisor + * + * $27 - quotient/modulus + * $28 - compare status + */ + +#define halt .long 0 + +/* + * Select function type and registers + */ +#define mask $0 +#define divisor $1 +#define compare $28 +#define tmp1 $3 +#define tmp2 $4 + +#ifdef DIV +#define DIV_ONLY(x,y...) x,##y +#define MOD_ONLY(x,y...) +#define func(x) __div##x +#define modulus $2 +#define quotient $27 +#define GETSIGN(x) xor $24,$25,x +#define STACK 48 +#else +#define DIV_ONLY(x,y...) +#define MOD_ONLY(x,y...) x,##y +#define func(x) __rem##x +#define modulus $27 +#define quotient $2 +#define GETSIGN(x) bis $24,$24,x +#define STACK 32 +#endif + +/* + * For 32-bit operations, we need to extend to 64-bit + */ +#ifdef INTSIZE +#define ufunction func(lu) +#define sfunction func(l) +#define LONGIFY(x) zapnot x,15,x +#define SLONGIFY(x) addl x,0,x +#else +#define ufunction func(qu) +#define sfunction func(q) +#define LONGIFY(x) +#define SLONGIFY(x) +#endif + +.set noat +.align 3 +.globl ufunction +.ent ufunction +ufunction: + subq $30,STACK,$30 + .frame $30,STACK,$23 + .prologue 0 + +7: stq $1, 0($30) + bis $25,$25,divisor + stq $2, 8($30) + bis $24,$24,modulus + stq $0,16($30) + bis $31,$31,quotient + LONGIFY(divisor) + stq tmp1,24($30) + LONGIFY(modulus) + bis $31,1,mask + DIV_ONLY(stq tmp2,32($30)) + beq divisor, 9f /* div by zero */ + +#ifdef INTSIZE + /* + * shift divisor left, using 3-bit shifts for + * 32-bit divides as we can't overflow. Three-bit + * shifts will result in looping three times less + * here, but can result in two loops more later. + * Thus using a large shift isn't worth it (and + * s8add pairs better than a sll..) + */ +1: cmpult divisor,modulus,compare + s8addq divisor,$31,divisor + s8addq mask,$31,mask + bne compare,1b +#else +1: cmpult divisor,modulus,compare + blt divisor, 2f + addq divisor,divisor,divisor + addq mask,mask,mask + bne compare,1b + unop +#endif + + /* ok, start to go right again.. */ +2: DIV_ONLY(addq quotient,mask,tmp2) + srl mask,1,mask + cmpule divisor,modulus,compare + subq modulus,divisor,tmp1 + DIV_ONLY(cmovne compare,tmp2,quotient) + srl divisor,1,divisor + cmovne compare,tmp1,modulus + bne mask,2b + +9: ldq $1, 0($30) + ldq $2, 8($30) + ldq $0,16($30) + ldq tmp1,24($30) + DIV_ONLY(ldq tmp2,32($30)) + addq $30,STACK,$30 + ret $31,($23),1 + .end ufunction + +/* + * Uhh.. Ugly signed division. I'd rather not have it at all, but + * it's needed in some circumstances. There are different ways to + * handle this, really. This does: + * -a / b = a / -b = -(a / b) + * -a % b = -(a % b) + * a % -b = a % b + * which is probably not the best solution, but at least should + * have the property that (x/y)*y + (x%y) = x. + */ +.align 3 +.globl sfunction +.ent sfunction +sfunction: + subq $30,STACK,$30 + .frame $30,STACK,$23 + .prologue 0 + bis $24,$25,$28 + SLONGIFY($28) + bge $28,7b + stq $24,0($30) + subq $31,$24,$28 + stq $25,8($30) + cmovlt $24,$28,$24 /* abs($24) */ + stq $23,16($30) + subq $31,$25,$28 + stq tmp1,24($30) + cmovlt $25,$28,$25 /* abs($25) */ + unop + bsr $23,ufunction + ldq $24,0($30) + ldq $25,8($30) + GETSIGN($28) + subq $31,$27,tmp1 + SLONGIFY($28) + ldq $23,16($30) + cmovlt $28,tmp1,$27 + ldq tmp1,24($30) + addq $30,STACK,$30 + ret $31,($23),1 + .end sfunction diff --git a/arch/alpha/lib/ev6-clear_page.S b/arch/alpha/lib/ev6-clear_page.S new file mode 100644 index 00000000..adf4f7be --- /dev/null +++ b/arch/alpha/lib/ev6-clear_page.S @@ -0,0 +1,54 @@ +/* + * arch/alpha/lib/ev6-clear_page.S + * + * Zero an entire page. + */ + + .text + .align 4 + .global clear_page + .ent clear_page +clear_page: + .prologue 0 + + lda $0,128 + lda $1,125 + addq $16,64,$2 + addq $16,128,$3 + + addq $16,192,$17 + wh64 ($16) + wh64 ($2) + wh64 ($3) + +1: wh64 ($17) + stq $31,0($16) + subq $0,1,$0 + subq $1,1,$1 + + stq $31,8($16) + stq $31,16($16) + addq $17,64,$2 + nop + + stq $31,24($16) + stq $31,32($16) + cmovgt $1,$2,$17 + nop + + stq $31,40($16) + stq $31,48($16) + nop + nop + + stq $31,56($16) + addq $16,64,$16 + nop + bne $0,1b + + ret + nop + nop + nop + + .end clear_page diff --git a/arch/alpha/lib/ev6-clear_user.S b/arch/alpha/lib/ev6-clear_user.S new file mode 100644 index 00000000..4f42a16b --- /dev/null +++ b/arch/alpha/lib/ev6-clear_user.S @@ -0,0 +1,225 @@ +/* + * arch/alpha/lib/ev6-clear_user.S + * 21264 version contributed by Rick Gorton <rick.gorton@alpha-processor.com> + * + * Zero user space, handling exceptions as we go. + * + * We have to make sure that $0 is always up-to-date and contains the + * right "bytes left to zero" value (and that it is updated only _after_ + * a successful copy). There is also some rather minor exception setup + * stuff. + * + * NOTE! This is not directly C-callable, because the calling semantics + * are different: + * + * Inputs: + * length in $0 + * destination address in $6 + * exception pointer in $7 + * return address in $28 (exceptions expect it there) + * + * Outputs: + * bytes left to copy in $0 + * + * Clobbers: + * $1,$2,$3,$4,$5,$6 + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + * Determining actual stalls (other than slotting) doesn't appear to be easy to do. + * From perusing the source code context where this routine is called, it is + * a fair assumption that significant fractions of entire pages are zeroed, so + * it's going to be worth the effort to hand-unroll a big loop, and use wh64. + * ASSUMPTION: + * The believed purpose of only updating $0 after a store is that a signal + * may come along during the execution of this chunk of code, and we don't + * want to leave a hole (and we also want to avoid repeating lots of work) + */ + +/* Allow an exception for an insn; exit if we get one. */ +#define EX(x,y...) \ + 99: x,##y; \ + .section __ex_table,"a"; \ + .long 99b - .; \ + lda $31, $exception-99b($31); \ + .previous + + .set noat + .set noreorder + .align 4 + + .globl __do_clear_user + .ent __do_clear_user + .frame $30, 0, $28 + .prologue 0 + + # Pipeline info : Slotting & Comments +__do_clear_user: + and $6, 7, $4 # .. E .. .. : find dest head misalignment + beq $0, $zerolength # U .. .. .. : U L U L + + addq $0, $4, $1 # .. .. .. E : bias counter + and $1, 7, $2 # .. .. E .. : number of misaligned bytes in tail +# Note - we never actually use $2, so this is a moot computation +# and we can rewrite this later... + srl $1, 3, $1 # .. E .. .. : number of quadwords to clear + beq $4, $headalign # U .. .. .. : U L U L + +/* + * Head is not aligned. Write (8 - $4) bytes to head of destination + * This means $6 is known to be misaligned + */ + EX( ldq_u $5, 0($6) ) # .. .. .. L : load dst word to mask back in + beq $1, $onebyte # .. .. U .. : sub-word store? + mskql $5, $6, $5 # .. U .. .. : take care of misaligned head + addq $6, 8, $6 # E .. .. .. : L U U L + + EX( stq_u $5, -8($6) ) # .. .. .. L : + subq $1, 1, $1 # .. .. E .. : + addq $0, $4, $0 # .. E .. .. : bytes left -= 8 - misalignment + subq $0, 8, $0 # E .. .. .. : U L U L + + .align 4 +/* + * (The .align directive ought to be a moot point) + * values upon initial entry to the loop + * $1 is number of quadwords to clear (zero is a valid value) + * $2 is number of trailing bytes (0..7) ($2 never used...) + * $6 is known to be aligned 0mod8 + */ +$headalign: + subq $1, 16, $4 # .. .. .. E : If < 16, we can not use the huge loop + and $6, 0x3f, $2 # .. .. E .. : Forward work for huge loop + subq $2, 0x40, $3 # .. E .. .. : bias counter (huge loop) + blt $4, $trailquad # U .. .. .. : U L U L + +/* + * We know that we're going to do at least 16 quads, which means we are + * going to be able to use the large block clear loop at least once. + * Figure out how many quads we need to clear before we are 0mod64 aligned + * so we can use the wh64 instruction. + */ + + nop # .. .. .. E + nop # .. .. E .. + nop # .. E .. .. + beq $3, $bigalign # U .. .. .. : U L U L : Aligned 0mod64 + +$alignmod64: + EX( stq_u $31, 0($6) ) # .. .. .. L + addq $3, 8, $3 # .. .. E .. + subq $0, 8, $0 # .. E .. .. + nop # E .. .. .. : U L U L + + nop # .. .. .. E + subq $1, 1, $1 # .. .. E .. + addq $6, 8, $6 # .. E .. .. + blt $3, $alignmod64 # U .. .. .. : U L U L + +$bigalign: +/* + * $0 is the number of bytes left + * $1 is the number of quads left + * $6 is aligned 0mod64 + * we know that we'll be taking a minimum of one trip through + * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle + * We are _not_ going to update $0 after every single store. That + * would be silly, because there will be cross-cluster dependencies + * no matter how the code is scheduled. By doing it in slightly + * staggered fashion, we can still do this loop in 5 fetches + * The worse case will be doing two extra quads in some future execution, + * in the event of an interrupted clear. + * Assumes the wh64 needs to be for 2 trips through the loop in the future + * The wh64 is issued on for the starting destination address for trip +2 + * through the loop, and if there are less than two trips left, the target + * address will be for the current trip. + */ + nop # E : + nop # E : + nop # E : + bis $6,$6,$3 # E : U L U L : Initial wh64 address is dest + /* This might actually help for the current trip... */ + +$do_wh64: + wh64 ($3) # .. .. .. L1 : memory subsystem hint + subq $1, 16, $4 # .. .. E .. : Forward calculation - repeat the loop? + EX( stq_u $31, 0($6) ) # .. L .. .. + subq $0, 8, $0 # E .. .. .. : U L U L + + addq $6, 128, $3 # E : Target address of wh64 + EX( stq_u $31, 8($6) ) # L : + EX( stq_u $31, 16($6) ) # L : + subq $0, 16, $0 # E : U L L U + + nop # E : + EX( stq_u $31, 24($6) ) # L : + EX( stq_u $31, 32($6) ) # L : + subq $0, 168, $5 # E : U L L U : two trips through the loop left? + /* 168 = 192 - 24, since we've already completed some stores */ + + subq $0, 16, $0 # E : + EX( stq_u $31, 40($6) ) # L : + EX( stq_u $31, 48($6) ) # L : + cmovlt $5, $6, $3 # E : U L L U : Latency 2, extra mapping cycle + + subq $1, 8, $1 # E : + subq $0, 16, $0 # E : + EX( stq_u $31, 56($6) ) # L : + nop # E : U L U L + + nop # E : + subq $0, 8, $0 # E : + addq $6, 64, $6 # E : + bge $4, $do_wh64 # U : U L U L + +$trailquad: + # zero to 16 quadwords left to store, plus any trailing bytes + # $1 is the number of quadwords left to go. + # + nop # .. .. .. E + nop # .. .. E .. + nop # .. E .. .. + beq $1, $trailbytes # U .. .. .. : U L U L : Only 0..7 bytes to go + +$onequad: + EX( stq_u $31, 0($6) ) # .. .. .. L + subq $1, 1, $1 # .. .. E .. + subq $0, 8, $0 # .. E .. .. + nop # E .. .. .. : U L U L + + nop # .. .. .. E + nop # .. .. E .. + addq $6, 8, $6 # .. E .. .. + bgt $1, $onequad # U .. .. .. : U L U L + + # We have an unknown number of bytes left to go. +$trailbytes: + nop # .. .. .. E + nop # .. .. E .. + nop # .. E .. .. + beq $0, $zerolength # U .. .. .. : U L U L + + # $0 contains the number of bytes left to copy (0..31) + # so we will use $0 as the loop counter + # We know for a fact that $0 > 0 zero due to previous context +$onebyte: + EX( stb $31, 0($6) ) # .. .. .. L + subq $0, 1, $0 # .. .. E .. : + addq $6, 1, $6 # .. E .. .. : + bgt $0, $onebyte # U .. .. .. : U L U L + +$zerolength: +$exception: # Destination for exception recovery(?) + nop # .. .. .. E : + nop # .. .. E .. : + nop # .. E .. .. : + ret $31, ($28), 1 # L0 .. .. .. : L U L U + .end __do_clear_user + diff --git a/arch/alpha/lib/ev6-copy_page.S b/arch/alpha/lib/ev6-copy_page.S new file mode 100644 index 00000000..b789db19 --- /dev/null +++ b/arch/alpha/lib/ev6-copy_page.S @@ -0,0 +1,203 @@ +/* + * arch/alpha/lib/ev6-copy_page.S + * + * Copy an entire page. + */ + +/* The following comparison of this routine vs the normal copy_page.S + was written by an unnamed ev6 hardware designer and forwarded to me + via Steven Hobbs <hobbs@steven.zko.dec.com>. + + First Problem: STQ overflows. + ----------------------------- + + It would be nice if EV6 handled every resource overflow efficiently, + but for some it doesn't. Including store queue overflows. It causes + a trap and a restart of the pipe. + + To get around this we sometimes use (to borrow a term from a VSSAD + researcher) "aeration". The idea is to slow the rate at which the + processor receives valid instructions by inserting nops in the fetch + path. In doing so, you can prevent the overflow and actually make + the code run faster. You can, of course, take advantage of the fact + that the processor can fetch at most 4 aligned instructions per cycle. + + I inserted enough nops to force it to take 10 cycles to fetch the + loop code. In theory, EV6 should be able to execute this loop in + 9 cycles but I was not able to get it to run that fast -- the initial + conditions were such that I could not reach this optimum rate on + (chaotic) EV6. I wrote the code such that everything would issue + in order. + + Second Problem: Dcache index matches. + ------------------------------------- + + If you are going to use this routine on random aligned pages, there + is a 25% chance that the pages will be at the same dcache indices. + This results in many nasty memory traps without care. + + The solution is to schedule the prefetches to avoid the memory + conflicts. I schedule the wh64 prefetches farther ahead of the + read prefetches to avoid this problem. + + Third Problem: Needs more prefetching. + -------------------------------------- + + In order to improve the code I added deeper prefetching to take the + most advantage of EV6's bandwidth. + + I also prefetched the read stream. Note that adding the read prefetch + forced me to add another cycle to the inner-most kernel - up to 11 + from the original 8 cycles per iteration. We could improve performance + further by unrolling the loop and doing multiple prefetches per cycle. + + I think that the code below will be very robust and fast code for the + purposes of copying aligned pages. It is slower when both source and + destination pages are in the dcache, but it is my guess that this is + less important than the dcache miss case. */ + + + .text + .align 4 + .global copy_page + .ent copy_page +copy_page: + .prologue 0 + + /* Prefetch 5 read cachelines; write-hint 10 cache lines. */ + wh64 ($16) + ldl $31,0($17) + ldl $31,64($17) + lda $1,1*64($16) + + wh64 ($1) + ldl $31,128($17) + ldl $31,192($17) + lda $1,2*64($16) + + wh64 ($1) + ldl $31,256($17) + lda $18,118 + lda $1,3*64($16) + + wh64 ($1) + nop + lda $1,4*64($16) + lda $2,5*64($16) + + wh64 ($1) + wh64 ($2) + lda $1,6*64($16) + lda $2,7*64($16) + + wh64 ($1) + wh64 ($2) + lda $1,8*64($16) + lda $2,9*64($16) + + wh64 ($1) + wh64 ($2) + lda $19,10*64($16) + nop + + /* Main prefetching/write-hinting loop. */ +1: ldq $0,0($17) + ldq $1,8($17) + unop + unop + + unop + unop + ldq $2,16($17) + ldq $3,24($17) + + ldq $4,32($17) + ldq $5,40($17) + unop + unop + + unop + unop + ldq $6,48($17) + ldq $7,56($17) + + ldl $31,320($17) + unop + unop + unop + + /* This gives the extra cycle of aeration above the minimum. */ + unop + unop + unop + unop + + wh64 ($19) + unop + unop + unop + + stq $0,0($16) + subq $18,1,$18 + stq $1,8($16) + unop + + unop + stq $2,16($16) + addq $17,64,$17 + stq $3,24($16) + + stq $4,32($16) + stq $5,40($16) + addq $19,64,$19 + unop + + stq $6,48($16) + stq $7,56($16) + addq $16,64,$16 + bne $18, 1b + + /* Prefetch the final 5 cache lines of the read stream. */ + lda $18,10 + ldl $31,320($17) + ldl $31,384($17) + ldl $31,448($17) + + ldl $31,512($17) + ldl $31,576($17) + nop + nop + + /* Non-prefetching, non-write-hinting cleanup loop for the + final 10 cache lines. */ +2: ldq $0,0($17) + ldq $1,8($17) + ldq $2,16($17) + ldq $3,24($17) + + ldq $4,32($17) + ldq $5,40($17) + ldq $6,48($17) + ldq $7,56($17) + + stq $0,0($16) + subq $18,1,$18 + stq $1,8($16) + addq $17,64,$17 + + stq $2,16($16) + stq $3,24($16) + stq $4,32($16) + stq $5,40($16) + + stq $6,48($16) + stq $7,56($16) + addq $16,64,$16 + bne $18, 2b + + ret + nop + unop + nop + + .end copy_page diff --git a/arch/alpha/lib/ev6-copy_user.S b/arch/alpha/lib/ev6-copy_user.S new file mode 100644 index 00000000..db42ffe9 --- /dev/null +++ b/arch/alpha/lib/ev6-copy_user.S @@ -0,0 +1,259 @@ +/* + * arch/alpha/lib/ev6-copy_user.S + * + * 21264 version contributed by Rick Gorton <rick.gorton@alpha-processor.com> + * + * Copy to/from user space, handling exceptions as we go.. This + * isn't exactly pretty. + * + * This is essentially the same as "memcpy()", but with a few twists. + * Notably, we have to make sure that $0 is always up-to-date and + * contains the right "bytes left to copy" value (and that it is updated + * only _after_ a successful copy). There is also some rather minor + * exception setup stuff.. + * + * NOTE! This is not directly C-callable, because the calling semantics are + * different: + * + * Inputs: + * length in $0 + * destination address in $6 + * source address in $7 + * return address in $28 + * + * Outputs: + * bytes left to copy in $0 + * + * Clobbers: + * $1,$2,$3,$4,$5,$6,$7 + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + */ + +/* Allow an exception for an insn; exit if we get one. */ +#define EXI(x,y...) \ + 99: x,##y; \ + .section __ex_table,"a"; \ + .long 99b - .; \ + lda $31, $exitin-99b($31); \ + .previous + +#define EXO(x,y...) \ + 99: x,##y; \ + .section __ex_table,"a"; \ + .long 99b - .; \ + lda $31, $exitout-99b($31); \ + .previous + + .set noat + .align 4 + .globl __copy_user + .ent __copy_user + # Pipeline info: Slotting & Comments +__copy_user: + .prologue 0 + subq $0, 32, $1 # .. E .. .. : Is this going to be a small copy? + beq $0, $zerolength # U .. .. .. : U L U L + + and $6,7,$3 # .. .. .. E : is leading dest misalignment + ble $1, $onebyteloop # .. .. U .. : 1st branch : small amount of data + beq $3, $destaligned # .. U .. .. : 2nd (one cycle fetcher stall) + subq $3, 8, $3 # E .. .. .. : L U U L : trip counter +/* + * The fetcher stall also hides the 1 cycle cross-cluster stall for $3 (L --> U) + * This loop aligns the destination a byte at a time + * We know we have at least one trip through this loop + */ +$aligndest: + EXI( ldbu $1,0($7) ) # .. .. .. L : Keep loads separate from stores + addq $6,1,$6 # .. .. E .. : Section 3.8 in the CWG + addq $3,1,$3 # .. E .. .. : + nop # E .. .. .. : U L U L + +/* + * the -1 is to compensate for the inc($6) done in a previous quadpack + * which allows us zero dependencies within either quadpack in the loop + */ + EXO( stb $1,-1($6) ) # .. .. .. L : + addq $7,1,$7 # .. .. E .. : Section 3.8 in the CWG + subq $0,1,$0 # .. E .. .. : + bne $3, $aligndest # U .. .. .. : U L U L + +/* + * If we fell through into here, we have a minimum of 33 - 7 bytes + * If we arrived via branch, we have a minimum of 32 bytes + */ +$destaligned: + and $7,7,$1 # .. .. .. E : Check _current_ source alignment + bic $0,7,$4 # .. .. E .. : number bytes as a quadword loop + EXI( ldq_u $3,0($7) ) # .. L .. .. : Forward fetch for fallthrough code + beq $1,$quadaligned # U .. .. .. : U L U L + +/* + * In the worst case, we've just executed an ldq_u here from 0($7) + * and we'll repeat it once if we take the branch + */ + +/* Misaligned quadword loop - not unrolled. Leave it that way. */ +$misquad: + EXI( ldq_u $2,8($7) ) # .. .. .. L : + subq $4,8,$4 # .. .. E .. : + extql $3,$7,$3 # .. U .. .. : + extqh $2,$7,$1 # U .. .. .. : U U L L + + bis $3,$1,$1 # .. .. .. E : + EXO( stq $1,0($6) ) # .. .. L .. : + addq $7,8,$7 # .. E .. .. : + subq $0,8,$0 # E .. .. .. : U L L U + + addq $6,8,$6 # .. .. .. E : + bis $2,$2,$3 # .. .. E .. : + nop # .. E .. .. : + bne $4,$misquad # U .. .. .. : U L U L + + nop # .. .. .. E + nop # .. .. E .. + nop # .. E .. .. + beq $0,$zerolength # U .. .. .. : U L U L + +/* We know we have at least one trip through the byte loop */ + EXI ( ldbu $2,0($7) ) # .. .. .. L : No loads in the same quad + addq $6,1,$6 # .. .. E .. : as the store (Section 3.8 in CWG) + nop # .. E .. .. : + br $31, $dirtyentry # L0 .. .. .. : L U U L +/* Do the trailing byte loop load, then hop into the store part of the loop */ + +/* + * A minimum of (33 - 7) bytes to do a quad at a time. + * Based upon the usage context, it's worth the effort to unroll this loop + * $0 - number of bytes to be moved + * $4 - number of bytes to move as quadwords + * $6 is current destination address + * $7 is current source address + */ +$quadaligned: + subq $4, 32, $2 # .. .. .. E : do not unroll for small stuff + nop # .. .. E .. + nop # .. E .. .. + blt $2, $onequad # U .. .. .. : U L U L + +/* + * There is a significant assumption here that the source and destination + * addresses differ by more than 32 bytes. In this particular case, a + * sparsity of registers further bounds this to be a minimum of 8 bytes. + * But if this isn't met, then the output result will be incorrect. + * Furthermore, due to a lack of available registers, we really can't + * unroll this to be an 8x loop (which would enable us to use the wh64 + * instruction memory hint instruction). + */ +$unroll4: + EXI( ldq $1,0($7) ) # .. .. .. L + EXI( ldq $2,8($7) ) # .. .. L .. + subq $4,32,$4 # .. E .. .. + nop # E .. .. .. : U U L L + + addq $7,16,$7 # .. .. .. E + EXO( stq $1,0($6) ) # .. .. L .. + EXO( stq $2,8($6) ) # .. L .. .. + subq $0,16,$0 # E .. .. .. : U L L U + + addq $6,16,$6 # .. .. .. E + EXI( ldq $1,0($7) ) # .. .. L .. + EXI( ldq $2,8($7) ) # .. L .. .. + subq $4, 32, $3 # E .. .. .. : U U L L : is there enough for another trip? + + EXO( stq $1,0($6) ) # .. .. .. L + EXO( stq $2,8($6) ) # .. .. L .. + subq $0,16,$0 # .. E .. .. + addq $7,16,$7 # E .. .. .. : U L L U + + nop # .. .. .. E + nop # .. .. E .. + addq $6,16,$6 # .. E .. .. + bgt $3,$unroll4 # U .. .. .. : U L U L + + nop + nop + nop + beq $4, $noquads + +$onequad: + EXI( ldq $1,0($7) ) + subq $4,8,$4 + addq $7,8,$7 + nop + + EXO( stq $1,0($6) ) + subq $0,8,$0 + addq $6,8,$6 + bne $4,$onequad + +$noquads: + nop + nop + nop + beq $0,$zerolength + +/* + * For small copies (or the tail of a larger copy), do a very simple byte loop. + * There's no point in doing a lot of complex alignment calculations to try to + * to quadword stuff for a small amount of data. + * $0 - remaining number of bytes left to copy + * $6 - current dest addr + * $7 - current source addr + */ + +$onebyteloop: + EXI ( ldbu $2,0($7) ) # .. .. .. L : No loads in the same quad + addq $6,1,$6 # .. .. E .. : as the store (Section 3.8 in CWG) + nop # .. E .. .. : + nop # E .. .. .. : U L U L + +$dirtyentry: +/* + * the -1 is to compensate for the inc($6) done in a previous quadpack + * which allows us zero dependencies within either quadpack in the loop + */ + EXO ( stb $2,-1($6) ) # .. .. .. L : + addq $7,1,$7 # .. .. E .. : quadpack as the load + subq $0,1,$0 # .. E .. .. : change count _after_ copy + bgt $0,$onebyteloop # U .. .. .. : U L U L + +$zerolength: +$exitout: # Destination for exception recovery(?) + nop # .. .. .. E + nop # .. .. E .. + nop # .. E .. .. + ret $31,($28),1 # L0 .. .. .. : L U L U + +$exitin: + + /* A stupid byte-by-byte zeroing of the rest of the output + buffer. This cures security holes by never leaving + random kernel data around to be copied elsewhere. */ + + nop + nop + nop + mov $0,$1 + +$101: + EXO ( stb $31,0($6) ) # L + subq $1,1,$1 # E + addq $6,1,$6 # E + bgt $1,$101 # U + + nop + nop + nop + ret $31,($28),1 # L0 + + .end __copy_user + diff --git a/arch/alpha/lib/ev6-csum_ipv6_magic.S b/arch/alpha/lib/ev6-csum_ipv6_magic.S new file mode 100644 index 00000000..fc0bc399 --- /dev/null +++ b/arch/alpha/lib/ev6-csum_ipv6_magic.S @@ -0,0 +1,150 @@ +/* + * arch/alpha/lib/ev6-csum_ipv6_magic.S + * 21264 version contributed by Rick Gorton <rick.gorton@alpha-processor.com> + * + * unsigned short csum_ipv6_magic(struct in6_addr *saddr, + * struct in6_addr *daddr, + * __u32 len, + * unsigned short proto, + * unsigned int csum); + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + * Determining actual stalls (other than slotting) doesn't appear to be easy to do. + * + * unsigned short csum_ipv6_magic(struct in6_addr *saddr, + * struct in6_addr *daddr, + * __u32 len, + * unsigned short proto, + * unsigned int csum); + * + * Swap <proto> (takes form 0xaabb) + * Then shift it left by 48, so result is: + * 0xbbaa0000 00000000 + * Then turn it back into a sign extended 32-bit item + * 0xbbaa0000 + * + * Swap <len> (an unsigned int) using Mike Burrows' 7-instruction sequence + * (we can't hide the 3-cycle latency of the unpkbw in the 6-instruction sequence) + * Assume input takes form 0xAABBCCDD + * + * Finally, original 'folding' approach is to split the long into 4 unsigned shorts + * add 4 ushorts, resulting in ushort/carry + * add carry bits + ushort --> ushort + * add carry bits + ushort --> ushort (in case the carry results in an overflow) + * Truncate to a ushort. (took 13 instructions) + * From doing some testing, using the approach in checksum.c:from64to16() + * results in the same outcome: + * split into 2 uints, add those, generating a ulong + * add the 3 low ushorts together, generating a uint + * a final add of the 2 lower ushorts + * truncating the result. + * + * Misalignment handling added by Ivan Kokshaysky <ink@jurassic.park.msu.ru> + * The cost is 16 instructions (~8 cycles), including two extra loads which + * may cause additional delay in rare cases (load-load replay traps). + */ + + .globl csum_ipv6_magic + .align 4 + .ent csum_ipv6_magic + .frame $30,0,$26,0 +csum_ipv6_magic: + .prologue 0 + + ldq_u $0,0($16) # L : Latency: 3 + inslh $18,7,$4 # U : 0000000000AABBCC + ldq_u $1,8($16) # L : Latency: 3 + sll $19,8,$7 # U : U L U L : 0x00000000 00aabb00 + + and $16,7,$6 # E : src misalignment + ldq_u $5,15($16) # L : Latency: 3 + zapnot $20,15,$20 # U : zero extend incoming csum + ldq_u $2,0($17) # L : U L U L : Latency: 3 + + extql $0,$6,$0 # U : + extqh $1,$6,$22 # U : + ldq_u $3,8($17) # L : Latency: 3 + sll $19,24,$19 # U : U U L U : 0x000000aa bb000000 + + cmoveq $6,$31,$22 # E : src aligned? + ldq_u $23,15($17) # L : Latency: 3 + inswl $18,3,$18 # U : 000000CCDD000000 + addl $19,$7,$19 # E : U L U L : <sign bits>bbaabb00 + + or $0,$22,$0 # E : 1st src word complete + extql $1,$6,$1 # U : + or $18,$4,$18 # E : 000000CCDDAABBCC + extqh $5,$6,$5 # U : L U L U + + and $17,7,$6 # E : dst misalignment + extql $2,$6,$2 # U : + or $1,$5,$1 # E : 2nd src word complete + extqh $3,$6,$22 # U : L U L U : + + cmoveq $6,$31,$22 # E : dst aligned? + extql $3,$6,$3 # U : + addq $20,$0,$20 # E : begin summing the words + extqh $23,$6,$23 # U : L U L U : + + srl $18,16,$4 # U : 0000000000CCDDAA + or $2,$22,$2 # E : 1st dst word complete + zap $19,0x3,$19 # U : <sign bits>bbaa0000 + or $3,$23,$3 # E : U L U L : 2nd dst word complete + + cmpult $20,$0,$0 # E : + addq $20,$1,$20 # E : + zapnot $18,0xa,$18 # U : 00000000DD00BB00 + zap $4,0xa,$4 # U : U U L L : 0000000000CC00AA + + or $18,$4,$18 # E : 00000000DDCCBBAA + nop # E : + cmpult $20,$1,$1 # E : + addq $20,$2,$20 # E : U L U L + + cmpult $20,$2,$2 # E : + addq $20,$3,$20 # E : + cmpult $20,$3,$3 # E : (1 cycle stall on $20) + addq $20,$18,$20 # E : U L U L (1 cycle stall on $20) + + cmpult $20,$18,$18 # E : + addq $20,$19,$20 # E : (1 cycle stall on $20) + addq $0,$1,$0 # E : merge the carries back into the csum + addq $2,$3,$2 # E : + + cmpult $20,$19,$19 # E : + addq $18,$19,$18 # E : (1 cycle stall on $19) + addq $0,$2,$0 # E : + addq $20,$18,$20 # E : U L U L : + /* (1 cycle stall on $18, 2 cycles on $20) */ + + addq $0,$20,$0 # E : + zapnot $0,15,$1 # U : Start folding output (1 cycle stall on $0) + nop # E : + srl $0,32,$0 # U : U L U L : (1 cycle stall on $0) + + addq $1,$0,$1 # E : Finished generating ulong + extwl $1,2,$2 # U : ushort[1] (1 cycle stall on $1) + zapnot $1,3,$0 # U : ushort[0] (1 cycle stall on $1) + extwl $1,4,$1 # U : ushort[2] (1 cycle stall on $1) + + addq $0,$2,$0 # E + addq $0,$1,$3 # E : Finished generating uint + /* (1 cycle stall on $0) */ + extwl $3,2,$1 # U : ushort[1] (1 cycle stall on $3) + nop # E : L U L U + + addq $1,$3,$0 # E : Final carry + not $0,$4 # E : complement (1 cycle stall on $0) + zapnot $4,3,$0 # U : clear upper garbage bits + /* (1 cycle stall on $4) */ + ret # L0 : L U L U + + .end csum_ipv6_magic diff --git a/arch/alpha/lib/ev6-divide.S b/arch/alpha/lib/ev6-divide.S new file mode 100644 index 00000000..2a82b9be --- /dev/null +++ b/arch/alpha/lib/ev6-divide.S @@ -0,0 +1,259 @@ +/* + * arch/alpha/lib/ev6-divide.S + * + * 21264 version contributed by Rick Gorton <rick.gorton@alpha-processor.com> + * + * Alpha division.. + */ + +/* + * The alpha chip doesn't provide hardware division, so we have to do it + * by hand. The compiler expects the functions + * + * __divqu: 64-bit unsigned long divide + * __remqu: 64-bit unsigned long remainder + * __divqs/__remqs: signed 64-bit + * __divlu/__remlu: unsigned 32-bit + * __divls/__remls: signed 32-bit + * + * These are not normal C functions: instead of the normal + * calling sequence, these expect their arguments in registers + * $24 and $25, and return the result in $27. Register $28 may + * be clobbered (assembly temporary), anything else must be saved. + * + * In short: painful. + * + * This is a rather simple bit-at-a-time algorithm: it's very good + * at dividing random 64-bit numbers, but the more usual case where + * the divisor is small is handled better by the DEC algorithm + * using lookup tables. This uses much less memory, though, and is + * nicer on the cache.. Besides, I don't know the copyright status + * of the DEC code. + */ + +/* + * My temporaries: + * $0 - current bit + * $1 - shifted divisor + * $2 - modulus/quotient + * + * $23 - return address + * $24 - dividend + * $25 - divisor + * + * $27 - quotient/modulus + * $28 - compare status + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + */ + +#define halt .long 0 + +/* + * Select function type and registers + */ +#define mask $0 +#define divisor $1 +#define compare $28 +#define tmp1 $3 +#define tmp2 $4 + +#ifdef DIV +#define DIV_ONLY(x,y...) x,##y +#define MOD_ONLY(x,y...) +#define func(x) __div##x +#define modulus $2 +#define quotient $27 +#define GETSIGN(x) xor $24,$25,x +#define STACK 48 +#else +#define DIV_ONLY(x,y...) +#define MOD_ONLY(x,y...) x,##y +#define func(x) __rem##x +#define modulus $27 +#define quotient $2 +#define GETSIGN(x) bis $24,$24,x +#define STACK 32 +#endif + +/* + * For 32-bit operations, we need to extend to 64-bit + */ +#ifdef INTSIZE +#define ufunction func(lu) +#define sfunction func(l) +#define LONGIFY(x) zapnot x,15,x +#define SLONGIFY(x) addl x,0,x +#else +#define ufunction func(qu) +#define sfunction func(q) +#define LONGIFY(x) +#define SLONGIFY(x) +#endif + +.set noat +.align 4 +.globl ufunction +.ent ufunction +ufunction: + subq $30,STACK,$30 # E : + .frame $30,STACK,$23 + .prologue 0 + +7: stq $1, 0($30) # L : + bis $25,$25,divisor # E : + stq $2, 8($30) # L : L U L U + + bis $24,$24,modulus # E : + stq $0,16($30) # L : + bis $31,$31,quotient # E : + LONGIFY(divisor) # E : U L L U + + stq tmp1,24($30) # L : + LONGIFY(modulus) # E : + bis $31,1,mask # E : + DIV_ONLY(stq tmp2,32($30)) # L : L U U L + + beq divisor, 9f /* div by zero */ + /* + * In spite of the DIV_ONLY being either a non-instruction + * or an actual stq, the addition of the .align directive + * below ensures that label 1 is going to be nicely aligned + */ + + .align 4 +#ifdef INTSIZE + /* + * shift divisor left, using 3-bit shifts for + * 32-bit divides as we can't overflow. Three-bit + * shifts will result in looping three times less + * here, but can result in two loops more later. + * Thus using a large shift isn't worth it (and + * s8add pairs better than a sll..) + */ +1: cmpult divisor,modulus,compare # E : + s8addq divisor,$31,divisor # E : + s8addq mask,$31,mask # E : + bne compare,1b # U : U L U L +#else +1: cmpult divisor,modulus,compare # E : + nop # E : + nop # E : + blt divisor, 2f # U : U L U L + + addq divisor,divisor,divisor # E : + addq mask,mask,mask # E : + unop # E : + bne compare,1b # U : U L U L +#endif + + /* ok, start to go right again.. */ +2: + /* + * Keep things nicely bundled... use a nop instead of not + * having an instruction for DIV_ONLY + */ +#ifdef DIV + DIV_ONLY(addq quotient,mask,tmp2) # E : +#else + nop # E : +#endif + srl mask,1,mask # U : + cmpule divisor,modulus,compare # E : + subq modulus,divisor,tmp1 # E : + +#ifdef DIV + DIV_ONLY(cmovne compare,tmp2,quotient) # E : Latency 2, extra map slot + nop # E : as part of the cmovne + srl divisor,1,divisor # U : + nop # E : L U L U + + nop # E : + cmovne compare,tmp1,modulus # E : Latency 2, extra map slot + nop # E : as part of the cmovne + bne mask,2b # U : U L U L +#else + srl divisor,1,divisor # U : + cmovne compare,tmp1,modulus # E : Latency 2, extra map slot + nop # E : as part of the cmovne + bne mask,2b # U : U L L U +#endif + +9: ldq $1, 0($30) # L : + ldq $2, 8($30) # L : + nop # E : + nop # E : U U L L + + ldq $0,16($30) # L : + ldq tmp1,24($30) # L : + nop # E : + nop # E : + +#ifdef DIV + DIV_ONLY(ldq tmp2,32($30)) # L : +#else + nop # E : +#endif + addq $30,STACK,$30 # E : + ret $31,($23),1 # L0 : L U U L + .end ufunction + +/* + * Uhh.. Ugly signed division. I'd rather not have it at all, but + * it's needed in some circumstances. There are different ways to + * handle this, really. This does: + * -a / b = a / -b = -(a / b) + * -a % b = -(a % b) + * a % -b = a % b + * which is probably not the best solution, but at least should + * have the property that (x/y)*y + (x%y) = x. + */ +.align 4 +.globl sfunction +.ent sfunction +sfunction: + subq $30,STACK,$30 # E : + .frame $30,STACK,$23 + .prologue 0 + bis $24,$25,$28 # E : + SLONGIFY($28) # E : + bge $28,7b # U : + + stq $24,0($30) # L : + subq $31,$24,$28 # E : + stq $25,8($30) # L : + nop # E : U L U L + + cmovlt $24,$28,$24 /* abs($24) */ # E : Latency 2, extra map slot + nop # E : as part of the cmov + stq $23,16($30) # L : + subq $31,$25,$28 # E : U L U L + + stq tmp1,24($30) # L : + cmovlt $25,$28,$25 /* abs($25) */ # E : Latency 2, extra map slot + nop # E : + bsr $23,ufunction # L0: L U L U + + ldq $24,0($30) # L : + ldq $25,8($30) # L : + GETSIGN($28) # E : + subq $31,$27,tmp1 # E : U U L L + + SLONGIFY($28) # E : + ldq $23,16($30) # L : + cmovlt $28,tmp1,$27 # E : Latency 2, extra map slot + nop # E : U L L U : as part of the cmov + + ldq tmp1,24($30) # L : + nop # E : as part of the cmov + addq $30,STACK,$30 # E : + ret $31,($23),1 # L0 : L U U L + .end sfunction diff --git a/arch/alpha/lib/ev6-memchr.S b/arch/alpha/lib/ev6-memchr.S new file mode 100644 index 00000000..1a5f71b9 --- /dev/null +++ b/arch/alpha/lib/ev6-memchr.S @@ -0,0 +1,191 @@ +/* + * arch/alpha/lib/ev6-memchr.S + * + * 21264 version contributed by Rick Gorton <rick.gorton@alpha-processor.com> + * + * Finds characters in a memory area. Optimized for the Alpha: + * + * - memory accessed as aligned quadwords only + * - uses cmpbge to compare 8 bytes in parallel + * - does binary search to find 0 byte in last + * quadword (HAKMEM needed 12 instructions to + * do this instead of the 9 instructions that + * binary search needs). + * + * For correctness consider that: + * + * - only minimum number of quadwords may be accessed + * - the third argument is an unsigned long + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + */ + + .set noreorder + .set noat + + .align 4 + .globl memchr + .ent memchr +memchr: + .frame $30,0,$26,0 + .prologue 0 + + # Hack -- if someone passes in (size_t)-1, hoping to just + # search til the end of the address space, we will overflow + # below when we find the address of the last byte. Given + # that we will never have a 56-bit address space, cropping + # the length is the easiest way to avoid trouble. + zap $18, 0x80, $5 # U : Bound length + beq $18, $not_found # U : + ldq_u $1, 0($16) # L : load first quadword Latency=3 + and $17, 0xff, $17 # E : L L U U : 00000000000000ch + + insbl $17, 1, $2 # U : 000000000000ch00 + cmpult $18, 9, $4 # E : small (< 1 quad) string? + or $2, $17, $17 # E : 000000000000chch + lda $3, -1($31) # E : U L L U + + sll $17, 16, $2 # U : 00000000chch0000 + addq $16, $5, $5 # E : Max search address + or $2, $17, $17 # E : 00000000chchchch + sll $17, 32, $2 # U : U L L U : chchchch00000000 + + or $2, $17, $17 # E : chchchchchchchch + extql $1, $16, $7 # U : $7 is upper bits + beq $4, $first_quad # U : + ldq_u $6, -1($5) # L : L U U L : eight or less bytes to search Latency=3 + + extqh $6, $16, $6 # U : 2 cycle stall for $6 + mov $16, $0 # E : + nop # E : + or $7, $6, $1 # E : L U L U $1 = quadword starting at $16 + + # Deal with the case where at most 8 bytes remain to be searched + # in $1. E.g.: + # $18 = 6 + # $1 = ????c6c5c4c3c2c1 +$last_quad: + negq $18, $6 # E : + xor $17, $1, $1 # E : + srl $3, $6, $6 # U : $6 = mask of $18 bits set + cmpbge $31, $1, $2 # E : L U L U + + nop + nop + and $2, $6, $2 # E : + beq $2, $not_found # U : U L U L + +$found_it: +#ifdef CONFIG_ALPHA_EV67 + /* + * Since we are guaranteed to have set one of the bits, we don't + * have to worry about coming back with a 0x40 out of cttz... + */ + cttz $2, $3 # U0 : + addq $0, $3, $0 # E : All done + nop # E : + ret # L0 : L U L U +#else + /* + * Slow and clunky. It can probably be improved. + * An exercise left for others. + */ + negq $2, $3 # E : + and $2, $3, $2 # E : + and $2, 0x0f, $1 # E : + addq $0, 4, $3 # E : + + cmoveq $1, $3, $0 # E : Latency 2, extra map cycle + nop # E : keep with cmov + and $2, 0x33, $1 # E : + addq $0, 2, $3 # E : U L U L : 2 cycle stall on $0 + + cmoveq $1, $3, $0 # E : Latency 2, extra map cycle + nop # E : keep with cmov + and $2, 0x55, $1 # E : + addq $0, 1, $3 # E : U L U L : 2 cycle stall on $0 + + cmoveq $1, $3, $0 # E : Latency 2, extra map cycle + nop + nop + ret # L0 : L U L U +#endif + + # Deal with the case where $18 > 8 bytes remain to be + # searched. $16 may not be aligned. + .align 4 +$first_quad: + andnot $16, 0x7, $0 # E : + insqh $3, $16, $2 # U : $2 = 0000ffffffffffff ($16<0:2> ff) + xor $1, $17, $1 # E : + or $1, $2, $1 # E : U L U L $1 = ====ffffffffffff + + cmpbge $31, $1, $2 # E : + bne $2, $found_it # U : + # At least one byte left to process. + ldq $1, 8($0) # L : + subq $5, 1, $18 # E : U L U L + + addq $0, 8, $0 # E : + # Make $18 point to last quad to be accessed (the + # last quad may or may not be partial). + andnot $18, 0x7, $18 # E : + cmpult $0, $18, $2 # E : + beq $2, $final # U : U L U L + + # At least two quads remain to be accessed. + + subq $18, $0, $4 # E : $4 <- nr quads to be processed + and $4, 8, $4 # E : odd number of quads? + bne $4, $odd_quad_count # U : + # At least three quads remain to be accessed + mov $1, $4 # E : L U L U : move prefetched value to correct reg + + .align 4 +$unrolled_loop: + ldq $1, 8($0) # L : prefetch $1 + xor $17, $4, $2 # E : + cmpbge $31, $2, $2 # E : + bne $2, $found_it # U : U L U L + + addq $0, 8, $0 # E : + nop # E : + nop # E : + nop # E : + +$odd_quad_count: + xor $17, $1, $2 # E : + ldq $4, 8($0) # L : prefetch $4 + cmpbge $31, $2, $2 # E : + addq $0, 8, $6 # E : + + bne $2, $found_it # U : + cmpult $6, $18, $6 # E : + addq $0, 8, $0 # E : + nop # E : + + bne $6, $unrolled_loop # U : + mov $4, $1 # E : move prefetched value into $1 + nop # E : + nop # E : + +$final: subq $5, $0, $18 # E : $18 <- number of bytes left to do + nop # E : + nop # E : + bne $18, $last_quad # U : + +$not_found: + mov $31, $0 # E : + nop # E : + nop # E : + ret # L0 : + + .end memchr diff --git a/arch/alpha/lib/ev6-memcpy.S b/arch/alpha/lib/ev6-memcpy.S new file mode 100644 index 00000000..52b37b0f --- /dev/null +++ b/arch/alpha/lib/ev6-memcpy.S @@ -0,0 +1,248 @@ +/* + * arch/alpha/lib/ev6-memcpy.S + * 21264 version by Rick Gorton <rick.gorton@alpha-processor.com> + * + * Reasonably optimized memcpy() routine for the Alpha 21264 + * + * - memory accessed as aligned quadwords only + * - uses bcmpge to compare 8 bytes in parallel + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * + * Temp usage notes: + * $1,$2, - scratch + */ + + .set noreorder + .set noat + + .align 4 + .globl memcpy + .ent memcpy +memcpy: + .frame $30,0,$26,0 + .prologue 0 + + mov $16, $0 # E : copy dest to return + ble $18, $nomoredata # U : done with the copy? + xor $16, $17, $1 # E : are source and dest alignments the same? + and $1, 7, $1 # E : are they the same mod 8? + + bne $1, $misaligned # U : Nope - gotta do this the slow way + /* source and dest are same mod 8 address */ + and $16, 7, $1 # E : Are both 0mod8? + beq $1, $both_0mod8 # U : Yes + nop # E : + + /* + * source and dest are same misalignment. move a byte at a time + * until a 0mod8 alignment for both is reached. + * At least one byte more to move + */ + +$head_align: + ldbu $1, 0($17) # L : grab a byte + subq $18, 1, $18 # E : count-- + addq $17, 1, $17 # E : src++ + stb $1, 0($16) # L : + addq $16, 1, $16 # E : dest++ + and $16, 7, $1 # E : Are we at 0mod8 yet? + ble $18, $nomoredata # U : done with the copy? + bne $1, $head_align # U : + +$both_0mod8: + cmple $18, 127, $1 # E : Can we unroll the loop? + bne $1, $no_unroll # U : + and $16, 63, $1 # E : get mod64 alignment + beq $1, $do_unroll # U : no single quads to fiddle + +$single_head_quad: + ldq $1, 0($17) # L : get 8 bytes + subq $18, 8, $18 # E : count -= 8 + addq $17, 8, $17 # E : src += 8 + nop # E : + + stq $1, 0($16) # L : store + addq $16, 8, $16 # E : dest += 8 + and $16, 63, $1 # E : get mod64 alignment + bne $1, $single_head_quad # U : still not fully aligned + +$do_unroll: + addq $16, 64, $7 # E : Initial (+1 trip) wh64 address + cmple $18, 127, $1 # E : Can we go through the unrolled loop? + bne $1, $tail_quads # U : Nope + nop # E : + +$unroll_body: + wh64 ($7) # L1 : memory subsystem hint: 64 bytes at + # ($7) are about to be over-written + ldq $6, 0($17) # L0 : bytes 0..7 + nop # E : + nop # E : + + ldq $4, 8($17) # L : bytes 8..15 + ldq $5, 16($17) # L : bytes 16..23 + addq $7, 64, $7 # E : Update next wh64 address + nop # E : + + ldq $3, 24($17) # L : bytes 24..31 + addq $16, 64, $1 # E : fallback value for wh64 + nop # E : + nop # E : + + addq $17, 32, $17 # E : src += 32 bytes + stq $6, 0($16) # L : bytes 0..7 + nop # E : + nop # E : + + stq $4, 8($16) # L : bytes 8..15 + stq $5, 16($16) # L : bytes 16..23 + subq $18, 192, $2 # E : At least two more trips to go? + nop # E : + + stq $3, 24($16) # L : bytes 24..31 + addq $16, 32, $16 # E : dest += 32 bytes + nop # E : + nop # E : + + ldq $6, 0($17) # L : bytes 0..7 + ldq $4, 8($17) # L : bytes 8..15 + cmovlt $2, $1, $7 # E : Latency 2, extra map slot - Use + # fallback wh64 address if < 2 more trips + nop # E : + + ldq $5, 16($17) # L : bytes 16..23 + ldq $3, 24($17) # L : bytes 24..31 + addq $16, 32, $16 # E : dest += 32 + subq $18, 64, $18 # E : count -= 64 + + addq $17, 32, $17 # E : src += 32 + stq $6, -32($16) # L : bytes 0..7 + stq $4, -24($16) # L : bytes 8..15 + cmple $18, 63, $1 # E : At least one more trip? + + stq $5, -16($16) # L : bytes 16..23 + stq $3, -8($16) # L : bytes 24..31 + nop # E : + beq $1, $unroll_body + +$tail_quads: +$no_unroll: + .align 4 + subq $18, 8, $18 # E : At least a quad left? + blt $18, $less_than_8 # U : Nope + nop # E : + nop # E : + +$move_a_quad: + ldq $1, 0($17) # L : fetch 8 + subq $18, 8, $18 # E : count -= 8 + addq $17, 8, $17 # E : src += 8 + nop # E : + + stq $1, 0($16) # L : store 8 + addq $16, 8, $16 # E : dest += 8 + bge $18, $move_a_quad # U : + nop # E : + +$less_than_8: + .align 4 + addq $18, 8, $18 # E : add back for trailing bytes + ble $18, $nomoredata # U : All-done + nop # E : + nop # E : + + /* Trailing bytes */ +$tail_bytes: + subq $18, 1, $18 # E : count-- + ldbu $1, 0($17) # L : fetch a byte + addq $17, 1, $17 # E : src++ + nop # E : + + stb $1, 0($16) # L : store a byte + addq $16, 1, $16 # E : dest++ + bgt $18, $tail_bytes # U : more to be done? + nop # E : + + /* branching to exit takes 3 extra cycles, so replicate exit here */ + ret $31, ($26), 1 # L0 : + nop # E : + nop # E : + nop # E : + +$misaligned: + mov $0, $4 # E : dest temp + and $0, 7, $1 # E : dest alignment mod8 + beq $1, $dest_0mod8 # U : life doesnt totally suck + nop + +$aligndest: + ble $18, $nomoredata # U : + ldbu $1, 0($17) # L : fetch a byte + subq $18, 1, $18 # E : count-- + addq $17, 1, $17 # E : src++ + + stb $1, 0($4) # L : store it + addq $4, 1, $4 # E : dest++ + and $4, 7, $1 # E : dest 0mod8 yet? + bne $1, $aligndest # U : go until we are aligned. + + /* Source has unknown alignment, but dest is known to be 0mod8 */ +$dest_0mod8: + subq $18, 8, $18 # E : At least a quad left? + blt $18, $misalign_tail # U : Nope + ldq_u $3, 0($17) # L : seed (rotating load) of 8 bytes + nop # E : + +$mis_quad: + ldq_u $16, 8($17) # L : Fetch next 8 + extql $3, $17, $3 # U : masking + extqh $16, $17, $1 # U : masking + bis $3, $1, $1 # E : merged bytes to store + + subq $18, 8, $18 # E : count -= 8 + addq $17, 8, $17 # E : src += 8 + stq $1, 0($4) # L : store 8 (aligned) + mov $16, $3 # E : "rotate" source data + + addq $4, 8, $4 # E : dest += 8 + bge $18, $mis_quad # U : More quads to move + nop + nop + +$misalign_tail: + addq $18, 8, $18 # E : account for tail stuff + ble $18, $nomoredata # U : + nop + nop + +$misalign_byte: + ldbu $1, 0($17) # L : fetch 1 + subq $18, 1, $18 # E : count-- + addq $17, 1, $17 # E : src++ + nop # E : + + stb $1, 0($4) # L : store + addq $4, 1, $4 # E : dest++ + bgt $18, $misalign_byte # U : more to go? + nop + + +$nomoredata: + ret $31, ($26), 1 # L0 : + nop # E : + nop # E : + nop # E : + + .end memcpy + +/* For backwards module compatibility. */ +__memcpy = memcpy +.globl __memcpy diff --git a/arch/alpha/lib/ev6-memset.S b/arch/alpha/lib/ev6-memset.S new file mode 100644 index 00000000..d8b94e1c --- /dev/null +++ b/arch/alpha/lib/ev6-memset.S @@ -0,0 +1,597 @@ +/* + * arch/alpha/lib/ev6-memset.S + * + * This is an efficient (and relatively small) implementation of the C library + * "memset()" function for the 21264 implementation of Alpha. + * + * 21264 version contributed by Rick Gorton <rick.gorton@alpha-processor.com> + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * The algorithm for the leading and trailing quadwords remains the same, + * however the loop has been unrolled to enable better memory throughput, + * and the code has been replicated for each of the entry points: __memset + * and __memsetw to permit better scheduling to eliminate the stalling + * encountered during the mask replication. + * A future enhancement might be to put in a byte store loop for really + * small (say < 32 bytes) memset()s. Whether or not that change would be + * a win in the kernel would depend upon the contextual usage. + * WARNING: Maintaining this is going to be more work than the above version, + * as fixes will need to be made in multiple places. The performance gain + * is worth it. + */ + + .set noat + .set noreorder +.text + .globl __memset + .globl __memsetw + .globl __constant_c_memset + .globl memset + + .ent __memset +.align 5 +__memset: + .frame $30,0,$26,0 + .prologue 0 + + /* + * Serious stalling happens. The only way to mitigate this is to + * undertake a major re-write to interleave the constant materialization + * with other parts of the fall-through code. This is important, even + * though it makes maintenance tougher. + * Do this later. + */ + and $17,255,$1 # E : 00000000000000ch + insbl $17,1,$2 # U : 000000000000ch00 + bis $16,$16,$0 # E : return value + ble $18,end_b # U : zero length requested? + + addq $18,$16,$6 # E : max address to write to + bis $1,$2,$17 # E : 000000000000chch + insbl $1,2,$3 # U : 0000000000ch0000 + insbl $1,3,$4 # U : 00000000ch000000 + + or $3,$4,$3 # E : 00000000chch0000 + inswl $17,4,$5 # U : 0000chch00000000 + xor $16,$6,$1 # E : will complete write be within one quadword? + inswl $17,6,$2 # U : chch000000000000 + + or $17,$3,$17 # E : 00000000chchchch + or $2,$5,$2 # E : chchchch00000000 + bic $1,7,$1 # E : fit within a single quadword? + and $16,7,$3 # E : Target addr misalignment + + or $17,$2,$17 # E : chchchchchchchch + beq $1,within_quad_b # U : + nop # E : + beq $3,aligned_b # U : target is 0mod8 + + /* + * Target address is misaligned, and won't fit within a quadword + */ + ldq_u $4,0($16) # L : Fetch first partial + bis $16,$16,$5 # E : Save the address + insql $17,$16,$2 # U : Insert new bytes + subq $3,8,$3 # E : Invert (for addressing uses) + + addq $18,$3,$18 # E : $18 is new count ($3 is negative) + mskql $4,$16,$4 # U : clear relevant parts of the quad + subq $16,$3,$16 # E : $16 is new aligned destination + bis $2,$4,$1 # E : Final bytes + + nop + stq_u $1,0($5) # L : Store result + nop + nop + +.align 4 +aligned_b: + /* + * We are now guaranteed to be quad aligned, with at least + * one partial quad to write. + */ + + sra $18,3,$3 # U : Number of remaining quads to write + and $18,7,$18 # E : Number of trailing bytes to write + bis $16,$16,$5 # E : Save dest address + beq $3,no_quad_b # U : tail stuff only + + /* + * it's worth the effort to unroll this and use wh64 if possible + * Lifted a bunch of code from clear_user.S + * At this point, entry values are: + * $16 Current destination address + * $5 A copy of $16 + * $6 The max quadword address to write to + * $18 Number trailer bytes + * $3 Number quads to write + */ + + and $16, 0x3f, $2 # E : Forward work (only useful for unrolled loop) + subq $3, 16, $4 # E : Only try to unroll if > 128 bytes + subq $2, 0x40, $1 # E : bias counter (aligning stuff 0mod64) + blt $4, loop_b # U : + + /* + * We know we've got at least 16 quads, minimum of one trip + * through unrolled loop. Do a quad at a time to get us 0mod64 + * aligned. + */ + + nop # E : + nop # E : + nop # E : + beq $1, $bigalign_b # U : + +$alignmod64_b: + stq $17, 0($5) # L : + subq $3, 1, $3 # E : For consistency later + addq $1, 8, $1 # E : Increment towards zero for alignment + addq $5, 8, $4 # E : Initial wh64 address (filler instruction) + + nop + nop + addq $5, 8, $5 # E : Inc address + blt $1, $alignmod64_b # U : + +$bigalign_b: + /* + * $3 - number quads left to go + * $5 - target address (aligned 0mod64) + * $17 - mask of stuff to store + * Scratch registers available: $7, $2, $4, $1 + * we know that we'll be taking a minimum of one trip through + * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle + * Assumes the wh64 needs to be for 2 trips through the loop in the future + * The wh64 is issued on for the starting destination address for trip +2 + * through the loop, and if there are less than two trips left, the target + * address will be for the current trip. + */ + +$do_wh64_b: + wh64 ($4) # L1 : memory subsystem write hint + subq $3, 24, $2 # E : For determining future wh64 addresses + stq $17, 0($5) # L : + nop # E : + + addq $5, 128, $4 # E : speculative target of next wh64 + stq $17, 8($5) # L : + stq $17, 16($5) # L : + addq $5, 64, $7 # E : Fallback address for wh64 (== next trip addr) + + stq $17, 24($5) # L : + stq $17, 32($5) # L : + cmovlt $2, $7, $4 # E : Latency 2, extra mapping cycle + nop + + stq $17, 40($5) # L : + stq $17, 48($5) # L : + subq $3, 16, $2 # E : Repeat the loop at least once more? + nop + + stq $17, 56($5) # L : + addq $5, 64, $5 # E : + subq $3, 8, $3 # E : + bge $2, $do_wh64_b # U : + + nop + nop + nop + beq $3, no_quad_b # U : Might have finished already + +.align 4 + /* + * Simple loop for trailing quadwords, or for small amounts + * of data (where we can't use an unrolled loop and wh64) + */ +loop_b: + stq $17,0($5) # L : + subq $3,1,$3 # E : Decrement number quads left + addq $5,8,$5 # E : Inc address + bne $3,loop_b # U : more? + +no_quad_b: + /* + * Write 0..7 trailing bytes. + */ + nop # E : + beq $18,end_b # U : All done? + ldq $7,0($5) # L : + mskqh $7,$6,$2 # U : Mask final quad + + insqh $17,$6,$4 # U : New bits + bis $2,$4,$1 # E : Put it all together + stq $1,0($5) # L : And back to memory + ret $31,($26),1 # L0 : + +within_quad_b: + ldq_u $1,0($16) # L : + insql $17,$16,$2 # U : New bits + mskql $1,$16,$4 # U : Clear old + bis $2,$4,$2 # E : New result + + mskql $2,$6,$4 # U : + mskqh $1,$6,$2 # U : + bis $2,$4,$1 # E : + stq_u $1,0($16) # L : + +end_b: + nop + nop + nop + ret $31,($26),1 # L0 : + .end __memset + + /* + * This is the original body of code, prior to replication and + * rescheduling. Leave it here, as there may be calls to this + * entry point. + */ +.align 4 + .ent __constant_c_memset +__constant_c_memset: + .frame $30,0,$26,0 + .prologue 0 + + addq $18,$16,$6 # E : max address to write to + bis $16,$16,$0 # E : return value + xor $16,$6,$1 # E : will complete write be within one quadword? + ble $18,end # U : zero length requested? + + bic $1,7,$1 # E : fit within a single quadword + beq $1,within_one_quad # U : + and $16,7,$3 # E : Target addr misalignment + beq $3,aligned # U : target is 0mod8 + + /* + * Target address is misaligned, and won't fit within a quadword + */ + ldq_u $4,0($16) # L : Fetch first partial + bis $16,$16,$5 # E : Save the address + insql $17,$16,$2 # U : Insert new bytes + subq $3,8,$3 # E : Invert (for addressing uses) + + addq $18,$3,$18 # E : $18 is new count ($3 is negative) + mskql $4,$16,$4 # U : clear relevant parts of the quad + subq $16,$3,$16 # E : $16 is new aligned destination + bis $2,$4,$1 # E : Final bytes + + nop + stq_u $1,0($5) # L : Store result + nop + nop + +.align 4 +aligned: + /* + * We are now guaranteed to be quad aligned, with at least + * one partial quad to write. + */ + + sra $18,3,$3 # U : Number of remaining quads to write + and $18,7,$18 # E : Number of trailing bytes to write + bis $16,$16,$5 # E : Save dest address + beq $3,no_quad # U : tail stuff only + + /* + * it's worth the effort to unroll this and use wh64 if possible + * Lifted a bunch of code from clear_user.S + * At this point, entry values are: + * $16 Current destination address + * $5 A copy of $16 + * $6 The max quadword address to write to + * $18 Number trailer bytes + * $3 Number quads to write + */ + + and $16, 0x3f, $2 # E : Forward work (only useful for unrolled loop) + subq $3, 16, $4 # E : Only try to unroll if > 128 bytes + subq $2, 0x40, $1 # E : bias counter (aligning stuff 0mod64) + blt $4, loop # U : + + /* + * We know we've got at least 16 quads, minimum of one trip + * through unrolled loop. Do a quad at a time to get us 0mod64 + * aligned. + */ + + nop # E : + nop # E : + nop # E : + beq $1, $bigalign # U : + +$alignmod64: + stq $17, 0($5) # L : + subq $3, 1, $3 # E : For consistency later + addq $1, 8, $1 # E : Increment towards zero for alignment + addq $5, 8, $4 # E : Initial wh64 address (filler instruction) + + nop + nop + addq $5, 8, $5 # E : Inc address + blt $1, $alignmod64 # U : + +$bigalign: + /* + * $3 - number quads left to go + * $5 - target address (aligned 0mod64) + * $17 - mask of stuff to store + * Scratch registers available: $7, $2, $4, $1 + * we know that we'll be taking a minimum of one trip through + * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle + * Assumes the wh64 needs to be for 2 trips through the loop in the future + * The wh64 is issued on for the starting destination address for trip +2 + * through the loop, and if there are less than two trips left, the target + * address will be for the current trip. + */ + +$do_wh64: + wh64 ($4) # L1 : memory subsystem write hint + subq $3, 24, $2 # E : For determining future wh64 addresses + stq $17, 0($5) # L : + nop # E : + + addq $5, 128, $4 # E : speculative target of next wh64 + stq $17, 8($5) # L : + stq $17, 16($5) # L : + addq $5, 64, $7 # E : Fallback address for wh64 (== next trip addr) + + stq $17, 24($5) # L : + stq $17, 32($5) # L : + cmovlt $2, $7, $4 # E : Latency 2, extra mapping cycle + nop + + stq $17, 40($5) # L : + stq $17, 48($5) # L : + subq $3, 16, $2 # E : Repeat the loop at least once more? + nop + + stq $17, 56($5) # L : + addq $5, 64, $5 # E : + subq $3, 8, $3 # E : + bge $2, $do_wh64 # U : + + nop + nop + nop + beq $3, no_quad # U : Might have finished already + +.align 4 + /* + * Simple loop for trailing quadwords, or for small amounts + * of data (where we can't use an unrolled loop and wh64) + */ +loop: + stq $17,0($5) # L : + subq $3,1,$3 # E : Decrement number quads left + addq $5,8,$5 # E : Inc address + bne $3,loop # U : more? + +no_quad: + /* + * Write 0..7 trailing bytes. + */ + nop # E : + beq $18,end # U : All done? + ldq $7,0($5) # L : + mskqh $7,$6,$2 # U : Mask final quad + + insqh $17,$6,$4 # U : New bits + bis $2,$4,$1 # E : Put it all together + stq $1,0($5) # L : And back to memory + ret $31,($26),1 # L0 : + +within_one_quad: + ldq_u $1,0($16) # L : + insql $17,$16,$2 # U : New bits + mskql $1,$16,$4 # U : Clear old + bis $2,$4,$2 # E : New result + + mskql $2,$6,$4 # U : + mskqh $1,$6,$2 # U : + bis $2,$4,$1 # E : + stq_u $1,0($16) # L : + +end: + nop + nop + nop + ret $31,($26),1 # L0 : + .end __constant_c_memset + + /* + * This is a replicant of the __constant_c_memset code, rescheduled + * to mask stalls. Note that entry point names also had to change + */ + .align 5 + .ent __memsetw + +__memsetw: + .frame $30,0,$26,0 + .prologue 0 + + inswl $17,0,$5 # U : 000000000000c1c2 + inswl $17,2,$2 # U : 00000000c1c20000 + bis $16,$16,$0 # E : return value + addq $18,$16,$6 # E : max address to write to + + ble $18, end_w # U : zero length requested? + inswl $17,4,$3 # U : 0000c1c200000000 + inswl $17,6,$4 # U : c1c2000000000000 + xor $16,$6,$1 # E : will complete write be within one quadword? + + or $2,$5,$2 # E : 00000000c1c2c1c2 + or $3,$4,$17 # E : c1c2c1c200000000 + bic $1,7,$1 # E : fit within a single quadword + and $16,7,$3 # E : Target addr misalignment + + or $17,$2,$17 # E : c1c2c1c2c1c2c1c2 + beq $1,within_quad_w # U : + nop + beq $3,aligned_w # U : target is 0mod8 + + /* + * Target address is misaligned, and won't fit within a quadword + */ + ldq_u $4,0($16) # L : Fetch first partial + bis $16,$16,$5 # E : Save the address + insql $17,$16,$2 # U : Insert new bytes + subq $3,8,$3 # E : Invert (for addressing uses) + + addq $18,$3,$18 # E : $18 is new count ($3 is negative) + mskql $4,$16,$4 # U : clear relevant parts of the quad + subq $16,$3,$16 # E : $16 is new aligned destination + bis $2,$4,$1 # E : Final bytes + + nop + stq_u $1,0($5) # L : Store result + nop + nop + +.align 4 +aligned_w: + /* + * We are now guaranteed to be quad aligned, with at least + * one partial quad to write. + */ + + sra $18,3,$3 # U : Number of remaining quads to write + and $18,7,$18 # E : Number of trailing bytes to write + bis $16,$16,$5 # E : Save dest address + beq $3,no_quad_w # U : tail stuff only + + /* + * it's worth the effort to unroll this and use wh64 if possible + * Lifted a bunch of code from clear_user.S + * At this point, entry values are: + * $16 Current destination address + * $5 A copy of $16 + * $6 The max quadword address to write to + * $18 Number trailer bytes + * $3 Number quads to write + */ + + and $16, 0x3f, $2 # E : Forward work (only useful for unrolled loop) + subq $3, 16, $4 # E : Only try to unroll if > 128 bytes + subq $2, 0x40, $1 # E : bias counter (aligning stuff 0mod64) + blt $4, loop_w # U : + + /* + * We know we've got at least 16 quads, minimum of one trip + * through unrolled loop. Do a quad at a time to get us 0mod64 + * aligned. + */ + + nop # E : + nop # E : + nop # E : + beq $1, $bigalign_w # U : + +$alignmod64_w: + stq $17, 0($5) # L : + subq $3, 1, $3 # E : For consistency later + addq $1, 8, $1 # E : Increment towards zero for alignment + addq $5, 8, $4 # E : Initial wh64 address (filler instruction) + + nop + nop + addq $5, 8, $5 # E : Inc address + blt $1, $alignmod64_w # U : + +$bigalign_w: + /* + * $3 - number quads left to go + * $5 - target address (aligned 0mod64) + * $17 - mask of stuff to store + * Scratch registers available: $7, $2, $4, $1 + * we know that we'll be taking a minimum of one trip through + * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle + * Assumes the wh64 needs to be for 2 trips through the loop in the future + * The wh64 is issued on for the starting destination address for trip +2 + * through the loop, and if there are less than two trips left, the target + * address will be for the current trip. + */ + +$do_wh64_w: + wh64 ($4) # L1 : memory subsystem write hint + subq $3, 24, $2 # E : For determining future wh64 addresses + stq $17, 0($5) # L : + nop # E : + + addq $5, 128, $4 # E : speculative target of next wh64 + stq $17, 8($5) # L : + stq $17, 16($5) # L : + addq $5, 64, $7 # E : Fallback address for wh64 (== next trip addr) + + stq $17, 24($5) # L : + stq $17, 32($5) # L : + cmovlt $2, $7, $4 # E : Latency 2, extra mapping cycle + nop + + stq $17, 40($5) # L : + stq $17, 48($5) # L : + subq $3, 16, $2 # E : Repeat the loop at least once more? + nop + + stq $17, 56($5) # L : + addq $5, 64, $5 # E : + subq $3, 8, $3 # E : + bge $2, $do_wh64_w # U : + + nop + nop + nop + beq $3, no_quad_w # U : Might have finished already + +.align 4 + /* + * Simple loop for trailing quadwords, or for small amounts + * of data (where we can't use an unrolled loop and wh64) + */ +loop_w: + stq $17,0($5) # L : + subq $3,1,$3 # E : Decrement number quads left + addq $5,8,$5 # E : Inc address + bne $3,loop_w # U : more? + +no_quad_w: + /* + * Write 0..7 trailing bytes. + */ + nop # E : + beq $18,end_w # U : All done? + ldq $7,0($5) # L : + mskqh $7,$6,$2 # U : Mask final quad + + insqh $17,$6,$4 # U : New bits + bis $2,$4,$1 # E : Put it all together + stq $1,0($5) # L : And back to memory + ret $31,($26),1 # L0 : + +within_quad_w: + ldq_u $1,0($16) # L : + insql $17,$16,$2 # U : New bits + mskql $1,$16,$4 # U : Clear old + bis $2,$4,$2 # E : New result + + mskql $2,$6,$4 # U : + mskqh $1,$6,$2 # U : + bis $2,$4,$1 # E : + stq_u $1,0($16) # L : + +end_w: + nop + nop + nop + ret $31,($26),1 # L0 : + + .end __memsetw + +memset = __memset diff --git a/arch/alpha/lib/ev6-strncpy_from_user.S b/arch/alpha/lib/ev6-strncpy_from_user.S new file mode 100644 index 00000000..d2e28178 --- /dev/null +++ b/arch/alpha/lib/ev6-strncpy_from_user.S @@ -0,0 +1,424 @@ +/* + * arch/alpha/lib/ev6-strncpy_from_user.S + * 21264 version contributed by Rick Gorton <rick.gorton@alpha-processor.com> + * + * Just like strncpy except in the return value: + * + * -EFAULT if an exception occurs before the terminator is copied. + * N if the buffer filled. + * + * Otherwise the length of the string is returned. + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * A bunch of instructions got moved and temp registers were changed + * to aid in scheduling. Control flow was also re-arranged to eliminate + * branches, and to provide longer code sequences to enable better scheduling. + * A total rewrite (using byte load/stores for start & tail sequences) + * is desirable, but very difficult to do without a from-scratch rewrite. + * Save that for the future. + */ + + +#include <asm/errno.h> +#include <asm/regdef.h> + + +/* Allow an exception for an insn; exit if we get one. */ +#define EX(x,y...) \ + 99: x,##y; \ + .section __ex_table,"a"; \ + .long 99b - .; \ + lda $31, $exception-99b($0); \ + .previous + + + .set noat + .set noreorder + .text + + .globl __strncpy_from_user + .ent __strncpy_from_user + .frame $30, 0, $26 + .prologue 0 + + .align 4 +__strncpy_from_user: + and a0, 7, t3 # E : find dest misalignment + beq a2, $zerolength # U : + + /* Are source and destination co-aligned? */ + mov a0, v0 # E : save the string start + xor a0, a1, t4 # E : + EX( ldq_u t1, 0(a1) ) # L : Latency=3 load first quadword + ldq_u t0, 0(a0) # L : load first (partial) aligned dest quadword + + addq a2, t3, a2 # E : bias count by dest misalignment + subq a2, 1, a3 # E : + addq zero, 1, t10 # E : + and t4, 7, t4 # E : misalignment between the two + + and a3, 7, t6 # E : number of tail bytes + sll t10, t6, t10 # E : t10 = bitmask of last count byte + bne t4, $unaligned # U : + lda t2, -1 # E : build a mask against false zero + + /* + * We are co-aligned; take care of a partial first word. + * On entry to this basic block: + * t0 == the first destination word for masking back in + * t1 == the first source word. + */ + + srl a3, 3, a2 # E : a2 = loop counter = (count - 1)/8 + addq a1, 8, a1 # E : + mskqh t2, a1, t2 # U : detection in the src word + nop + + /* Create the 1st output word and detect 0's in the 1st input word. */ + mskqh t1, a1, t3 # U : + mskql t0, a1, t0 # U : assemble the first output word + ornot t1, t2, t2 # E : + nop + + cmpbge zero, t2, t8 # E : bits set iff null found + or t0, t3, t0 # E : + beq a2, $a_eoc # U : + bne t8, $a_eos # U : 2nd branch in a quad. Bad. + + /* On entry to this basic block: + * t0 == a source quad not containing a null. + * a0 - current aligned destination address + * a1 - current aligned source address + * a2 - count of quadwords to move. + * NOTE: Loop improvement - unrolling this is going to be + * a huge win, since we're going to stall otherwise. + * Fix this later. For _really_ large copies, look + * at using wh64 on a look-ahead basis. See the code + * in clear_user.S and copy_user.S. + * Presumably, since (a0) and (a1) do not overlap (by C definition) + * Lots of nops here: + * - Separate loads from stores + * - Keep it to 1 branch/quadpack so the branch predictor + * can train. + */ +$a_loop: + stq_u t0, 0(a0) # L : + addq a0, 8, a0 # E : + nop + subq a2, 1, a2 # E : + + EX( ldq_u t0, 0(a1) ) # L : + addq a1, 8, a1 # E : + cmpbge zero, t0, t8 # E : Stall 2 cycles on t0 + beq a2, $a_eoc # U : + + beq t8, $a_loop # U : + nop + nop + nop + + /* Take care of the final (partial) word store. At this point + * the end-of-count bit is set in t8 iff it applies. + * + * On entry to this basic block we have: + * t0 == the source word containing the null + * t8 == the cmpbge mask that found it. + */ +$a_eos: + negq t8, t12 # E : find low bit set + and t8, t12, t12 # E : + + /* We're doing a partial word store and so need to combine + our source and original destination words. */ + ldq_u t1, 0(a0) # L : + subq t12, 1, t6 # E : + + or t12, t6, t8 # E : + zapnot t0, t8, t0 # U : clear src bytes > null + zap t1, t8, t1 # U : clear dst bytes <= null + or t0, t1, t0 # E : + + stq_u t0, 0(a0) # L : + br $finish_up # L0 : + nop + nop + + /* Add the end-of-count bit to the eos detection bitmask. */ + .align 4 +$a_eoc: + or t10, t8, t8 + br $a_eos + nop + nop + + +/* The source and destination are not co-aligned. Align the destination + and cope. We have to be very careful about not reading too much and + causing a SEGV. */ + + .align 4 +$u_head: + /* We know just enough now to be able to assemble the first + full source word. We can still find a zero at the end of it + that prevents us from outputting the whole thing. + + On entry to this basic block: + t0 == the first dest word, unmasked + t1 == the shifted low bits of the first source word + t6 == bytemask that is -1 in dest word bytes */ + + EX( ldq_u t2, 8(a1) ) # L : load second src word + addq a1, 8, a1 # E : + mskql t0, a0, t0 # U : mask trailing garbage in dst + extqh t2, a1, t4 # U : + + or t1, t4, t1 # E : first aligned src word complete + mskqh t1, a0, t1 # U : mask leading garbage in src + or t0, t1, t0 # E : first output word complete + or t0, t6, t6 # E : mask original data for zero test + + cmpbge zero, t6, t8 # E : + beq a2, $u_eocfin # U : + bne t8, $u_final # U : bad news - 2nd branch in a quad + lda t6, -1 # E : mask out the bits we have + + mskql t6, a1, t6 # U : already seen + stq_u t0, 0(a0) # L : store first output word + or t6, t2, t2 # E : + cmpbge zero, t2, t8 # E : find nulls in second partial + + addq a0, 8, a0 # E : + subq a2, 1, a2 # E : + bne t8, $u_late_head_exit # U : + nop + + /* Finally, we've got all the stupid leading edge cases taken care + of and we can set up to enter the main loop. */ + + extql t2, a1, t1 # U : position hi-bits of lo word + EX( ldq_u t2, 8(a1) ) # L : read next high-order source word + addq a1, 8, a1 # E : + cmpbge zero, t2, t8 # E : + + beq a2, $u_eoc # U : + bne t8, $u_eos # U : + nop + nop + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned source words. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t1 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word + + We further know that t2 does not contain a null terminator. */ + + /* + * Extra nops here: + * separate load quads from store quads + * only one branch/quad to permit predictor training + */ + + .align 4 +$u_loop: + extqh t2, a1, t0 # U : extract high bits for current word + addq a1, 8, a1 # E : + extql t2, a1, t3 # U : extract low bits for next time + addq a0, 8, a0 # E : + + or t0, t1, t0 # E : current dst word now complete + EX( ldq_u t2, 0(a1) ) # L : load high word for next time + subq a2, 1, a2 # E : + nop + + stq_u t0, -8(a0) # L : save the current word + mov t3, t1 # E : + cmpbge zero, t2, t8 # E : test new word for eos + beq a2, $u_eoc # U : + + beq t8, $u_loop # U : + nop + nop + nop + + /* We've found a zero somewhere in the source word we just read. + If it resides in the lower half, we have one (probably partial) + word to write out, and if it resides in the upper half, we + have one full and one partial word left to write out. + + On entry to this basic block: + t1 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word. */ + .align 4 +$u_eos: + extqh t2, a1, t0 # U : + or t0, t1, t0 # E : first (partial) source word complete + cmpbge zero, t0, t8 # E : is the null in this first bit? + nop + + bne t8, $u_final # U : + stq_u t0, 0(a0) # L : the null was in the high-order bits + addq a0, 8, a0 # E : + subq a2, 1, a2 # E : + + .align 4 +$u_late_head_exit: + extql t2, a1, t0 # U : + cmpbge zero, t0, t8 # E : + or t8, t10, t6 # E : + cmoveq a2, t6, t8 # E : + + /* Take care of a final (probably partial) result word. + On entry to this basic block: + t0 == assembled source word + t8 == cmpbge mask that found the null. */ + .align 4 +$u_final: + negq t8, t6 # E : isolate low bit set + and t6, t8, t12 # E : + ldq_u t1, 0(a0) # L : + subq t12, 1, t6 # E : + + or t6, t12, t8 # E : + zapnot t0, t8, t0 # U : kill source bytes > null + zap t1, t8, t1 # U : kill dest bytes <= null + or t0, t1, t0 # E : + + stq_u t0, 0(a0) # E : + br $finish_up # U : + nop + nop + + .align 4 +$u_eoc: # end-of-count + extqh t2, a1, t0 # U : + or t0, t1, t0 # E : + cmpbge zero, t0, t8 # E : + nop + + .align 4 +$u_eocfin: # end-of-count, final word + or t10, t8, t8 # E : + br $u_final # U : + nop + nop + + /* Unaligned copy entry point. */ + .align 4 +$unaligned: + + srl a3, 3, a2 # U : a2 = loop counter = (count - 1)/8 + and a0, 7, t4 # E : find dest misalignment + and a1, 7, t5 # E : find src misalignment + mov zero, t0 # E : + + /* Conditionally load the first destination word and a bytemask + with 0xff indicating that the destination byte is sacrosanct. */ + + mov zero, t6 # E : + beq t4, 1f # U : + ldq_u t0, 0(a0) # L : + lda t6, -1 # E : + + mskql t6, a0, t6 # E : + nop + nop + nop + + .align 4 +1: + subq a1, t4, a1 # E : sub dest misalignment from src addr + /* If source misalignment is larger than dest misalignment, we need + extra startup checks to avoid SEGV. */ + cmplt t4, t5, t12 # E : + extql t1, a1, t1 # U : shift src into place + lda t2, -1 # E : for creating masks later + + beq t12, $u_head # U : + mskqh t2, t5, t2 # U : begin src byte validity mask + cmpbge zero, t1, t8 # E : is there a zero? + nop + + extql t2, a1, t2 # U : + or t8, t10, t5 # E : test for end-of-count too + cmpbge zero, t2, t3 # E : + cmoveq a2, t5, t8 # E : Latency=2, extra map slot + + nop # E : goes with cmov + andnot t8, t3, t8 # E : + beq t8, $u_head # U : + nop + + /* At this point we've found a zero in the first partial word of + the source. We need to isolate the valid source data and mask + it into the original destination data. (Incidentally, we know + that we'll need at least one byte of that original dest word.) */ + + ldq_u t0, 0(a0) # L : + negq t8, t6 # E : build bitmask of bytes <= zero + mskqh t1, t4, t1 # U : + and t6, t8, t12 # E : + + subq t12, 1, t6 # E : + or t6, t12, t8 # E : + zapnot t2, t8, t2 # U : prepare source word; mirror changes + zapnot t1, t8, t1 # U : to source validity mask + + andnot t0, t2, t0 # E : zero place for source to reside + or t0, t1, t0 # E : and put it there + stq_u t0, 0(a0) # L : + nop + + .align 4 +$finish_up: + zapnot t0, t12, t4 # U : was last byte written null? + and t12, 0xf0, t3 # E : binary search for the address of the + cmovne t4, 1, t4 # E : Latency=2, extra map slot + nop # E : with cmovne + + and t12, 0xcc, t2 # E : last byte written + and t12, 0xaa, t1 # E : + cmovne t3, 4, t3 # E : Latency=2, extra map slot + nop # E : with cmovne + + bic a0, 7, t0 + cmovne t2, 2, t2 # E : Latency=2, extra map slot + nop # E : with cmovne + nop + + cmovne t1, 1, t1 # E : Latency=2, extra map slot + nop # E : with cmovne + addq t0, t3, t0 # E : + addq t1, t2, t1 # E : + + addq t0, t1, t0 # E : + addq t0, t4, t0 # add one if we filled the buffer + subq t0, v0, v0 # find string length + ret # L0 : + + .align 4 +$zerolength: + nop + nop + nop + clr v0 + +$exception: + nop + nop + nop + ret + + .end __strncpy_from_user diff --git a/arch/alpha/lib/ev6-stxcpy.S b/arch/alpha/lib/ev6-stxcpy.S new file mode 100644 index 00000000..4643ff2f --- /dev/null +++ b/arch/alpha/lib/ev6-stxcpy.S @@ -0,0 +1,321 @@ +/* + * arch/alpha/lib/ev6-stxcpy.S + * 21264 version contributed by Rick Gorton <rick.gorton@alpha-processor.com> + * + * Copy a null-terminated string from SRC to DST. + * + * This is an internal routine used by strcpy, stpcpy, and strcat. + * As such, it uses special linkage conventions to make implementation + * of these public functions more efficient. + * + * On input: + * t9 = return address + * a0 = DST + * a1 = SRC + * + * On output: + * t12 = bitmask (with one bit set) indicating the last byte written + * a0 = unaligned address of the last *word* written + * + * Furthermore, v0, a3-a5, t11, and t12 are untouched. + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + */ + +#include <asm/regdef.h> + + .set noat + .set noreorder + + .text + +/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that + doesn't like putting the entry point for a procedure somewhere in the + middle of the procedure descriptor. Work around this by putting the + aligned copy in its own procedure descriptor */ + + + .ent stxcpy_aligned + .align 4 +stxcpy_aligned: + .frame sp, 0, t9 + .prologue 0 + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == the first source word. */ + + /* Create the 1st output word and detect 0's in the 1st input word. */ + lda t2, -1 # E : build a mask against false zero + mskqh t2, a1, t2 # U : detection in the src word (stall) + mskqh t1, a1, t3 # U : + ornot t1, t2, t2 # E : (stall) + + mskql t0, a1, t0 # U : assemble the first output word + cmpbge zero, t2, t8 # E : bits set iff null found + or t0, t3, t1 # E : (stall) + bne t8, $a_eos # U : (stall) + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == a source word not containing a null. */ + /* Nops here to separate store quads from load quads */ + +$a_loop: + stq_u t1, 0(a0) # L : + addq a0, 8, a0 # E : + nop + nop + + ldq_u t1, 0(a1) # L : Latency=3 + addq a1, 8, a1 # E : + cmpbge zero, t1, t8 # E : (3 cycle stall) + beq t8, $a_loop # U : (stall for t8) + + /* Take care of the final (partial) word store. + On entry to this basic block we have: + t1 == the source word containing the null + t8 == the cmpbge mask that found it. */ +$a_eos: + negq t8, t6 # E : find low bit set + and t8, t6, t12 # E : (stall) + /* For the sake of the cache, don't read a destination word + if we're not going to need it. */ + and t12, 0x80, t6 # E : (stall) + bne t6, 1f # U : (stall) + + /* We're doing a partial word store and so need to combine + our source and original destination words. */ + ldq_u t0, 0(a0) # L : Latency=3 + subq t12, 1, t6 # E : + zapnot t1, t6, t1 # U : clear src bytes >= null (stall) + or t12, t6, t8 # E : (stall) + + zap t0, t8, t0 # E : clear dst bytes <= null + or t0, t1, t1 # E : (stall) + nop + nop + +1: stq_u t1, 0(a0) # L : + ret (t9) # L0 : Latency=3 + nop + nop + + .end stxcpy_aligned + + .align 4 + .ent __stxcpy + .globl __stxcpy +__stxcpy: + .frame sp, 0, t9 + .prologue 0 + + /* Are source and destination co-aligned? */ + xor a0, a1, t0 # E : + unop # E : + and t0, 7, t0 # E : (stall) + bne t0, $unaligned # U : (stall) + + /* We are co-aligned; take care of a partial first word. */ + ldq_u t1, 0(a1) # L : load first src word + and a0, 7, t0 # E : take care not to load a word ... + addq a1, 8, a1 # E : + beq t0, stxcpy_aligned # U : ... if we wont need it (stall) + + ldq_u t0, 0(a0) # L : + br stxcpy_aligned # L0 : Latency=3 + nop + nop + + +/* The source and destination are not co-aligned. Align the destination + and cope. We have to be very careful about not reading too much and + causing a SEGV. */ + + .align 4 +$u_head: + /* We know just enough now to be able to assemble the first + full source word. We can still find a zero at the end of it + that prevents us from outputting the whole thing. + + On entry to this basic block: + t0 == the first dest word, for masking back in, if needed else 0 + t1 == the low bits of the first source word + t6 == bytemask that is -1 in dest word bytes */ + + ldq_u t2, 8(a1) # L : + addq a1, 8, a1 # E : + extql t1, a1, t1 # U : (stall on a1) + extqh t2, a1, t4 # U : (stall on a1) + + mskql t0, a0, t0 # U : + or t1, t4, t1 # E : + mskqh t1, a0, t1 # U : (stall on t1) + or t0, t1, t1 # E : (stall on t1) + + or t1, t6, t6 # E : + cmpbge zero, t6, t8 # E : (stall) + lda t6, -1 # E : for masking just below + bne t8, $u_final # U : (stall) + + mskql t6, a1, t6 # U : mask out the bits we have + or t6, t2, t2 # E : already extracted before (stall) + cmpbge zero, t2, t8 # E : testing eos (stall) + bne t8, $u_late_head_exit # U : (stall) + + /* Finally, we've got all the stupid leading edge cases taken care + of and we can set up to enter the main loop. */ + + stq_u t1, 0(a0) # L : store first output word + addq a0, 8, a0 # E : + extql t2, a1, t0 # U : position ho-bits of lo word + ldq_u t2, 8(a1) # U : read next high-order source word + + addq a1, 8, a1 # E : + cmpbge zero, t2, t8 # E : (stall for t2) + nop # E : + bne t8, $u_eos # U : (stall) + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned source words. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t0 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word + + We further know that t2 does not contain a null terminator. */ + + .align 3 +$u_loop: + extqh t2, a1, t1 # U : extract high bits for current word + addq a1, 8, a1 # E : (stall) + extql t2, a1, t3 # U : extract low bits for next time (stall) + addq a0, 8, a0 # E : + + or t0, t1, t1 # E : current dst word now complete + ldq_u t2, 0(a1) # L : Latency=3 load high word for next time + stq_u t1, -8(a0) # L : save the current word (stall) + mov t3, t0 # E : + + cmpbge zero, t2, t8 # E : test new word for eos + beq t8, $u_loop # U : (stall) + nop + nop + + /* We've found a zero somewhere in the source word we just read. + If it resides in the lower half, we have one (probably partial) + word to write out, and if it resides in the upper half, we + have one full and one partial word left to write out. + + On entry to this basic block: + t0 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word. */ +$u_eos: + extqh t2, a1, t1 # U : + or t0, t1, t1 # E : first (partial) source word complete (stall) + cmpbge zero, t1, t8 # E : is the null in this first bit? (stall) + bne t8, $u_final # U : (stall) + +$u_late_head_exit: + stq_u t1, 0(a0) # L : the null was in the high-order bits + addq a0, 8, a0 # E : + extql t2, a1, t1 # U : + cmpbge zero, t1, t8 # E : (stall) + + /* Take care of a final (probably partial) result word. + On entry to this basic block: + t1 == assembled source word + t8 == cmpbge mask that found the null. */ +$u_final: + negq t8, t6 # E : isolate low bit set + and t6, t8, t12 # E : (stall) + and t12, 0x80, t6 # E : avoid dest word load if we can (stall) + bne t6, 1f # U : (stall) + + ldq_u t0, 0(a0) # E : + subq t12, 1, t6 # E : + or t6, t12, t8 # E : (stall) + zapnot t1, t6, t1 # U : kill source bytes >= null (stall) + + zap t0, t8, t0 # U : kill dest bytes <= null (2 cycle data stall) + or t0, t1, t1 # E : (stall) + nop + nop + +1: stq_u t1, 0(a0) # L : + ret (t9) # L0 : Latency=3 + nop + nop + + /* Unaligned copy entry point. */ + .align 4 +$unaligned: + + ldq_u t1, 0(a1) # L : load first source word + and a0, 7, t4 # E : find dest misalignment + and a1, 7, t5 # E : find src misalignment + /* Conditionally load the first destination word and a bytemask + with 0xff indicating that the destination byte is sacrosanct. */ + mov zero, t0 # E : + + mov zero, t6 # E : + beq t4, 1f # U : + ldq_u t0, 0(a0) # L : + lda t6, -1 # E : + + mskql t6, a0, t6 # U : + nop + nop + nop +1: + subq a1, t4, a1 # E : sub dest misalignment from src addr + /* If source misalignment is larger than dest misalignment, we need + extra startup checks to avoid SEGV. */ + cmplt t4, t5, t12 # E : + beq t12, $u_head # U : + lda t2, -1 # E : mask out leading garbage in source + + mskqh t2, t5, t2 # U : + ornot t1, t2, t3 # E : (stall) + cmpbge zero, t3, t8 # E : is there a zero? (stall) + beq t8, $u_head # U : (stall) + + /* At this point we've found a zero in the first partial word of + the source. We need to isolate the valid source data and mask + it into the original destination data. (Incidentally, we know + that we'll need at least one byte of that original dest word.) */ + + ldq_u t0, 0(a0) # L : + negq t8, t6 # E : build bitmask of bytes <= zero + and t6, t8, t12 # E : (stall) + and a1, 7, t5 # E : + + subq t12, 1, t6 # E : + or t6, t12, t8 # E : (stall) + srl t12, t5, t12 # U : adjust final null return value + zapnot t2, t8, t2 # U : prepare source word; mirror changes (stall) + + and t1, t2, t1 # E : to source validity mask + extql t2, a1, t2 # U : + extql t1, a1, t1 # U : (stall) + andnot t0, t2, t0 # .. e1 : zero place for source to reside (stall) + + or t0, t1, t1 # e1 : and put it there + stq_u t1, 0(a0) # .. e0 : (stall) + ret (t9) # e1 : + nop + + .end __stxcpy + diff --git a/arch/alpha/lib/ev6-stxncpy.S b/arch/alpha/lib/ev6-stxncpy.S new file mode 100644 index 00000000..1aa6e97e --- /dev/null +++ b/arch/alpha/lib/ev6-stxncpy.S @@ -0,0 +1,397 @@ +/* + * arch/alpha/lib/ev6-stxncpy.S + * 21264 version contributed by Rick Gorton <rick.gorton@api-networks.com> + * + * Copy no more than COUNT bytes of the null-terminated string from + * SRC to DST. + * + * This is an internal routine used by strncpy, stpncpy, and strncat. + * As such, it uses special linkage conventions to make implementation + * of these public functions more efficient. + * + * On input: + * t9 = return address + * a0 = DST + * a1 = SRC + * a2 = COUNT + * + * Furthermore, COUNT may not be zero. + * + * On output: + * t0 = last word written + * t10 = bitmask (with one bit set) indicating the byte position of + * the end of the range specified by COUNT + * t12 = bitmask (with one bit set) indicating the last byte written + * a0 = unaligned address of the last *word* written + * a2 = the number of full words left in COUNT + * + * Furthermore, v0, a3-a5, t11, and $at are untouched. + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + */ + +#include <asm/regdef.h> + + .set noat + .set noreorder + + .text + +/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that + doesn't like putting the entry point for a procedure somewhere in the + middle of the procedure descriptor. Work around this by putting the + aligned copy in its own procedure descriptor */ + + + .ent stxncpy_aligned + .align 4 +stxncpy_aligned: + .frame sp, 0, t9, 0 + .prologue 0 + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == the first source word. */ + + /* Create the 1st output word and detect 0's in the 1st input word. */ + lda t2, -1 # E : build a mask against false zero + mskqh t2, a1, t2 # U : detection in the src word (stall) + mskqh t1, a1, t3 # U : + ornot t1, t2, t2 # E : (stall) + + mskql t0, a1, t0 # U : assemble the first output word + cmpbge zero, t2, t8 # E : bits set iff null found + or t0, t3, t0 # E : (stall) + beq a2, $a_eoc # U : + + bne t8, $a_eos # U : + nop + nop + nop + + /* On entry to this basic block: + t0 == a source word not containing a null. */ + + /* + * nops here to: + * separate store quads from load quads + * limit of 1 bcond/quad to permit training + */ +$a_loop: + stq_u t0, 0(a0) # L : + addq a0, 8, a0 # E : + subq a2, 1, a2 # E : + nop + + ldq_u t0, 0(a1) # L : + addq a1, 8, a1 # E : + cmpbge zero, t0, t8 # E : + beq a2, $a_eoc # U : + + beq t8, $a_loop # U : + nop + nop + nop + + /* Take care of the final (partial) word store. At this point + the end-of-count bit is set in t8 iff it applies. + + On entry to this basic block we have: + t0 == the source word containing the null + t8 == the cmpbge mask that found it. */ + +$a_eos: + negq t8, t12 # E : find low bit set + and t8, t12, t12 # E : (stall) + /* For the sake of the cache, don't read a destination word + if we're not going to need it. */ + and t12, 0x80, t6 # E : (stall) + bne t6, 1f # U : (stall) + + /* We're doing a partial word store and so need to combine + our source and original destination words. */ + ldq_u t1, 0(a0) # L : + subq t12, 1, t6 # E : + or t12, t6, t8 # E : (stall) + zapnot t0, t8, t0 # U : clear src bytes > null (stall) + + zap t1, t8, t1 # .. e1 : clear dst bytes <= null + or t0, t1, t0 # e1 : (stall) + nop + nop + +1: stq_u t0, 0(a0) # L : + ret (t9) # L0 : Latency=3 + nop + nop + + /* Add the end-of-count bit to the eos detection bitmask. */ +$a_eoc: + or t10, t8, t8 # E : + br $a_eos # L0 : Latency=3 + nop + nop + + .end stxncpy_aligned + + .align 4 + .ent __stxncpy + .globl __stxncpy +__stxncpy: + .frame sp, 0, t9, 0 + .prologue 0 + + /* Are source and destination co-aligned? */ + xor a0, a1, t1 # E : + and a0, 7, t0 # E : find dest misalignment + and t1, 7, t1 # E : (stall) + addq a2, t0, a2 # E : bias count by dest misalignment (stall) + + subq a2, 1, a2 # E : + and a2, 7, t2 # E : (stall) + srl a2, 3, a2 # U : a2 = loop counter = (count - 1)/8 (stall) + addq zero, 1, t10 # E : + + sll t10, t2, t10 # U : t10 = bitmask of last count byte + bne t1, $unaligned # U : + /* We are co-aligned; take care of a partial first word. */ + ldq_u t1, 0(a1) # L : load first src word + addq a1, 8, a1 # E : + + beq t0, stxncpy_aligned # U : avoid loading dest word if not needed + ldq_u t0, 0(a0) # L : + nop + nop + + br stxncpy_aligned # .. e1 : + nop + nop + nop + + + +/* The source and destination are not co-aligned. Align the destination + and cope. We have to be very careful about not reading too much and + causing a SEGV. */ + + .align 4 +$u_head: + /* We know just enough now to be able to assemble the first + full source word. We can still find a zero at the end of it + that prevents us from outputting the whole thing. + + On entry to this basic block: + t0 == the first dest word, unmasked + t1 == the shifted low bits of the first source word + t6 == bytemask that is -1 in dest word bytes */ + + ldq_u t2, 8(a1) # L : Latency=3 load second src word + addq a1, 8, a1 # E : + mskql t0, a0, t0 # U : mask trailing garbage in dst + extqh t2, a1, t4 # U : (3 cycle stall on t2) + + or t1, t4, t1 # E : first aligned src word complete (stall) + mskqh t1, a0, t1 # U : mask leading garbage in src (stall) + or t0, t1, t0 # E : first output word complete (stall) + or t0, t6, t6 # E : mask original data for zero test (stall) + + cmpbge zero, t6, t8 # E : + beq a2, $u_eocfin # U : + lda t6, -1 # E : + nop + + bne t8, $u_final # U : + mskql t6, a1, t6 # U : mask out bits already seen + stq_u t0, 0(a0) # L : store first output word + or t6, t2, t2 # E : (stall) + + cmpbge zero, t2, t8 # E : find nulls in second partial + addq a0, 8, a0 # E : + subq a2, 1, a2 # E : + bne t8, $u_late_head_exit # U : + + /* Finally, we've got all the stupid leading edge cases taken care + of and we can set up to enter the main loop. */ + extql t2, a1, t1 # U : position hi-bits of lo word + beq a2, $u_eoc # U : + ldq_u t2, 8(a1) # L : read next high-order source word + addq a1, 8, a1 # E : + + extqh t2, a1, t0 # U : position lo-bits of hi word (stall) + cmpbge zero, t2, t8 # E : + nop + bne t8, $u_eos # U : + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned source words. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t0 == the shifted low-order bits from the current source word + t1 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word + + We further know that t2 does not contain a null terminator. */ + + .align 4 +$u_loop: + or t0, t1, t0 # E : current dst word now complete + subq a2, 1, a2 # E : decrement word count + extql t2, a1, t1 # U : extract low bits for next time + addq a0, 8, a0 # E : + + stq_u t0, -8(a0) # U : save the current word + beq a2, $u_eoc # U : + ldq_u t2, 8(a1) # U : Latency=3 load high word for next time + addq a1, 8, a1 # E : + + extqh t2, a1, t0 # U : extract low bits (2 cycle stall) + cmpbge zero, t2, t8 # E : test new word for eos + nop + beq t8, $u_loop # U : + + /* We've found a zero somewhere in the source word we just read. + If it resides in the lower half, we have one (probably partial) + word to write out, and if it resides in the upper half, we + have one full and one partial word left to write out. + + On entry to this basic block: + t0 == the shifted low-order bits from the current source word + t1 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word. */ +$u_eos: + or t0, t1, t0 # E : first (partial) source word complete + nop + cmpbge zero, t0, t8 # E : is the null in this first bit? (stall) + bne t8, $u_final # U : (stall) + + stq_u t0, 0(a0) # L : the null was in the high-order bits + addq a0, 8, a0 # E : + subq a2, 1, a2 # E : + nop + +$u_late_head_exit: + extql t2, a1, t0 # U : + cmpbge zero, t0, t8 # E : + or t8, t10, t6 # E : (stall) + cmoveq a2, t6, t8 # E : Latency=2, extra map slot (stall) + + /* Take care of a final (probably partial) result word. + On entry to this basic block: + t0 == assembled source word + t8 == cmpbge mask that found the null. */ +$u_final: + negq t8, t6 # E : isolate low bit set + and t6, t8, t12 # E : (stall) + and t12, 0x80, t6 # E : avoid dest word load if we can (stall) + bne t6, 1f # U : (stall) + + ldq_u t1, 0(a0) # L : + subq t12, 1, t6 # E : + or t6, t12, t8 # E : (stall) + zapnot t0, t8, t0 # U : kill source bytes > null + + zap t1, t8, t1 # U : kill dest bytes <= null + or t0, t1, t0 # E : (stall) + nop + nop + +1: stq_u t0, 0(a0) # L : + ret (t9) # L0 : Latency=3 + + /* Got to end-of-count before end of string. + On entry to this basic block: + t1 == the shifted high-order bits from the previous source word */ +$u_eoc: + and a1, 7, t6 # E : avoid final load if possible + sll t10, t6, t6 # U : (stall) + and t6, 0xff, t6 # E : (stall) + bne t6, 1f # U : (stall) + + ldq_u t2, 8(a1) # L : load final src word + nop + extqh t2, a1, t0 # U : extract low bits for last word (stall) + or t1, t0, t1 # E : (stall) + +1: cmpbge zero, t1, t8 # E : + mov t1, t0 # E : + +$u_eocfin: # end-of-count, final word + or t10, t8, t8 # E : + br $u_final # L0 : Latency=3 + + /* Unaligned copy entry point. */ + .align 4 +$unaligned: + + ldq_u t1, 0(a1) # L : load first source word + and a0, 7, t4 # E : find dest misalignment + and a1, 7, t5 # E : find src misalignment + /* Conditionally load the first destination word and a bytemask + with 0xff indicating that the destination byte is sacrosanct. */ + mov zero, t0 # E : + + mov zero, t6 # E : + beq t4, 1f # U : + ldq_u t0, 0(a0) # L : + lda t6, -1 # E : + + mskql t6, a0, t6 # U : + nop + nop + subq a1, t4, a1 # E : sub dest misalignment from src addr + + /* If source misalignment is larger than dest misalignment, we need + extra startup checks to avoid SEGV. */ + +1: cmplt t4, t5, t12 # E : + extql t1, a1, t1 # U : shift src into place + lda t2, -1 # E : for creating masks later + beq t12, $u_head # U : (stall) + + extql t2, a1, t2 # U : + cmpbge zero, t1, t8 # E : is there a zero? + andnot t2, t6, t2 # E : dest mask for a single word copy + or t8, t10, t5 # E : test for end-of-count too + + cmpbge zero, t2, t3 # E : + cmoveq a2, t5, t8 # E : Latency=2, extra map slot + nop # E : keep with cmoveq + andnot t8, t3, t8 # E : (stall) + + beq t8, $u_head # U : + /* At this point we've found a zero in the first partial word of + the source. We need to isolate the valid source data and mask + it into the original destination data. (Incidentally, we know + that we'll need at least one byte of that original dest word.) */ + ldq_u t0, 0(a0) # L : + negq t8, t6 # E : build bitmask of bytes <= zero + mskqh t1, t4, t1 # U : + + and t6, t8, t12 # E : + subq t12, 1, t6 # E : (stall) + or t6, t12, t8 # E : (stall) + zapnot t2, t8, t2 # U : prepare source word; mirror changes (stall) + + zapnot t1, t8, t1 # U : to source validity mask + andnot t0, t2, t0 # E : zero place for source to reside + or t0, t1, t0 # E : and put it there (stall both t0, t1) + stq_u t0, 0(a0) # L : (stall) + + ret (t9) # L0 : Latency=3 + nop + nop + nop + + .end __stxncpy diff --git a/arch/alpha/lib/ev67-strcat.S b/arch/alpha/lib/ev67-strcat.S new file mode 100644 index 00000000..c426fe3e --- /dev/null +++ b/arch/alpha/lib/ev67-strcat.S @@ -0,0 +1,54 @@ +/* + * arch/alpha/lib/ev67-strcat.S + * 21264 version contributed by Rick Gorton <rick.gorton@alpha-processor.com> + * + * Append a null-terminated string from SRC to DST. + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + * Commentary: It seems bogus to walk the input string twice - once + * to determine the length, and then again while doing the copy. + * A significant (future) enhancement would be to only read the input + * string once. + */ + + + .text + + .align 4 + .globl strcat + .ent strcat +strcat: + .frame $30, 0, $26 + .prologue 0 + + mov $16, $0 # E : set up return value + /* Find the end of the string. */ + ldq_u $1, 0($16) # L : load first quadword (a0 may be misaligned) + lda $2, -1 # E : + insqh $2, $16, $2 # U : + + andnot $16, 7, $16 # E : + or $2, $1, $1 # E : + cmpbge $31, $1, $2 # E : bits set iff byte == 0 + bne $2, $found # U : + +$loop: ldq $1, 8($16) # L : + addq $16, 8, $16 # E : + cmpbge $31, $1, $2 # E : + beq $2, $loop # U : + +$found: cttz $2, $3 # U0 : + addq $16, $3, $16 # E : + /* Now do the append. */ + mov $26, $23 # E : + br __stxcpy # L0 : + + .end strcat diff --git a/arch/alpha/lib/ev67-strchr.S b/arch/alpha/lib/ev67-strchr.S new file mode 100644 index 00000000..fbb7b4ff --- /dev/null +++ b/arch/alpha/lib/ev67-strchr.S @@ -0,0 +1,88 @@ +/* + * arch/alpha/lib/ev67-strchr.S + * 21264 version contributed by Rick Gorton <rick.gorton@alpha-processor.com> + * + * Return the address of a given character within a null-terminated + * string, or null if it is not found. + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + */ + +#include <asm/regdef.h> + + .set noreorder + .set noat + + .align 4 + .globl strchr + .ent strchr +strchr: + .frame sp, 0, ra + .prologue 0 + + ldq_u t0, 0(a0) # L : load first quadword Latency=3 + and a1, 0xff, t3 # E : 00000000000000ch + insbl a1, 1, t5 # U : 000000000000ch00 + insbl a1, 7, a2 # U : ch00000000000000 + + insbl t3, 6, a3 # U : 00ch000000000000 + or t5, t3, a1 # E : 000000000000chch + andnot a0, 7, v0 # E : align our loop pointer + lda t4, -1 # E : build garbage mask + + mskqh t4, a0, t4 # U : only want relevant part of first quad + or a2, a3, a2 # E : chch000000000000 + inswl a1, 2, t5 # E : 00000000chch0000 + inswl a1, 4, a3 # E : 0000chch00000000 + + or a1, a2, a1 # E : chch00000000chch + or a3, t5, t5 # E : 0000chchchch0000 + cmpbge zero, t0, t2 # E : bits set iff byte == zero + cmpbge zero, t4, t4 # E : bits set iff byte is garbage + + /* This quad is _very_ serialized. Lots of stalling happens */ + or t5, a1, a1 # E : chchchchchchchch + xor t0, a1, t1 # E : make bytes == c zero + cmpbge zero, t1, t3 # E : bits set iff byte == c + or t2, t3, t0 # E : bits set iff char match or zero match + + andnot t0, t4, t0 # E : clear garbage bits + cttz t0, a2 # U0 : speculative (in case we get a match) + nop # E : + bne t0, $found # U : + + /* + * Yuk. This loop is going to stall like crazy waiting for the + * data to be loaded. Not much can be done about it unless it's + * unrolled multiple times - is that safe to do in kernel space? + * Or would exception handling recovery code do the trick here? + */ +$loop: ldq t0, 8(v0) # L : Latency=3 + addq v0, 8, v0 # E : + xor t0, a1, t1 # E : + cmpbge zero, t0, t2 # E : bits set iff byte == 0 + + cmpbge zero, t1, t3 # E : bits set iff byte == c + or t2, t3, t0 # E : + cttz t3, a2 # U0 : speculative (in case we get a match) + beq t0, $loop # U : + +$found: negq t0, t1 # E : clear all but least set bit + and t0, t1, t0 # E : + and t0, t3, t1 # E : bit set iff byte was the char + addq v0, a2, v0 # E : Add in the bit number from above + + cmoveq t1, $31, v0 # E : Two mapping slots, latency = 2 + nop + nop + ret # L0 : + + .end strchr diff --git a/arch/alpha/lib/ev67-strlen.S b/arch/alpha/lib/ev67-strlen.S new file mode 100644 index 00000000..50392807 --- /dev/null +++ b/arch/alpha/lib/ev67-strlen.S @@ -0,0 +1,49 @@ +/* + * arch/alpha/lib/ev67-strlen.S + * 21264 version by Rick Gorton <rick.gorton@alpha-processor.com> + * + * Finds length of a 0-terminated string. Optimized for the + * Alpha architecture: + * + * - memory accessed as aligned quadwords only + * - uses bcmpge to compare 8 bytes in parallel + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + */ + + .set noreorder + .set noat + + .globl strlen + .ent strlen + .align 4 +strlen: + ldq_u $1, 0($16) # L : load first quadword ($16 may be misaligned) + lda $2, -1($31) # E : + insqh $2, $16, $2 # U : + andnot $16, 7, $0 # E : + + or $2, $1, $1 # E : + cmpbge $31, $1, $2 # E : $2 <- bitmask: bit i == 1 <==> i-th byte == 0 + nop # E : + bne $2, $found # U : + +$loop: ldq $1, 8($0) # L : + addq $0, 8, $0 # E : addr += 8 + cmpbge $31, $1, $2 # E : + beq $2, $loop # U : + +$found: + cttz $2, $3 # U0 : + addq $0, $3, $0 # E : + subq $0, $16, $0 # E : + ret $31, ($26) # L0 : + + .end strlen diff --git a/arch/alpha/lib/ev67-strlen_user.S b/arch/alpha/lib/ev67-strlen_user.S new file mode 100644 index 00000000..57e0d77b --- /dev/null +++ b/arch/alpha/lib/ev67-strlen_user.S @@ -0,0 +1,107 @@ +/* + * arch/alpha/lib/ev67-strlen_user.S + * 21264 version contributed by Rick Gorton <rick.gorton@api-networks.com> + * + * Return the length of the string including the NULL terminator + * (strlen+1) or zero if an error occurred. + * + * In places where it is critical to limit the processing time, + * and the data is not trusted, strnlen_user() should be used. + * It will return a value greater than its second argument if + * that limit would be exceeded. This implementation is allowed + * to access memory beyond the limit, but will not cross a page + * boundary when doing so. + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + */ + +#include <asm/regdef.h> + + +/* Allow an exception for an insn; exit if we get one. */ +#define EX(x,y...) \ + 99: x,##y; \ + .section __ex_table,"a"; \ + .long 99b - .; \ + lda v0, $exception-99b(zero); \ + .previous + + + .set noreorder + .set noat + .text + + .globl __strlen_user + .ent __strlen_user + .frame sp, 0, ra + + .align 4 +__strlen_user: + ldah a1, 32767(zero) # do not use plain strlen_user() for strings + # that might be almost 2 GB long; you should + # be using strnlen_user() instead + nop + nop + nop + + .globl __strnlen_user + + .align 4 +__strnlen_user: + .prologue 0 + EX( ldq_u t0, 0(a0) ) # L : load first quadword (a0 may be misaligned) + lda t1, -1(zero) # E : + + insqh t1, a0, t1 # U : + andnot a0, 7, v0 # E : + or t1, t0, t0 # E : + subq a0, 1, a0 # E : get our +1 for the return + + cmpbge zero, t0, t1 # E : t1 <- bitmask: bit i == 1 <==> i-th byte == 0 + subq a1, 7, t2 # E : + subq a0, v0, t0 # E : + bne t1, $found # U : + + addq t2, t0, t2 # E : + addq a1, 1, a1 # E : + nop # E : + nop # E : + + .align 4 +$loop: ble t2, $limit # U : + EX( ldq t0, 8(v0) ) # L : + nop # E : + nop # E : + + cmpbge zero, t0, t1 # E : + subq t2, 8, t2 # E : + addq v0, 8, v0 # E : addr += 8 + beq t1, $loop # U : + +$found: cttz t1, t2 # U0 : + addq v0, t2, v0 # E : + subq v0, a0, v0 # E : + ret # L0 : + +$exception: + nop + nop + nop + ret + + .align 4 # currently redundant +$limit: + nop + nop + subq a1, t2, v0 + ret + + .end __strlen_user diff --git a/arch/alpha/lib/ev67-strncat.S b/arch/alpha/lib/ev67-strncat.S new file mode 100644 index 00000000..4ae716cd --- /dev/null +++ b/arch/alpha/lib/ev67-strncat.S @@ -0,0 +1,94 @@ +/* + * arch/alpha/lib/ev67-strncat.S + * 21264 version contributed by Rick Gorton <rick.gorton@api-networks.com> + * + * Append no more than COUNT characters from the null-terminated string SRC + * to the null-terminated string DST. Always null-terminate the new DST. + * + * This differs slightly from the semantics in libc in that we never write + * past count, whereas libc may write to count+1. This follows the generic + * implementation in lib/string.c and is, IMHO, more sensible. + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + */ + + + .text + + .align 4 + .globl strncat + .ent strncat +strncat: + .frame $30, 0, $26 + .prologue 0 + + mov $16, $0 # set up return value + beq $18, $zerocount # U : + /* Find the end of the string. */ + ldq_u $1, 0($16) # L : load first quadword ($16 may be misaligned) + lda $2, -1($31) # E : + + insqh $2, $0, $2 # U : + andnot $16, 7, $16 # E : + nop # E : + or $2, $1, $1 # E : + + nop # E : + nop # E : + cmpbge $31, $1, $2 # E : bits set iff byte == 0 + bne $2, $found # U : + +$loop: ldq $1, 8($16) # L : + addq $16, 8, $16 # E : + cmpbge $31, $1, $2 # E : + beq $2, $loop # U : + +$found: cttz $2, $3 # U0 : + addq $16, $3, $16 # E : + nop # E : + bsr $23, __stxncpy # L0 :/* Now do the append. */ + + /* Worry about the null termination. */ + + zapnot $1, $27, $2 # U : was last byte a null? + cmplt $27, $24, $5 # E : did we fill the buffer completely? + bne $2, 0f # U : + ret # L0 : + +0: or $5, $18, $2 # E : + nop + bne $2, 2f # U : + and $24, 0x80, $3 # E : no zero next byte + + nop # E : + bne $3, 1f # U : + /* Here there are bytes left in the current word. Clear one. */ + addq $24, $24, $24 # E : end-of-count bit <<= 1 + nop # E : + +2: zap $1, $24, $1 # U : + nop # E : + stq_u $1, 0($16) # L : + ret # L0 : + +1: /* Here we must clear the first byte of the next DST word */ + stb $31, 8($16) # L : + nop # E : + nop # E : + ret # L0 : + +$zerocount: + nop # E : + nop # E : + nop # E : + ret # L0 : + + .end strncat diff --git a/arch/alpha/lib/ev67-strrchr.S b/arch/alpha/lib/ev67-strrchr.S new file mode 100644 index 00000000..dd0d8c6b --- /dev/null +++ b/arch/alpha/lib/ev67-strrchr.S @@ -0,0 +1,109 @@ +/* + * arch/alpha/lib/ev67-strrchr.S + * 21264 version by Rick Gorton <rick.gorton@alpha-processor.com> + * + * Finds length of a 0-terminated string. Optimized for the + * Alpha architecture: + * + * - memory accessed as aligned quadwords only + * - uses bcmpge to compare 8 bytes in parallel + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + */ + + +#include <asm/regdef.h> + + .set noreorder + .set noat + + .align 4 + .ent strrchr + .globl strrchr +strrchr: + .frame sp, 0, ra + .prologue 0 + + and a1, 0xff, t2 # E : 00000000000000ch + insbl a1, 1, t4 # U : 000000000000ch00 + insbl a1, 2, t5 # U : 0000000000ch0000 + ldq_u t0, 0(a0) # L : load first quadword Latency=3 + + mov zero, t6 # E : t6 is last match aligned addr + or t2, t4, a1 # E : 000000000000chch + sll t5, 8, t3 # U : 00000000ch000000 + mov zero, t8 # E : t8 is last match byte compare mask + + andnot a0, 7, v0 # E : align source addr + or t5, t3, t3 # E : 00000000chch0000 + sll a1, 32, t2 # U : 0000chch00000000 + sll a1, 48, t4 # U : chch000000000000 + + or t4, a1, a1 # E : chch00000000chch + or t2, t3, t2 # E : 0000chchchch0000 + or a1, t2, a1 # E : chchchchchchchch + lda t5, -1 # E : build garbage mask + + cmpbge zero, t0, t1 # E : bits set iff byte == zero + mskqh t5, a0, t4 # E : Complete garbage mask + xor t0, a1, t2 # E : make bytes == c zero + cmpbge zero, t4, t4 # E : bits set iff byte is garbage + + cmpbge zero, t2, t3 # E : bits set iff byte == c + andnot t1, t4, t1 # E : clear garbage from null test + andnot t3, t4, t3 # E : clear garbage from char test + bne t1, $eos # U : did we already hit the terminator? + + /* Character search main loop */ +$loop: + ldq t0, 8(v0) # L : load next quadword + cmovne t3, v0, t6 # E : save previous comparisons match + nop # : Latency=2, extra map slot (keep nop with cmov) + nop + + cmovne t3, t3, t8 # E : Latency=2, extra map slot + nop # : keep with cmovne + addq v0, 8, v0 # E : + xor t0, a1, t2 # E : + + cmpbge zero, t0, t1 # E : bits set iff byte == zero + cmpbge zero, t2, t3 # E : bits set iff byte == c + beq t1, $loop # U : if we havnt seen a null, loop + nop + + /* Mask out character matches after terminator */ +$eos: + negq t1, t4 # E : isolate first null byte match + and t1, t4, t4 # E : + subq t4, 1, t5 # E : build a mask of the bytes up to... + or t4, t5, t4 # E : ... and including the null + + and t3, t4, t3 # E : mask out char matches after null + cmovne t3, t3, t8 # E : save it, if match found Latency=2, extra map slot + nop # : Keep with cmovne + nop + + cmovne t3, v0, t6 # E : + nop # : Keep with cmovne + /* Locate the address of the last matched character */ + ctlz t8, t2 # U0 : Latency=3 (0x40 for t8=0) + nop + + cmoveq t8, 0x3f, t2 # E : Compensate for case when no match is seen + nop # E : hide the cmov latency (2) behind ctlz latency + lda t5, 0x3f($31) # E : + subq t5, t2, t5 # E : Normalize leading zero count + + addq t6, t5, v0 # E : and add to quadword address + ret # L0 : Latency=3 + nop + nop + + .end strrchr diff --git a/arch/alpha/lib/fls.c b/arch/alpha/lib/fls.c new file mode 100644 index 00000000..ddd048c0 --- /dev/null +++ b/arch/alpha/lib/fls.c @@ -0,0 +1,38 @@ +/* + * arch/alpha/lib/fls.c + */ + +#include <linux/module.h> +#include <linux/bitops.h> + +/* This is fls(x)-1, except zero is held to zero. This allows most + efficient input into extbl, plus it allows easy handling of fls(0)=0. */ + +const unsigned char __flsm1_tab[256] = +{ + 0, + 0, + 1, 1, + 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +}; + +EXPORT_SYMBOL(__flsm1_tab); diff --git a/arch/alpha/lib/fpreg.c b/arch/alpha/lib/fpreg.c new file mode 100644 index 00000000..05017ba3 --- /dev/null +++ b/arch/alpha/lib/fpreg.c @@ -0,0 +1,193 @@ +/* + * arch/alpha/lib/fpreg.c + * + * (C) Copyright 1998 Linus Torvalds + */ + +#if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67) +#define STT(reg,val) asm volatile ("ftoit $f"#reg",%0" : "=r"(val)); +#else +#define STT(reg,val) asm volatile ("stt $f"#reg",%0" : "=m"(val)); +#endif + +unsigned long +alpha_read_fp_reg (unsigned long reg) +{ + unsigned long val; + + switch (reg) { + case 0: STT( 0, val); break; + case 1: STT( 1, val); break; + case 2: STT( 2, val); break; + case 3: STT( 3, val); break; + case 4: STT( 4, val); break; + case 5: STT( 5, val); break; + case 6: STT( 6, val); break; + case 7: STT( 7, val); break; + case 8: STT( 8, val); break; + case 9: STT( 9, val); break; + case 10: STT(10, val); break; + case 11: STT(11, val); break; + case 12: STT(12, val); break; + case 13: STT(13, val); break; + case 14: STT(14, val); break; + case 15: STT(15, val); break; + case 16: STT(16, val); break; + case 17: STT(17, val); break; + case 18: STT(18, val); break; + case 19: STT(19, val); break; + case 20: STT(20, val); break; + case 21: STT(21, val); break; + case 22: STT(22, val); break; + case 23: STT(23, val); break; + case 24: STT(24, val); break; + case 25: STT(25, val); break; + case 26: STT(26, val); break; + case 27: STT(27, val); break; + case 28: STT(28, val); break; + case 29: STT(29, val); break; + case 30: STT(30, val); break; + case 31: STT(31, val); break; + default: return 0; + } + return val; +} + +#if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67) +#define LDT(reg,val) asm volatile ("itoft %0,$f"#reg : : "r"(val)); +#else +#define LDT(reg,val) asm volatile ("ldt $f"#reg",%0" : : "m"(val)); +#endif + +void +alpha_write_fp_reg (unsigned long reg, unsigned long val) +{ + switch (reg) { + case 0: LDT( 0, val); break; + case 1: LDT( 1, val); break; + case 2: LDT( 2, val); break; + case 3: LDT( 3, val); break; + case 4: LDT( 4, val); break; + case 5: LDT( 5, val); break; + case 6: LDT( 6, val); break; + case 7: LDT( 7, val); break; + case 8: LDT( 8, val); break; + case 9: LDT( 9, val); break; + case 10: LDT(10, val); break; + case 11: LDT(11, val); break; + case 12: LDT(12, val); break; + case 13: LDT(13, val); break; + case 14: LDT(14, val); break; + case 15: LDT(15, val); break; + case 16: LDT(16, val); break; + case 17: LDT(17, val); break; + case 18: LDT(18, val); break; + case 19: LDT(19, val); break; + case 20: LDT(20, val); break; + case 21: LDT(21, val); break; + case 22: LDT(22, val); break; + case 23: LDT(23, val); break; + case 24: LDT(24, val); break; + case 25: LDT(25, val); break; + case 26: LDT(26, val); break; + case 27: LDT(27, val); break; + case 28: LDT(28, val); break; + case 29: LDT(29, val); break; + case 30: LDT(30, val); break; + case 31: LDT(31, val); break; + } +} + +#if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67) +#define STS(reg,val) asm volatile ("ftois $f"#reg",%0" : "=r"(val)); +#else +#define STS(reg,val) asm volatile ("sts $f"#reg",%0" : "=m"(val)); +#endif + +unsigned long +alpha_read_fp_reg_s (unsigned long reg) +{ + unsigned long val; + + switch (reg) { + case 0: STS( 0, val); break; + case 1: STS( 1, val); break; + case 2: STS( 2, val); break; + case 3: STS( 3, val); break; + case 4: STS( 4, val); break; + case 5: STS( 5, val); break; + case 6: STS( 6, val); break; + case 7: STS( 7, val); break; + case 8: STS( 8, val); break; + case 9: STS( 9, val); break; + case 10: STS(10, val); break; + case 11: STS(11, val); break; + case 12: STS(12, val); break; + case 13: STS(13, val); break; + case 14: STS(14, val); break; + case 15: STS(15, val); break; + case 16: STS(16, val); break; + case 17: STS(17, val); break; + case 18: STS(18, val); break; + case 19: STS(19, val); break; + case 20: STS(20, val); break; + case 21: STS(21, val); break; + case 22: STS(22, val); break; + case 23: STS(23, val); break; + case 24: STS(24, val); break; + case 25: STS(25, val); break; + case 26: STS(26, val); break; + case 27: STS(27, val); break; + case 28: STS(28, val); break; + case 29: STS(29, val); break; + case 30: STS(30, val); break; + case 31: STS(31, val); break; + default: return 0; + } + return val; +} + +#if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67) +#define LDS(reg,val) asm volatile ("itofs %0,$f"#reg : : "r"(val)); +#else +#define LDS(reg,val) asm volatile ("lds $f"#reg",%0" : : "m"(val)); +#endif + +void +alpha_write_fp_reg_s (unsigned long reg, unsigned long val) +{ + switch (reg) { + case 0: LDS( 0, val); break; + case 1: LDS( 1, val); break; + case 2: LDS( 2, val); break; + case 3: LDS( 3, val); break; + case 4: LDS( 4, val); break; + case 5: LDS( 5, val); break; + case 6: LDS( 6, val); break; + case 7: LDS( 7, val); break; + case 8: LDS( 8, val); break; + case 9: LDS( 9, val); break; + case 10: LDS(10, val); break; + case 11: LDS(11, val); break; + case 12: LDS(12, val); break; + case 13: LDS(13, val); break; + case 14: LDS(14, val); break; + case 15: LDS(15, val); break; + case 16: LDS(16, val); break; + case 17: LDS(17, val); break; + case 18: LDS(18, val); break; + case 19: LDS(19, val); break; + case 20: LDS(20, val); break; + case 21: LDS(21, val); break; + case 22: LDS(22, val); break; + case 23: LDS(23, val); break; + case 24: LDS(24, val); break; + case 25: LDS(25, val); break; + case 26: LDS(26, val); break; + case 27: LDS(27, val); break; + case 28: LDS(28, val); break; + case 29: LDS(29, val); break; + case 30: LDS(30, val); break; + case 31: LDS(31, val); break; + } +} diff --git a/arch/alpha/lib/memchr.S b/arch/alpha/lib/memchr.S new file mode 100644 index 00000000..14427eeb --- /dev/null +++ b/arch/alpha/lib/memchr.S @@ -0,0 +1,164 @@ +/* Copyright (C) 1996 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David Mosberger (davidm@cs.arizona.edu). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* Finds characters in a memory area. Optimized for the Alpha: + + - memory accessed as aligned quadwords only + - uses cmpbge to compare 8 bytes in parallel + - does binary search to find 0 byte in last + quadword (HAKMEM needed 12 instructions to + do this instead of the 9 instructions that + binary search needs). + +For correctness consider that: + + - only minimum number of quadwords may be accessed + - the third argument is an unsigned long +*/ + + .set noreorder + .set noat + + .globl memchr + .ent memchr +memchr: + .frame $30,0,$26,0 + .prologue 0 + + # Hack -- if someone passes in (size_t)-1, hoping to just + # search til the end of the address space, we will overflow + # below when we find the address of the last byte. Given + # that we will never have a 56-bit address space, cropping + # the length is the easiest way to avoid trouble. + zap $18, 0x80, $5 #-e0 : + + beq $18, $not_found # .. e1 : + ldq_u $1, 0($16) # e1 : load first quadword + insbl $17, 1, $2 # .. e0 : $2 = 000000000000ch00 + and $17, 0xff, $17 #-e0 : $17 = 00000000000000ch + cmpult $18, 9, $4 # .. e1 : + or $2, $17, $17 # e0 : $17 = 000000000000chch + lda $3, -1($31) # .. e1 : + sll $17, 16, $2 #-e0 : $2 = 00000000chch0000 + addq $16, $5, $5 # .. e1 : + or $2, $17, $17 # e1 : $17 = 00000000chchchch + unop # : + sll $17, 32, $2 #-e0 : $2 = chchchch00000000 + or $2, $17, $17 # e1 : $17 = chchchchchchchch + extql $1, $16, $7 # e0 : + beq $4, $first_quad # .. e1 : + + ldq_u $6, -1($5) #-e1 : eight or less bytes to search + extqh $6, $16, $6 # .. e0 : + mov $16, $0 # e0 : + or $7, $6, $1 # .. e1 : $1 = quadword starting at $16 + + # Deal with the case where at most 8 bytes remain to be searched + # in $1. E.g.: + # $18 = 6 + # $1 = ????c6c5c4c3c2c1 +$last_quad: + negq $18, $6 #-e0 : + xor $17, $1, $1 # .. e1 : + srl $3, $6, $6 # e0 : $6 = mask of $18 bits set + cmpbge $31, $1, $2 # .. e1 : + and $2, $6, $2 #-e0 : + beq $2, $not_found # .. e1 : + +$found_it: + # Now, determine which byte matched: + negq $2, $3 # e0 : + and $2, $3, $2 # e1 : + + and $2, 0x0f, $1 #-e0 : + addq $0, 4, $3 # .. e1 : + cmoveq $1, $3, $0 # e0 : + + addq $0, 2, $3 # .. e1 : + and $2, 0x33, $1 #-e0 : + cmoveq $1, $3, $0 # .. e1 : + + and $2, 0x55, $1 # e0 : + addq $0, 1, $3 # .. e1 : + cmoveq $1, $3, $0 #-e0 : + +$done: ret # .. e1 : + + # Deal with the case where $18 > 8 bytes remain to be + # searched. $16 may not be aligned. + .align 4 +$first_quad: + andnot $16, 0x7, $0 #-e1 : + insqh $3, $16, $2 # .. e0 : $2 = 0000ffffffffffff ($16<0:2> ff) + xor $1, $17, $1 # e0 : + or $1, $2, $1 # e1 : $1 = ====ffffffffffff + cmpbge $31, $1, $2 #-e0 : + bne $2, $found_it # .. e1 : + + # At least one byte left to process. + + ldq $1, 8($0) # e0 : + subq $5, 1, $18 # .. e1 : + addq $0, 8, $0 #-e0 : + + # Make $18 point to last quad to be accessed (the + # last quad may or may not be partial). + + andnot $18, 0x7, $18 # .. e1 : + cmpult $0, $18, $2 # e0 : + beq $2, $final # .. e1 : + + # At least two quads remain to be accessed. + + subq $18, $0, $4 #-e0 : $4 <- nr quads to be processed + and $4, 8, $4 # e1 : odd number of quads? + bne $4, $odd_quad_count # e1 : + + # At least three quads remain to be accessed + + mov $1, $4 # e0 : move prefetched value to correct reg + + .align 4 +$unrolled_loop: + ldq $1, 8($0) #-e0 : prefetch $1 + xor $17, $4, $2 # .. e1 : + cmpbge $31, $2, $2 # e0 : + bne $2, $found_it # .. e1 : + + addq $0, 8, $0 #-e0 : +$odd_quad_count: + xor $17, $1, $2 # .. e1 : + ldq $4, 8($0) # e0 : prefetch $4 + cmpbge $31, $2, $2 # .. e1 : + addq $0, 8, $6 #-e0 : + bne $2, $found_it # .. e1 : + + cmpult $6, $18, $6 # e0 : + addq $0, 8, $0 # .. e1 : + bne $6, $unrolled_loop #-e1 : + + mov $4, $1 # e0 : move prefetched value into $1 +$final: subq $5, $0, $18 # .. e1 : $18 <- number of bytes left to do + bne $18, $last_quad # e1 : + +$not_found: + mov $31, $0 #-e0 : + ret # .. e1 : + + .end memchr diff --git a/arch/alpha/lib/memcpy.c b/arch/alpha/lib/memcpy.c new file mode 100644 index 00000000..64083fc7 --- /dev/null +++ b/arch/alpha/lib/memcpy.c @@ -0,0 +1,163 @@ +/* + * linux/arch/alpha/lib/memcpy.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +/* + * This is a reasonably optimized memcpy() routine. + */ + +/* + * Note that the C code is written to be optimized into good assembly. However, + * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in a + * explicit compare against 0 (instead of just using the proper "blt reg, xx" or + * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually.. + */ + +#include <linux/types.h> + +/* + * This should be done in one go with ldq_u*2/mask/stq_u. Do it + * with a macro so that we can fix it up later.. + */ +#define ALIGN_DEST_TO8_UP(d,s,n) \ + while (d & 7) { \ + if (n <= 0) return; \ + n--; \ + *(char *) d = *(char *) s; \ + d++; s++; \ + } +#define ALIGN_DEST_TO8_DN(d,s,n) \ + while (d & 7) { \ + if (n <= 0) return; \ + n--; \ + d--; s--; \ + *(char *) d = *(char *) s; \ + } + +/* + * This should similarly be done with ldq_u*2/mask/stq. The destination + * is aligned, but we don't fill in a full quad-word + */ +#define DO_REST_UP(d,s,n) \ + while (n > 0) { \ + n--; \ + *(char *) d = *(char *) s; \ + d++; s++; \ + } +#define DO_REST_DN(d,s,n) \ + while (n > 0) { \ + n--; \ + d--; s--; \ + *(char *) d = *(char *) s; \ + } + +/* + * This should be done with ldq/mask/stq. The source and destination are + * aligned, but we don't fill in a full quad-word + */ +#define DO_REST_ALIGNED_UP(d,s,n) DO_REST_UP(d,s,n) +#define DO_REST_ALIGNED_DN(d,s,n) DO_REST_DN(d,s,n) + +/* + * This does unaligned memory copies. We want to avoid storing to + * an unaligned address, as that would do a read-modify-write cycle. + * We also want to avoid double-reading the unaligned reads. + * + * Note the ordering to try to avoid load (and address generation) latencies. + */ +static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s, + long n) +{ + ALIGN_DEST_TO8_UP(d,s,n); + n -= 8; /* to avoid compare against 8 in the loop */ + if (n >= 0) { + unsigned long low_word, high_word; + __asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s)); + do { + unsigned long tmp; + __asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8))); + n -= 8; + __asm__("extql %1,%2,%0" + :"=r" (low_word) + :"r" (low_word), "r" (s)); + __asm__("extqh %1,%2,%0" + :"=r" (tmp) + :"r" (high_word), "r" (s)); + s += 8; + *(unsigned long *) d = low_word | tmp; + d += 8; + low_word = high_word; + } while (n >= 0); + } + n += 8; + DO_REST_UP(d,s,n); +} + +static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s, + long n) +{ + /* I don't understand AXP assembler well enough for this. -Tim */ + s += n; + d += n; + while (n--) + * (char *) --d = * (char *) --s; +} + +/* + * Hmm.. Strange. The __asm__ here is there to make gcc use an integer register + * for the load-store. I don't know why, but it would seem that using a floating + * point register for the move seems to slow things down (very small difference, + * though). + * + * Note the ordering to try to avoid load (and address generation) latencies. + */ +static inline void __memcpy_aligned_up (unsigned long d, unsigned long s, + long n) +{ + ALIGN_DEST_TO8_UP(d,s,n); + n -= 8; + while (n >= 0) { + unsigned long tmp; + __asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s)); + n -= 8; + s += 8; + *(unsigned long *) d = tmp; + d += 8; + } + n += 8; + DO_REST_ALIGNED_UP(d,s,n); +} +static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s, + long n) +{ + s += n; + d += n; + ALIGN_DEST_TO8_DN(d,s,n); + n -= 8; + while (n >= 0) { + unsigned long tmp; + s -= 8; + __asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s)); + n -= 8; + d -= 8; + *(unsigned long *) d = tmp; + } + n += 8; + DO_REST_ALIGNED_DN(d,s,n); +} + +void * memcpy(void * dest, const void *src, size_t n) +{ + if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) { + __memcpy_aligned_up ((unsigned long) dest, (unsigned long) src, + n); + return dest; + } + __memcpy_unaligned_up ((unsigned long) dest, (unsigned long) src, n); + return dest; +} + +/* For backward modules compatibility, define __memcpy. */ +asm("__memcpy = memcpy; .globl __memcpy"); diff --git a/arch/alpha/lib/memmove.S b/arch/alpha/lib/memmove.S new file mode 100644 index 00000000..eb3b6e02 --- /dev/null +++ b/arch/alpha/lib/memmove.S @@ -0,0 +1,181 @@ +/* + * arch/alpha/lib/memmove.S + * + * Barely optimized memmove routine for Alpha EV5. + * + * This is hand-massaged output from the original memcpy.c. We defer to + * memcpy whenever possible; the backwards copy loops are not unrolled. + */ + + .set noat + .set noreorder + .text + + .align 4 + .globl memmove + .ent memmove +memmove: + ldgp $29, 0($27) + unop + nop + .prologue 1 + + addq $16,$18,$4 + addq $17,$18,$5 + cmpule $4,$17,$1 /* dest + n <= src */ + cmpule $5,$16,$2 /* dest >= src + n */ + + bis $1,$2,$1 + mov $16,$0 + xor $16,$17,$2 + bne $1,memcpy !samegp + + and $2,7,$2 /* Test for src/dest co-alignment. */ + and $16,7,$1 + cmpule $16,$17,$3 + bne $3,$memmove_up /* dest < src */ + + and $4,7,$1 + bne $2,$misaligned_dn + unop + beq $1,$skip_aligned_byte_loop_head_dn + +$aligned_byte_loop_head_dn: + lda $4,-1($4) + lda $5,-1($5) + unop + ble $18,$egress + + ldq_u $3,0($5) + ldq_u $2,0($4) + lda $18,-1($18) + extbl $3,$5,$1 + + insbl $1,$4,$1 + mskbl $2,$4,$2 + bis $1,$2,$1 + and $4,7,$6 + + stq_u $1,0($4) + bne $6,$aligned_byte_loop_head_dn + +$skip_aligned_byte_loop_head_dn: + lda $18,-8($18) + blt $18,$skip_aligned_word_loop_dn + +$aligned_word_loop_dn: + ldq $1,-8($5) + nop + lda $5,-8($5) + lda $18,-8($18) + + stq $1,-8($4) + nop + lda $4,-8($4) + bge $18,$aligned_word_loop_dn + +$skip_aligned_word_loop_dn: + lda $18,8($18) + bgt $18,$byte_loop_tail_dn + unop + ret $31,($26),1 + + .align 4 +$misaligned_dn: + nop + fnop + unop + beq $18,$egress + +$byte_loop_tail_dn: + ldq_u $3,-1($5) + ldq_u $2,-1($4) + lda $5,-1($5) + lda $4,-1($4) + + lda $18,-1($18) + extbl $3,$5,$1 + insbl $1,$4,$1 + mskbl $2,$4,$2 + + bis $1,$2,$1 + stq_u $1,0($4) + bgt $18,$byte_loop_tail_dn + br $egress + +$memmove_up: + mov $16,$4 + mov $17,$5 + bne $2,$misaligned_up + beq $1,$skip_aligned_byte_loop_head_up + +$aligned_byte_loop_head_up: + unop + ble $18,$egress + ldq_u $3,0($5) + ldq_u $2,0($4) + + lda $18,-1($18) + extbl $3,$5,$1 + insbl $1,$4,$1 + mskbl $2,$4,$2 + + bis $1,$2,$1 + lda $5,1($5) + stq_u $1,0($4) + lda $4,1($4) + + and $4,7,$6 + bne $6,$aligned_byte_loop_head_up + +$skip_aligned_byte_loop_head_up: + lda $18,-8($18) + blt $18,$skip_aligned_word_loop_up + +$aligned_word_loop_up: + ldq $1,0($5) + nop + lda $5,8($5) + lda $18,-8($18) + + stq $1,0($4) + nop + lda $4,8($4) + bge $18,$aligned_word_loop_up + +$skip_aligned_word_loop_up: + lda $18,8($18) + bgt $18,$byte_loop_tail_up + unop + ret $31,($26),1 + + .align 4 +$misaligned_up: + nop + fnop + unop + beq $18,$egress + +$byte_loop_tail_up: + ldq_u $3,0($5) + ldq_u $2,0($4) + lda $18,-1($18) + extbl $3,$5,$1 + + insbl $1,$4,$1 + mskbl $2,$4,$2 + bis $1,$2,$1 + stq_u $1,0($4) + + lda $5,1($5) + lda $4,1($4) + nop + bgt $18,$byte_loop_tail_up + +$egress: + ret $31,($26),1 + nop + nop + nop + + .end memmove diff --git a/arch/alpha/lib/memset.S b/arch/alpha/lib/memset.S new file mode 100644 index 00000000..311b8cfc --- /dev/null +++ b/arch/alpha/lib/memset.S @@ -0,0 +1,124 @@ +/* + * linux/arch/alpha/lib/memset.S + * + * This is an efficient (and small) implementation of the C library "memset()" + * function for the alpha. + * + * (C) Copyright 1996 Linus Torvalds + * + * This routine is "moral-ware": you are free to use it any way you wish, and + * the only obligation I put on you is a moral one: if you make any improvements + * to the routine, please send me your improvements for me to use similarly. + * + * The scheduling comments are according to the EV5 documentation (and done by + * hand, so they might well be incorrect, please do tell me about it..) + */ + + .set noat + .set noreorder +.text + .globl memset + .globl __memset + .globl __memsetw + .globl __constant_c_memset + .ent __memset +.align 5 +__memset: + .frame $30,0,$26,0 + .prologue 0 + + and $17,255,$1 /* E1 */ + insbl $17,1,$17 /* .. E0 */ + bis $17,$1,$17 /* E0 (p-c latency, next cycle) */ + sll $17,16,$1 /* E1 (p-c latency, next cycle) */ + + bis $17,$1,$17 /* E0 (p-c latency, next cycle) */ + sll $17,32,$1 /* E1 (p-c latency, next cycle) */ + bis $17,$1,$17 /* E0 (p-c latency, next cycle) */ + ldq_u $31,0($30) /* .. E1 */ + +.align 5 +__constant_c_memset: + addq $18,$16,$6 /* E0 */ + bis $16,$16,$0 /* .. E1 */ + xor $16,$6,$1 /* E0 */ + ble $18,end /* .. E1 */ + + bic $1,7,$1 /* E0 */ + beq $1,within_one_quad /* .. E1 (note EV5 zero-latency forwarding) */ + and $16,7,$3 /* E0 */ + beq $3,aligned /* .. E1 (note EV5 zero-latency forwarding) */ + + ldq_u $4,0($16) /* E0 */ + bis $16,$16,$5 /* .. E1 */ + insql $17,$16,$2 /* E0 */ + subq $3,8,$3 /* .. E1 */ + + addq $18,$3,$18 /* E0 $18 is new count ($3 is negative) */ + mskql $4,$16,$4 /* .. E1 (and possible load stall) */ + subq $16,$3,$16 /* E0 $16 is new aligned destination */ + bis $2,$4,$1 /* .. E1 */ + + bis $31,$31,$31 /* E0 */ + ldq_u $31,0($30) /* .. E1 */ + stq_u $1,0($5) /* E0 */ + bis $31,$31,$31 /* .. E1 */ + +.align 4 +aligned: + sra $18,3,$3 /* E0 */ + and $18,7,$18 /* .. E1 */ + bis $16,$16,$5 /* E0 */ + beq $3,no_quad /* .. E1 */ + +.align 3 +loop: + stq $17,0($5) /* E0 */ + subq $3,1,$3 /* .. E1 */ + addq $5,8,$5 /* E0 */ + bne $3,loop /* .. E1 */ + +no_quad: + bis $31,$31,$31 /* E0 */ + beq $18,end /* .. E1 */ + ldq $7,0($5) /* E0 */ + mskqh $7,$6,$2 /* .. E1 (and load stall) */ + + insqh $17,$6,$4 /* E0 */ + bis $2,$4,$1 /* .. E1 */ + stq $1,0($5) /* E0 */ + ret $31,($26),1 /* .. E1 */ + +.align 3 +within_one_quad: + ldq_u $1,0($16) /* E0 */ + insql $17,$16,$2 /* E1 */ + mskql $1,$16,$4 /* E0 (after load stall) */ + bis $2,$4,$2 /* E0 */ + + mskql $2,$6,$4 /* E0 */ + mskqh $1,$6,$2 /* .. E1 */ + bis $2,$4,$1 /* E0 */ + stq_u $1,0($16) /* E0 */ + +end: + ret $31,($26),1 /* E1 */ + .end __memset + + .align 5 + .ent __memsetw +__memsetw: + .prologue 0 + + inswl $17,0,$1 /* E0 */ + inswl $17,2,$2 /* E0 */ + inswl $17,4,$3 /* E0 */ + or $1,$2,$1 /* .. E1 */ + inswl $17,6,$4 /* E0 */ + or $1,$3,$1 /* .. E1 */ + or $1,$4,$17 /* E0 */ + br __constant_c_memset /* .. E1 */ + + .end __memsetw + +memset = __memset diff --git a/arch/alpha/lib/srm_printk.c b/arch/alpha/lib/srm_printk.c new file mode 100644 index 00000000..31b53c49 --- /dev/null +++ b/arch/alpha/lib/srm_printk.c @@ -0,0 +1,41 @@ +/* + * arch/alpha/lib/srm_printk.c + */ + +#include <linux/kernel.h> +#include <asm/console.h> + +long +srm_printk(const char *fmt, ...) +{ + static char buf[1024]; + va_list args; + long len, num_lf; + char *src, *dst; + + va_start(args, fmt); + len = vsprintf(buf, fmt, args); + va_end(args); + + /* count number of linefeeds in string: */ + + num_lf = 0; + for (src = buf; *src; ++src) { + if (*src == '\n') { + ++num_lf; + } + } + + if (num_lf) { + /* expand each linefeed into carriage-return/linefeed: */ + for (dst = src + num_lf; src >= buf; ) { + if (*src == '\n') { + *dst-- = '\r'; + } + *dst-- = *src--; + } + } + + srm_puts(buf, num_lf+len); + return len; +} diff --git a/arch/alpha/lib/srm_puts.c b/arch/alpha/lib/srm_puts.c new file mode 100644 index 00000000..7b60a6f7 --- /dev/null +++ b/arch/alpha/lib/srm_puts.c @@ -0,0 +1,23 @@ +/* + * arch/alpha/lib/srm_puts.c + */ + +#include <linux/string.h> +#include <asm/console.h> + +long +srm_puts(const char *str, long len) +{ + long remaining, written; + + if (!callback_init_done) + return len; + + for (remaining = len; remaining > 0; remaining -= written) + { + written = callback_puts(0, str, remaining); + written &= 0xffffffff; + str += written; + } + return len; +} diff --git a/arch/alpha/lib/stacktrace.c b/arch/alpha/lib/stacktrace.c new file mode 100644 index 00000000..5e832161 --- /dev/null +++ b/arch/alpha/lib/stacktrace.c @@ -0,0 +1,102 @@ +#include <linux/kernel.h> + +typedef unsigned int instr; + +#define MAJOR_OP 0xfc000000 +#define LDA_OP 0x20000000 +#define STQ_OP 0xb4000000 +#define BR_OP 0xc0000000 + +#define STK_ALLOC_1 0x23de8000 /* lda $30,-X($30) */ +#define STK_ALLOC_1M 0xffff8000 +#define STK_ALLOC_2 0x43c0153e /* subq $30,X,$30 */ +#define STK_ALLOC_2M 0xffe01fff + +#define MEM_REG 0x03e00000 +#define MEM_BASE 0x001f0000 +#define MEM_OFF 0x0000ffff +#define MEM_OFF_SIGN 0x00008000 +#define BASE_SP 0x001e0000 + +#define STK_ALLOC_MATCH(INSTR) \ + (((INSTR) & STK_ALLOC_1M) == STK_ALLOC_1 \ + || ((INSTR) & STK_ALLOC_2M) == STK_ALLOC_2) +#define STK_PUSH_MATCH(INSTR) \ + (((INSTR) & (MAJOR_OP | MEM_BASE | MEM_OFF_SIGN)) == (STQ_OP | BASE_SP)) +#define MEM_OP_OFFSET(INSTR) \ + (((long)((INSTR) & MEM_OFF) << 48) >> 48) +#define MEM_OP_REG(INSTR) \ + (((INSTR) & MEM_REG) >> 22) + +/* Branches, jumps, PAL calls, and illegal opcodes end a basic block. */ +#define BB_END(INSTR) \ + (((instr)(INSTR) >= BR_OP) | ((instr)(INSTR) < LDA_OP) | \ + ((((instr)(INSTR) ^ 0x60000000) < 0x20000000) & \ + (((instr)(INSTR) & 0x0c000000) != 0))) + +#define IS_KERNEL_TEXT(PC) ((unsigned long)(PC) > START_ADDR) + +static char reg_name[][4] = { + "v0 ", "t0 ", "t1 ", "t2 ", "t3 ", "t4 ", "t5 ", "t6 ", "t7 ", + "s0 ", "s1 ", "s2 ", "s3 ", "s4 ", "s5 ", "s6 ", "a0 ", "a1 ", + "a2 ", "a3 ", "a4 ", "a5 ", "t8 ", "t9 ", "t10", "t11", "ra ", + "pv ", "at ", "gp ", "sp ", "0" +}; + + +static instr * +display_stored_regs(instr * pro_pc, unsigned char * sp) +{ + instr * ret_pc = 0; + int reg; + unsigned long value; + + printk("Prologue [<%p>], Frame %p:\n", pro_pc, sp); + while (!BB_END(*pro_pc)) + if (STK_PUSH_MATCH(*pro_pc)) { + reg = (*pro_pc & MEM_REG) >> 21; + value = *(unsigned long *)(sp + (*pro_pc & MEM_OFF)); + if (reg == 26) + ret_pc = (instr *)value; + printk("\t\t%s / 0x%016lx\n", reg_name[reg], value); + } + return ret_pc; +} + +static instr * +seek_prologue(instr * pc) +{ + while (!STK_ALLOC_MATCH(*pc)) + --pc; + while (!BB_END(*(pc - 1))) + --pc; + return pc; +} + +static long +stack_increment(instr * prologue_pc) +{ + while (!STK_ALLOC_MATCH(*prologue_pc)) + ++prologue_pc; + + /* Count the bytes allocated. */ + if ((*prologue_pc & STK_ALLOC_1M) == STK_ALLOC_1M) + return -(((long)(*prologue_pc) << 48) >> 48); + else + return (*prologue_pc >> 13) & 0xff; +} + +void +stacktrace(void) +{ + instr * ret_pc; + instr * prologue = (instr *)stacktrace; + register unsigned char * sp __asm__ ("$30"); + + printk("\tstack trace:\n"); + do { + ret_pc = display_stored_regs(prologue, sp); + sp += stack_increment(prologue); + prologue = seek_prologue(ret_pc); + } while (IS_KERNEL_TEXT(ret_pc)); +} diff --git a/arch/alpha/lib/strcat.S b/arch/alpha/lib/strcat.S new file mode 100644 index 00000000..393f5038 --- /dev/null +++ b/arch/alpha/lib/strcat.S @@ -0,0 +1,52 @@ +/* + * arch/alpha/lib/strcat.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Append a null-terminated string from SRC to DST. + */ + + .text + + .align 3 + .globl strcat + .ent strcat +strcat: + .frame $30, 0, $26 + .prologue 0 + + mov $16, $0 # set up return value + + /* Find the end of the string. */ + + ldq_u $1, 0($16) # load first quadword (a0 may be misaligned) + lda $2, -1 + insqh $2, $16, $2 + andnot $16, 7, $16 + or $2, $1, $1 + cmpbge $31, $1, $2 # bits set iff byte == 0 + bne $2, $found + +$loop: ldq $1, 8($16) + addq $16, 8, $16 + cmpbge $31, $1, $2 + beq $2, $loop + +$found: negq $2, $3 # clear all but least set bit + and $2, $3, $2 + + and $2, 0xf0, $3 # binary search for that set bit + and $2, 0xcc, $4 + and $2, 0xaa, $5 + cmovne $3, 4, $3 + cmovne $4, 2, $4 + cmovne $5, 1, $5 + addq $3, $4, $3 + addq $16, $5, $16 + addq $16, $3, $16 + + /* Now do the append. */ + + mov $26, $23 + br __stxcpy + + .end strcat diff --git a/arch/alpha/lib/strchr.S b/arch/alpha/lib/strchr.S new file mode 100644 index 00000000..011a175e --- /dev/null +++ b/arch/alpha/lib/strchr.S @@ -0,0 +1,70 @@ +/* + * arch/alpha/lib/strchr.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Return the address of a given character within a null-terminated + * string, or null if it is not found. + */ + +#include <asm/regdef.h> + + .set noreorder + .set noat + + .align 3 + .globl strchr + .ent strchr +strchr: + .frame sp, 0, ra + .prologue 0 + + zapnot a1, 1, a1 # e0 : zero extend the search character + ldq_u t0, 0(a0) # .. e1 : load first quadword + sll a1, 8, t5 # e0 : replicate the search character + andnot a0, 7, v0 # .. e1 : align our loop pointer + or t5, a1, a1 # e0 : + lda t4, -1 # .. e1 : build garbage mask + sll a1, 16, t5 # e0 : + cmpbge zero, t0, t2 # .. e1 : bits set iff byte == zero + mskqh t4, a0, t4 # e0 : + or t5, a1, a1 # .. e1 : + sll a1, 32, t5 # e0 : + cmpbge zero, t4, t4 # .. e1 : bits set iff byte is garbage + or t5, a1, a1 # e0 : + xor t0, a1, t1 # .. e1 : make bytes == c zero + cmpbge zero, t1, t3 # e0 : bits set iff byte == c + or t2, t3, t0 # e1 : bits set iff char match or zero match + andnot t0, t4, t0 # e0 : clear garbage bits + bne t0, $found # .. e1 (zdb) + +$loop: ldq t0, 8(v0) # e0 : + addq v0, 8, v0 # .. e1 : + nop # e0 : + xor t0, a1, t1 # .. e1 (ev5 data stall) + cmpbge zero, t0, t2 # e0 : bits set iff byte == 0 + cmpbge zero, t1, t3 # .. e1 : bits set iff byte == c + or t2, t3, t0 # e0 : + beq t0, $loop # .. e1 (zdb) + +$found: negq t0, t1 # e0 : clear all but least set bit + and t0, t1, t0 # e1 (stall) + + and t0, t3, t1 # e0 : bit set iff byte was the char + beq t1, $retnull # .. e1 (zdb) + + and t0, 0xf0, t2 # e0 : binary search for that set bit + and t0, 0xcc, t3 # .. e1 : + and t0, 0xaa, t4 # e0 : + cmovne t2, 4, t2 # .. e1 : + cmovne t3, 2, t3 # e0 : + cmovne t4, 1, t4 # .. e1 : + addq t2, t3, t2 # e0 : + addq v0, t4, v0 # .. e1 : + addq v0, t2, v0 # e0 : + ret # .. e1 : + +$retnull: + mov zero, v0 # e0 : + ret # .. e1 : + + .end strchr diff --git a/arch/alpha/lib/strcpy.S b/arch/alpha/lib/strcpy.S new file mode 100644 index 00000000..e0728e4a --- /dev/null +++ b/arch/alpha/lib/strcpy.S @@ -0,0 +1,23 @@ +/* + * arch/alpha/lib/strcpy.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Copy a null-terminated string from SRC to DST. Return a pointer + * to the null-terminator in the source. + */ + + .text + + .align 3 + .globl strcpy + .ent strcpy +strcpy: + .frame $30, 0, $26 + .prologue 0 + + mov $16, $0 # set up return value + mov $26, $23 # set up return address + unop + br __stxcpy # do the copy + + .end strcpy diff --git a/arch/alpha/lib/strlen.S b/arch/alpha/lib/strlen.S new file mode 100644 index 00000000..fe63353d --- /dev/null +++ b/arch/alpha/lib/strlen.S @@ -0,0 +1,57 @@ +/* + * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu) + * + * Finds length of a 0-terminated string. Optimized for the + * Alpha architecture: + * + * - memory accessed as aligned quadwords only + * - uses bcmpge to compare 8 bytes in parallel + * - does binary search to find 0 byte in last + * quadword (HAKMEM needed 12 instructions to + * do this instead of the 9 instructions that + * binary search needs). + */ + + .set noreorder + .set noat + + .align 3 + + .globl strlen + .ent strlen + +strlen: + ldq_u $1, 0($16) # load first quadword ($16 may be misaligned) + lda $2, -1($31) + insqh $2, $16, $2 + andnot $16, 7, $0 + or $2, $1, $1 + cmpbge $31, $1, $2 # $2 <- bitmask: bit i == 1 <==> i-th byte == 0 + bne $2, found + +loop: ldq $1, 8($0) + addq $0, 8, $0 # addr += 8 + nop # helps dual issue last two insns + cmpbge $31, $1, $2 + beq $2, loop + +found: blbs $2, done # make aligned case fast + negq $2, $3 + and $2, $3, $2 + + and $2, 0x0f, $1 + addq $0, 4, $3 + cmoveq $1, $3, $0 + + and $2, 0x33, $1 + addq $0, 2, $3 + cmoveq $1, $3, $0 + + and $2, 0x55, $1 + addq $0, 1, $3 + cmoveq $1, $3, $0 + +done: subq $0, $16, $0 + ret $31, ($26) + + .end strlen diff --git a/arch/alpha/lib/strlen_user.S b/arch/alpha/lib/strlen_user.S new file mode 100644 index 00000000..508a18e9 --- /dev/null +++ b/arch/alpha/lib/strlen_user.S @@ -0,0 +1,91 @@ +/* + * arch/alpha/lib/strlen_user.S + * + * Return the length of the string including the NUL terminator + * (strlen+1) or zero if an error occurred. + * + * In places where it is critical to limit the processing time, + * and the data is not trusted, strnlen_user() should be used. + * It will return a value greater than its second argument if + * that limit would be exceeded. This implementation is allowed + * to access memory beyond the limit, but will not cross a page + * boundary when doing so. + */ + +#include <asm/regdef.h> + + +/* Allow an exception for an insn; exit if we get one. */ +#define EX(x,y...) \ + 99: x,##y; \ + .section __ex_table,"a"; \ + .long 99b - .; \ + lda v0, $exception-99b(zero); \ + .previous + + + .set noreorder + .set noat + .text + + .globl __strlen_user + .ent __strlen_user + .frame sp, 0, ra + + .align 3 +__strlen_user: + ldah a1, 32767(zero) # do not use plain strlen_user() for strings + # that might be almost 2 GB long; you should + # be using strnlen_user() instead + + .globl __strnlen_user + + .align 3 +__strnlen_user: + .prologue 0 + + EX( ldq_u t0, 0(a0) ) # load first quadword (a0 may be misaligned) + lda t1, -1(zero) + insqh t1, a0, t1 + andnot a0, 7, v0 + or t1, t0, t0 + subq a0, 1, a0 # get our +1 for the return + cmpbge zero, t0, t1 # t1 <- bitmask: bit i == 1 <==> i-th byte == 0 + subq a1, 7, t2 + subq a0, v0, t0 + bne t1, $found + + addq t2, t0, t2 + addq a1, 1, a1 + + .align 3 +$loop: ble t2, $limit + EX( ldq t0, 8(v0) ) + subq t2, 8, t2 + addq v0, 8, v0 # addr += 8 + cmpbge zero, t0, t1 + beq t1, $loop + +$found: negq t1, t2 # clear all but least set bit + and t1, t2, t1 + + and t1, 0xf0, t2 # binary search for that set bit + and t1, 0xcc, t3 + and t1, 0xaa, t4 + cmovne t2, 4, t2 + cmovne t3, 2, t3 + cmovne t4, 1, t4 + addq t2, t3, t2 + addq v0, t4, v0 + addq v0, t2, v0 + nop # dual issue next two on ev4 and ev5 + subq v0, a0, v0 +$exception: + ret + + .align 3 # currently redundant +$limit: + subq a1, t2, v0 + ret + + .end __strlen_user diff --git a/arch/alpha/lib/strncat.S b/arch/alpha/lib/strncat.S new file mode 100644 index 00000000..a8278163 --- /dev/null +++ b/arch/alpha/lib/strncat.S @@ -0,0 +1,84 @@ +/* + * arch/alpha/lib/strncat.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Append no more than COUNT characters from the null-terminated string SRC + * to the null-terminated string DST. Always null-terminate the new DST. + * + * This differs slightly from the semantics in libc in that we never write + * past count, whereas libc may write to count+1. This follows the generic + * implementation in lib/string.c and is, IMHO, more sensible. + */ + + .text + + .align 3 + .globl strncat + .ent strncat +strncat: + .frame $30, 0, $26 + .prologue 0 + + mov $16, $0 # set up return value + beq $18, $zerocount + + /* Find the end of the string. */ + + ldq_u $1, 0($16) # load first quadword ($16 may be misaligned) + lda $2, -1($31) + insqh $2, $16, $2 + andnot $16, 7, $16 + or $2, $1, $1 + cmpbge $31, $1, $2 # bits set iff byte == 0 + bne $2, $found + +$loop: ldq $1, 8($16) + addq $16, 8, $16 + cmpbge $31, $1, $2 + beq $2, $loop + +$found: negq $2, $3 # clear all but least set bit + and $2, $3, $2 + + and $2, 0xf0, $3 # binary search for that set bit + and $2, 0xcc, $4 + and $2, 0xaa, $5 + cmovne $3, 4, $3 + cmovne $4, 2, $4 + cmovne $5, 1, $5 + addq $3, $4, $3 + addq $16, $5, $16 + addq $16, $3, $16 + + /* Now do the append. */ + + bsr $23, __stxncpy + + /* Worry about the null termination. */ + + zapnot $1, $27, $2 # was last byte a null? + bne $2, 0f + ret + +0: cmplt $27, $24, $2 # did we fill the buffer completely? + or $2, $18, $2 + bne $2, 2f + + and $24, 0x80, $2 # no zero next byte + bne $2, 1f + + /* Here there are bytes left in the current word. Clear one. */ + addq $24, $24, $24 # end-of-count bit <<= 1 +2: zap $1, $24, $1 + stq_u $1, 0($16) + ret + +1: /* Here we must read the next DST word and clear the first byte. */ + ldq_u $1, 8($16) + zap $1, 1, $1 + stq_u $1, 8($16) + +$zerocount: + ret + + .end strncat diff --git a/arch/alpha/lib/strncpy.S b/arch/alpha/lib/strncpy.S new file mode 100644 index 00000000..a46f7f3a --- /dev/null +++ b/arch/alpha/lib/strncpy.S @@ -0,0 +1,81 @@ +/* + * arch/alpha/lib/strncpy.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Copy no more than COUNT bytes of the null-terminated string from + * SRC to DST. If SRC does not cover all of COUNT, the balance is + * zeroed. + * + * Or, rather, if the kernel cared about that weird ANSI quirk. This + * version has cropped that bit o' nastiness as well as assuming that + * __stxncpy is in range of a branch. + */ + + .set noat + .set noreorder + + .text + + .align 4 + .globl strncpy + .ent strncpy +strncpy: + .frame $30, 0, $26 + .prologue 0 + + mov $16, $0 # set return value now + beq $18, $zerolen + unop + bsr $23, __stxncpy # do the work of the copy + + unop + bne $18, $multiword # do we have full words left? + subq $24, 1, $3 # nope + subq $27, 1, $4 + + or $3, $24, $3 # clear the bits between the last + or $4, $27, $4 # written byte and the last byte in COUNT + andnot $3, $4, $4 + zap $1, $4, $1 + + stq_u $1, 0($16) + ret + + .align 4 +$multiword: + subq $27, 1, $2 # clear the final bits in the prev word + or $2, $27, $2 + zapnot $1, $2, $1 + subq $18, 1, $18 + + stq_u $1, 0($16) + addq $16, 8, $16 + unop + beq $18, 1f + + nop + unop + nop + blbc $18, 0f + + stq_u $31, 0($16) # zero one word + subq $18, 1, $18 + addq $16, 8, $16 + beq $18, 1f + +0: stq_u $31, 0($16) # zero two words + subq $18, 2, $18 + stq_u $31, 8($16) + addq $16, 16, $16 + bne $18, 0b + +1: ldq_u $1, 0($16) # clear the leading bits in the final word + subq $24, 1, $2 + or $2, $24, $2 + + zap $1, $2, $1 + stq_u $1, 0($16) +$zerolen: + ret + + .end strncpy diff --git a/arch/alpha/lib/strncpy_from_user.S b/arch/alpha/lib/strncpy_from_user.S new file mode 100644 index 00000000..73ee2116 --- /dev/null +++ b/arch/alpha/lib/strncpy_from_user.S @@ -0,0 +1,339 @@ +/* + * arch/alpha/lib/strncpy_from_user.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Just like strncpy except in the return value: + * + * -EFAULT if an exception occurs before the terminator is copied. + * N if the buffer filled. + * + * Otherwise the length of the string is returned. + */ + + +#include <asm/errno.h> +#include <asm/regdef.h> + + +/* Allow an exception for an insn; exit if we get one. */ +#define EX(x,y...) \ + 99: x,##y; \ + .section __ex_table,"a"; \ + .long 99b - .; \ + lda $31, $exception-99b($0); \ + .previous + + + .set noat + .set noreorder + .text + + .globl __strncpy_from_user + .ent __strncpy_from_user + .frame $30, 0, $26 + .prologue 0 + + .align 3 +$aligned: + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == the first source word. */ + + /* Create the 1st output word and detect 0's in the 1st input word. */ + lda t2, -1 # e1 : build a mask against false zero + mskqh t2, a1, t2 # e0 : detection in the src word + mskqh t1, a1, t3 # e0 : + ornot t1, t2, t2 # .. e1 : + mskql t0, a1, t0 # e0 : assemble the first output word + cmpbge zero, t2, t8 # .. e1 : bits set iff null found + or t0, t3, t0 # e0 : + beq a2, $a_eoc # .. e1 : + bne t8, $a_eos # .. e1 : + + /* On entry to this basic block: + t0 == a source word not containing a null. */ + +$a_loop: + stq_u t0, 0(a0) # e0 : + addq a0, 8, a0 # .. e1 : + EX( ldq_u t0, 0(a1) ) # e0 : + addq a1, 8, a1 # .. e1 : + subq a2, 1, a2 # e0 : + cmpbge zero, t0, t8 # .. e1 (stall) + beq a2, $a_eoc # e1 : + beq t8, $a_loop # e1 : + + /* Take care of the final (partial) word store. At this point + the end-of-count bit is set in t8 iff it applies. + + On entry to this basic block we have: + t0 == the source word containing the null + t8 == the cmpbge mask that found it. */ + +$a_eos: + negq t8, t12 # e0 : find low bit set + and t8, t12, t12 # e1 (stall) + + /* For the sake of the cache, don't read a destination word + if we're not going to need it. */ + and t12, 0x80, t6 # e0 : + bne t6, 1f # .. e1 (zdb) + + /* We're doing a partial word store and so need to combine + our source and original destination words. */ + ldq_u t1, 0(a0) # e0 : + subq t12, 1, t6 # .. e1 : + or t12, t6, t8 # e0 : + unop # + zapnot t0, t8, t0 # e0 : clear src bytes > null + zap t1, t8, t1 # .. e1 : clear dst bytes <= null + or t0, t1, t0 # e1 : + +1: stq_u t0, 0(a0) + br $finish_up + + /* Add the end-of-count bit to the eos detection bitmask. */ +$a_eoc: + or t10, t8, t8 + br $a_eos + + /*** The Function Entry Point ***/ + .align 3 +__strncpy_from_user: + mov a0, v0 # save the string start + beq a2, $zerolength + + /* Are source and destination co-aligned? */ + xor a0, a1, t1 # e0 : + and a0, 7, t0 # .. e1 : find dest misalignment + and t1, 7, t1 # e0 : + addq a2, t0, a2 # .. e1 : bias count by dest misalignment + subq a2, 1, a2 # e0 : + and a2, 7, t2 # e1 : + srl a2, 3, a2 # e0 : a2 = loop counter = (count - 1)/8 + addq zero, 1, t10 # .. e1 : + sll t10, t2, t10 # e0 : t10 = bitmask of last count byte + bne t1, $unaligned # .. e1 : + + /* We are co-aligned; take care of a partial first word. */ + + EX( ldq_u t1, 0(a1) ) # e0 : load first src word + addq a1, 8, a1 # .. e1 : + + beq t0, $aligned # avoid loading dest word if not needed + ldq_u t0, 0(a0) # e0 : + br $aligned # .. e1 : + + +/* The source and destination are not co-aligned. Align the destination + and cope. We have to be very careful about not reading too much and + causing a SEGV. */ + + .align 3 +$u_head: + /* We know just enough now to be able to assemble the first + full source word. We can still find a zero at the end of it + that prevents us from outputting the whole thing. + + On entry to this basic block: + t0 == the first dest word, unmasked + t1 == the shifted low bits of the first source word + t6 == bytemask that is -1 in dest word bytes */ + + EX( ldq_u t2, 8(a1) ) # e0 : load second src word + addq a1, 8, a1 # .. e1 : + mskql t0, a0, t0 # e0 : mask trailing garbage in dst + extqh t2, a1, t4 # e0 : + or t1, t4, t1 # e1 : first aligned src word complete + mskqh t1, a0, t1 # e0 : mask leading garbage in src + or t0, t1, t0 # e0 : first output word complete + or t0, t6, t6 # e1 : mask original data for zero test + cmpbge zero, t6, t8 # e0 : + beq a2, $u_eocfin # .. e1 : + bne t8, $u_final # e1 : + + lda t6, -1 # e1 : mask out the bits we have + mskql t6, a1, t6 # e0 : already seen + stq_u t0, 0(a0) # e0 : store first output word + or t6, t2, t2 # .. e1 : + cmpbge zero, t2, t8 # e0 : find nulls in second partial + addq a0, 8, a0 # .. e1 : + subq a2, 1, a2 # e0 : + bne t8, $u_late_head_exit # .. e1 : + + /* Finally, we've got all the stupid leading edge cases taken care + of and we can set up to enter the main loop. */ + + extql t2, a1, t1 # e0 : position hi-bits of lo word + EX( ldq_u t2, 8(a1) ) # .. e1 : read next high-order source word + addq a1, 8, a1 # e0 : + cmpbge zero, t2, t8 # e1 (stall) + beq a2, $u_eoc # e1 : + bne t8, $u_eos # e1 : + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned source words. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t1 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word + + We further know that t2 does not contain a null terminator. */ + + .align 3 +$u_loop: + extqh t2, a1, t0 # e0 : extract high bits for current word + addq a1, 8, a1 # .. e1 : + extql t2, a1, t3 # e0 : extract low bits for next time + addq a0, 8, a0 # .. e1 : + or t0, t1, t0 # e0 : current dst word now complete + EX( ldq_u t2, 0(a1) ) # .. e1 : load high word for next time + stq_u t0, -8(a0) # e0 : save the current word + mov t3, t1 # .. e1 : + subq a2, 1, a2 # e0 : + cmpbge zero, t2, t8 # .. e1 : test new word for eos + beq a2, $u_eoc # e1 : + beq t8, $u_loop # e1 : + + /* We've found a zero somewhere in the source word we just read. + If it resides in the lower half, we have one (probably partial) + word to write out, and if it resides in the upper half, we + have one full and one partial word left to write out. + + On entry to this basic block: + t1 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word. */ +$u_eos: + extqh t2, a1, t0 # e0 : + or t0, t1, t0 # e1 : first (partial) source word complete + + cmpbge zero, t0, t8 # e0 : is the null in this first bit? + bne t8, $u_final # .. e1 (zdb) + + stq_u t0, 0(a0) # e0 : the null was in the high-order bits + addq a0, 8, a0 # .. e1 : + subq a2, 1, a2 # e1 : + +$u_late_head_exit: + extql t2, a1, t0 # .. e0 : + cmpbge zero, t0, t8 # e0 : + or t8, t10, t6 # e1 : + cmoveq a2, t6, t8 # e0 : + nop # .. e1 : + + /* Take care of a final (probably partial) result word. + On entry to this basic block: + t0 == assembled source word + t8 == cmpbge mask that found the null. */ +$u_final: + negq t8, t6 # e0 : isolate low bit set + and t6, t8, t12 # e1 : + + and t12, 0x80, t6 # e0 : avoid dest word load if we can + bne t6, 1f # .. e1 (zdb) + + ldq_u t1, 0(a0) # e0 : + subq t12, 1, t6 # .. e1 : + or t6, t12, t8 # e0 : + zapnot t0, t8, t0 # .. e1 : kill source bytes > null + zap t1, t8, t1 # e0 : kill dest bytes <= null + or t0, t1, t0 # e1 : + +1: stq_u t0, 0(a0) # e0 : + br $finish_up + +$u_eoc: # end-of-count + extqh t2, a1, t0 + or t0, t1, t0 + cmpbge zero, t0, t8 + +$u_eocfin: # end-of-count, final word + or t10, t8, t8 + br $u_final + + /* Unaligned copy entry point. */ + .align 3 +$unaligned: + + EX( ldq_u t1, 0(a1) ) # e0 : load first source word + + and a0, 7, t4 # .. e1 : find dest misalignment + and a1, 7, t5 # e0 : find src misalignment + + /* Conditionally load the first destination word and a bytemask + with 0xff indicating that the destination byte is sacrosanct. */ + + mov zero, t0 # .. e1 : + mov zero, t6 # e0 : + beq t4, 1f # .. e1 : + ldq_u t0, 0(a0) # e0 : + lda t6, -1 # .. e1 : + mskql t6, a0, t6 # e0 : +1: + subq a1, t4, a1 # .. e1 : sub dest misalignment from src addr + + /* If source misalignment is larger than dest misalignment, we need + extra startup checks to avoid SEGV. */ + + cmplt t4, t5, t12 # e1 : + extql t1, a1, t1 # .. e0 : shift src into place + lda t2, -1 # e0 : for creating masks later + beq t12, $u_head # e1 : + + mskqh t2, t5, t2 # e0 : begin src byte validity mask + cmpbge zero, t1, t8 # .. e1 : is there a zero? + extql t2, a1, t2 # e0 : + or t8, t10, t5 # .. e1 : test for end-of-count too + cmpbge zero, t2, t3 # e0 : + cmoveq a2, t5, t8 # .. e1 : + andnot t8, t3, t8 # e0 : + beq t8, $u_head # .. e1 (zdb) + + /* At this point we've found a zero in the first partial word of + the source. We need to isolate the valid source data and mask + it into the original destination data. (Incidentally, we know + that we'll need at least one byte of that original dest word.) */ + + ldq_u t0, 0(a0) # e0 : + negq t8, t6 # .. e1 : build bitmask of bytes <= zero + mskqh t1, t4, t1 # e0 : + and t6, t8, t12 # .. e1 : + subq t12, 1, t6 # e0 : + or t6, t12, t8 # e1 : + + zapnot t2, t8, t2 # e0 : prepare source word; mirror changes + zapnot t1, t8, t1 # .. e1 : to source validity mask + + andnot t0, t2, t0 # e0 : zero place for source to reside + or t0, t1, t0 # e1 : and put it there + stq_u t0, 0(a0) # e0 : + +$finish_up: + zapnot t0, t12, t4 # was last byte written null? + cmovne t4, 1, t4 + + and t12, 0xf0, t3 # binary search for the address of the + and t12, 0xcc, t2 # last byte written + and t12, 0xaa, t1 + bic a0, 7, t0 + cmovne t3, 4, t3 + cmovne t2, 2, t2 + cmovne t1, 1, t1 + addq t0, t3, t0 + addq t1, t2, t1 + addq t0, t1, t0 + addq t0, t4, t0 # add one if we filled the buffer + + subq t0, v0, v0 # find string length + ret + +$zerolength: + clr v0 +$exception: + ret + + .end __strncpy_from_user diff --git a/arch/alpha/lib/strrchr.S b/arch/alpha/lib/strrchr.S new file mode 100644 index 00000000..1970dc07 --- /dev/null +++ b/arch/alpha/lib/strrchr.S @@ -0,0 +1,87 @@ +/* + * arch/alpha/lib/strrchr.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Return the address of the last occurrence of a given character + * within a null-terminated string, or null if it is not found. + */ + +#include <asm/regdef.h> + + .set noreorder + .set noat + + .align 3 + .ent strrchr + .globl strrchr +strrchr: + .frame sp, 0, ra + .prologue 0 + + zapnot a1, 1, a1 # e0 : zero extend our test character + mov zero, t6 # .. e1 : t6 is last match aligned addr + sll a1, 8, t5 # e0 : replicate our test character + mov zero, t8 # .. e1 : t8 is last match byte compare mask + or t5, a1, a1 # e0 : + ldq_u t0, 0(a0) # .. e1 : load first quadword + sll a1, 16, t5 # e0 : + andnot a0, 7, v0 # .. e1 : align source addr + or t5, a1, a1 # e0 : + lda t4, -1 # .. e1 : build garbage mask + sll a1, 32, t5 # e0 : + cmpbge zero, t0, t1 # .. e1 : bits set iff byte == zero + mskqh t4, a0, t4 # e0 : + or t5, a1, a1 # .. e1 : character replication complete + xor t0, a1, t2 # e0 : make bytes == c zero + cmpbge zero, t4, t4 # .. e1 : bits set iff byte is garbage + cmpbge zero, t2, t3 # e0 : bits set iff byte == c + andnot t1, t4, t1 # .. e1 : clear garbage from null test + andnot t3, t4, t3 # e0 : clear garbage from char test + bne t1, $eos # .. e1 : did we already hit the terminator? + + /* Character search main loop */ +$loop: + ldq t0, 8(v0) # e0 : load next quadword + cmovne t3, v0, t6 # .. e1 : save previous comparisons match + cmovne t3, t3, t8 # e0 : + addq v0, 8, v0 # .. e1 : + xor t0, a1, t2 # e0 : + cmpbge zero, t0, t1 # .. e1 : bits set iff byte == zero + cmpbge zero, t2, t3 # e0 : bits set iff byte == c + beq t1, $loop # .. e1 : if we havnt seen a null, loop + + /* Mask out character matches after terminator */ +$eos: + negq t1, t4 # e0 : isolate first null byte match + and t1, t4, t4 # e1 : + subq t4, 1, t5 # e0 : build a mask of the bytes up to... + or t4, t5, t4 # e1 : ... and including the null + + and t3, t4, t3 # e0 : mask out char matches after null + cmovne t3, t3, t8 # .. e1 : save it, if match found + cmovne t3, v0, t6 # e0 : + + /* Locate the address of the last matched character */ + + /* Retain the early exit for the ev4 -- the ev5 mispredict penalty + is 5 cycles -- the same as just falling through. */ + beq t8, $retnull # .. e1 : + + and t8, 0xf0, t2 # e0 : binary search for the high bit set + cmovne t2, t2, t8 # .. e1 (zdb) + cmovne t2, 4, t2 # e0 : + and t8, 0xcc, t1 # .. e1 : + cmovne t1, t1, t8 # e0 : + cmovne t1, 2, t1 # .. e1 : + and t8, 0xaa, t0 # e0 : + cmovne t0, 1, t0 # .. e1 (zdb) + addq t2, t1, t1 # e0 : + addq t6, t0, v0 # .. e1 : add our aligned base ptr to the mix + addq v0, t1, v0 # e0 : + ret # .. e1 : + +$retnull: + mov zero, v0 # e0 : + ret # .. e1 : + + .end strrchr diff --git a/arch/alpha/lib/stxcpy.S b/arch/alpha/lib/stxcpy.S new file mode 100644 index 00000000..2a8d51bf --- /dev/null +++ b/arch/alpha/lib/stxcpy.S @@ -0,0 +1,289 @@ +/* + * arch/alpha/lib/stxcpy.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Copy a null-terminated string from SRC to DST. + * + * This is an internal routine used by strcpy, stpcpy, and strcat. + * As such, it uses special linkage conventions to make implementation + * of these public functions more efficient. + * + * On input: + * t9 = return address + * a0 = DST + * a1 = SRC + * + * On output: + * t12 = bitmask (with one bit set) indicating the last byte written + * a0 = unaligned address of the last *word* written + * + * Furthermore, v0, a3-a5, t11, and t12 are untouched. + */ + +#include <asm/regdef.h> + + .set noat + .set noreorder + + .text + +/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that + doesn't like putting the entry point for a procedure somewhere in the + middle of the procedure descriptor. Work around this by putting the + aligned copy in its own procedure descriptor */ + + .ent stxcpy_aligned + .align 3 +stxcpy_aligned: + .frame sp, 0, t9 + .prologue 0 + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == the first source word. */ + + /* Create the 1st output word and detect 0's in the 1st input word. */ + lda t2, -1 # e1 : build a mask against false zero + mskqh t2, a1, t2 # e0 : detection in the src word + mskqh t1, a1, t3 # e0 : + ornot t1, t2, t2 # .. e1 : + mskql t0, a1, t0 # e0 : assemble the first output word + cmpbge zero, t2, t8 # .. e1 : bits set iff null found + or t0, t3, t1 # e0 : + bne t8, $a_eos # .. e1 : + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == a source word not containing a null. */ + +$a_loop: + stq_u t1, 0(a0) # e0 : + addq a0, 8, a0 # .. e1 : + ldq_u t1, 0(a1) # e0 : + addq a1, 8, a1 # .. e1 : + cmpbge zero, t1, t8 # e0 (stall) + beq t8, $a_loop # .. e1 (zdb) + + /* Take care of the final (partial) word store. + On entry to this basic block we have: + t1 == the source word containing the null + t8 == the cmpbge mask that found it. */ +$a_eos: + negq t8, t6 # e0 : find low bit set + and t8, t6, t12 # e1 (stall) + + /* For the sake of the cache, don't read a destination word + if we're not going to need it. */ + and t12, 0x80, t6 # e0 : + bne t6, 1f # .. e1 (zdb) + + /* We're doing a partial word store and so need to combine + our source and original destination words. */ + ldq_u t0, 0(a0) # e0 : + subq t12, 1, t6 # .. e1 : + zapnot t1, t6, t1 # e0 : clear src bytes >= null + or t12, t6, t8 # .. e1 : + zap t0, t8, t0 # e0 : clear dst bytes <= null + or t0, t1, t1 # e1 : + +1: stq_u t1, 0(a0) # e0 : + ret (t9) # .. e1 : + + .end stxcpy_aligned + + .align 3 + .ent __stxcpy + .globl __stxcpy +__stxcpy: + .frame sp, 0, t9 + .prologue 0 + + /* Are source and destination co-aligned? */ + xor a0, a1, t0 # e0 : + unop # : + and t0, 7, t0 # e0 : + bne t0, $unaligned # .. e1 : + + /* We are co-aligned; take care of a partial first word. */ + ldq_u t1, 0(a1) # e0 : load first src word + and a0, 7, t0 # .. e1 : take care not to load a word ... + addq a1, 8, a1 # e0 : + beq t0, stxcpy_aligned # .. e1 : ... if we wont need it + ldq_u t0, 0(a0) # e0 : + br stxcpy_aligned # .. e1 : + + +/* The source and destination are not co-aligned. Align the destination + and cope. We have to be very careful about not reading too much and + causing a SEGV. */ + + .align 3 +$u_head: + /* We know just enough now to be able to assemble the first + full source word. We can still find a zero at the end of it + that prevents us from outputting the whole thing. + + On entry to this basic block: + t0 == the first dest word, for masking back in, if needed else 0 + t1 == the low bits of the first source word + t6 == bytemask that is -1 in dest word bytes */ + + ldq_u t2, 8(a1) # e0 : + addq a1, 8, a1 # .. e1 : + + extql t1, a1, t1 # e0 : + extqh t2, a1, t4 # e0 : + mskql t0, a0, t0 # e0 : + or t1, t4, t1 # .. e1 : + mskqh t1, a0, t1 # e0 : + or t0, t1, t1 # e1 : + + or t1, t6, t6 # e0 : + cmpbge zero, t6, t8 # .. e1 : + lda t6, -1 # e0 : for masking just below + bne t8, $u_final # .. e1 : + + mskql t6, a1, t6 # e0 : mask out the bits we have + or t6, t2, t2 # e1 : already extracted before + cmpbge zero, t2, t8 # e0 : testing eos + bne t8, $u_late_head_exit # .. e1 (zdb) + + /* Finally, we've got all the stupid leading edge cases taken care + of and we can set up to enter the main loop. */ + + stq_u t1, 0(a0) # e0 : store first output word + addq a0, 8, a0 # .. e1 : + extql t2, a1, t0 # e0 : position ho-bits of lo word + ldq_u t2, 8(a1) # .. e1 : read next high-order source word + addq a1, 8, a1 # e0 : + cmpbge zero, t2, t8 # .. e1 : + nop # e0 : + bne t8, $u_eos # .. e1 : + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned source words. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t0 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word + + We further know that t2 does not contain a null terminator. */ + + .align 3 +$u_loop: + extqh t2, a1, t1 # e0 : extract high bits for current word + addq a1, 8, a1 # .. e1 : + extql t2, a1, t3 # e0 : extract low bits for next time + addq a0, 8, a0 # .. e1 : + or t0, t1, t1 # e0 : current dst word now complete + ldq_u t2, 0(a1) # .. e1 : load high word for next time + stq_u t1, -8(a0) # e0 : save the current word + mov t3, t0 # .. e1 : + cmpbge zero, t2, t8 # e0 : test new word for eos + beq t8, $u_loop # .. e1 : + + /* We've found a zero somewhere in the source word we just read. + If it resides in the lower half, we have one (probably partial) + word to write out, and if it resides in the upper half, we + have one full and one partial word left to write out. + + On entry to this basic block: + t0 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word. */ +$u_eos: + extqh t2, a1, t1 # e0 : + or t0, t1, t1 # e1 : first (partial) source word complete + + cmpbge zero, t1, t8 # e0 : is the null in this first bit? + bne t8, $u_final # .. e1 (zdb) + +$u_late_head_exit: + stq_u t1, 0(a0) # e0 : the null was in the high-order bits + addq a0, 8, a0 # .. e1 : + extql t2, a1, t1 # e0 : + cmpbge zero, t1, t8 # .. e1 : + + /* Take care of a final (probably partial) result word. + On entry to this basic block: + t1 == assembled source word + t8 == cmpbge mask that found the null. */ +$u_final: + negq t8, t6 # e0 : isolate low bit set + and t6, t8, t12 # e1 : + + and t12, 0x80, t6 # e0 : avoid dest word load if we can + bne t6, 1f # .. e1 (zdb) + + ldq_u t0, 0(a0) # e0 : + subq t12, 1, t6 # .. e1 : + or t6, t12, t8 # e0 : + zapnot t1, t6, t1 # .. e1 : kill source bytes >= null + zap t0, t8, t0 # e0 : kill dest bytes <= null + or t0, t1, t1 # e1 : + +1: stq_u t1, 0(a0) # e0 : + ret (t9) # .. e1 : + + /* Unaligned copy entry point. */ + .align 3 +$unaligned: + + ldq_u t1, 0(a1) # e0 : load first source word + + and a0, 7, t4 # .. e1 : find dest misalignment + and a1, 7, t5 # e0 : find src misalignment + + /* Conditionally load the first destination word and a bytemask + with 0xff indicating that the destination byte is sacrosanct. */ + + mov zero, t0 # .. e1 : + mov zero, t6 # e0 : + beq t4, 1f # .. e1 : + ldq_u t0, 0(a0) # e0 : + lda t6, -1 # .. e1 : + mskql t6, a0, t6 # e0 : +1: + subq a1, t4, a1 # .. e1 : sub dest misalignment from src addr + + /* If source misalignment is larger than dest misalignment, we need + extra startup checks to avoid SEGV. */ + + cmplt t4, t5, t12 # e0 : + beq t12, $u_head # .. e1 (zdb) + + lda t2, -1 # e1 : mask out leading garbage in source + mskqh t2, t5, t2 # e0 : + nop # e0 : + ornot t1, t2, t3 # .. e1 : + cmpbge zero, t3, t8 # e0 : is there a zero? + beq t8, $u_head # .. e1 (zdb) + + /* At this point we've found a zero in the first partial word of + the source. We need to isolate the valid source data and mask + it into the original destination data. (Incidentally, we know + that we'll need at least one byte of that original dest word.) */ + + ldq_u t0, 0(a0) # e0 : + + negq t8, t6 # .. e1 : build bitmask of bytes <= zero + and t6, t8, t12 # e0 : + and a1, 7, t5 # .. e1 : + subq t12, 1, t6 # e0 : + or t6, t12, t8 # e1 : + srl t12, t5, t12 # e0 : adjust final null return value + + zapnot t2, t8, t2 # .. e1 : prepare source word; mirror changes + and t1, t2, t1 # e1 : to source validity mask + extql t2, a1, t2 # .. e0 : + extql t1, a1, t1 # e0 : + + andnot t0, t2, t0 # .. e1 : zero place for source to reside + or t0, t1, t1 # e1 : and put it there + stq_u t1, 0(a0) # .. e0 : + ret (t9) # e1 : + + .end __stxcpy diff --git a/arch/alpha/lib/stxncpy.S b/arch/alpha/lib/stxncpy.S new file mode 100644 index 00000000..3dece252 --- /dev/null +++ b/arch/alpha/lib/stxncpy.S @@ -0,0 +1,345 @@ +/* + * arch/alpha/lib/stxncpy.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Copy no more than COUNT bytes of the null-terminated string from + * SRC to DST. + * + * This is an internal routine used by strncpy, stpncpy, and strncat. + * As such, it uses special linkage conventions to make implementation + * of these public functions more efficient. + * + * On input: + * t9 = return address + * a0 = DST + * a1 = SRC + * a2 = COUNT + * + * Furthermore, COUNT may not be zero. + * + * On output: + * t0 = last word written + * t10 = bitmask (with one bit set) indicating the byte position of + * the end of the range specified by COUNT + * t12 = bitmask (with one bit set) indicating the last byte written + * a0 = unaligned address of the last *word* written + * a2 = the number of full words left in COUNT + * + * Furthermore, v0, a3-a5, t11, and $at are untouched. + */ + +#include <asm/regdef.h> + + .set noat + .set noreorder + + .text + +/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that + doesn't like putting the entry point for a procedure somewhere in the + middle of the procedure descriptor. Work around this by putting the + aligned copy in its own procedure descriptor */ + + .ent stxncpy_aligned + .align 3 +stxncpy_aligned: + .frame sp, 0, t9, 0 + .prologue 0 + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == the first source word. */ + + /* Create the 1st output word and detect 0's in the 1st input word. */ + lda t2, -1 # e1 : build a mask against false zero + mskqh t2, a1, t2 # e0 : detection in the src word + mskqh t1, a1, t3 # e0 : + ornot t1, t2, t2 # .. e1 : + mskql t0, a1, t0 # e0 : assemble the first output word + cmpbge zero, t2, t8 # .. e1 : bits set iff null found + or t0, t3, t0 # e0 : + beq a2, $a_eoc # .. e1 : + bne t8, $a_eos # .. e1 : + + /* On entry to this basic block: + t0 == a source word not containing a null. */ + +$a_loop: + stq_u t0, 0(a0) # e0 : + addq a0, 8, a0 # .. e1 : + ldq_u t0, 0(a1) # e0 : + addq a1, 8, a1 # .. e1 : + subq a2, 1, a2 # e0 : + cmpbge zero, t0, t8 # .. e1 (stall) + beq a2, $a_eoc # e1 : + beq t8, $a_loop # e1 : + + /* Take care of the final (partial) word store. At this point + the end-of-count bit is set in t8 iff it applies. + + On entry to this basic block we have: + t0 == the source word containing the null + t8 == the cmpbge mask that found it. */ + +$a_eos: + negq t8, t12 # e0 : find low bit set + and t8, t12, t12 # e1 (stall) + + /* For the sake of the cache, don't read a destination word + if we're not going to need it. */ + and t12, 0x80, t6 # e0 : + bne t6, 1f # .. e1 (zdb) + + /* We're doing a partial word store and so need to combine + our source and original destination words. */ + ldq_u t1, 0(a0) # e0 : + subq t12, 1, t6 # .. e1 : + or t12, t6, t8 # e0 : + unop # + zapnot t0, t8, t0 # e0 : clear src bytes > null + zap t1, t8, t1 # .. e1 : clear dst bytes <= null + or t0, t1, t0 # e1 : + +1: stq_u t0, 0(a0) # e0 : + ret (t9) # e1 : + + /* Add the end-of-count bit to the eos detection bitmask. */ +$a_eoc: + or t10, t8, t8 + br $a_eos + + .end stxncpy_aligned + + .align 3 + .ent __stxncpy + .globl __stxncpy +__stxncpy: + .frame sp, 0, t9, 0 + .prologue 0 + + /* Are source and destination co-aligned? */ + xor a0, a1, t1 # e0 : + and a0, 7, t0 # .. e1 : find dest misalignment + and t1, 7, t1 # e0 : + addq a2, t0, a2 # .. e1 : bias count by dest misalignment + subq a2, 1, a2 # e0 : + and a2, 7, t2 # e1 : + srl a2, 3, a2 # e0 : a2 = loop counter = (count - 1)/8 + addq zero, 1, t10 # .. e1 : + sll t10, t2, t10 # e0 : t10 = bitmask of last count byte + bne t1, $unaligned # .. e1 : + + /* We are co-aligned; take care of a partial first word. */ + + ldq_u t1, 0(a1) # e0 : load first src word + addq a1, 8, a1 # .. e1 : + + beq t0, stxncpy_aligned # avoid loading dest word if not needed + ldq_u t0, 0(a0) # e0 : + br stxncpy_aligned # .. e1 : + + +/* The source and destination are not co-aligned. Align the destination + and cope. We have to be very careful about not reading too much and + causing a SEGV. */ + + .align 3 +$u_head: + /* We know just enough now to be able to assemble the first + full source word. We can still find a zero at the end of it + that prevents us from outputting the whole thing. + + On entry to this basic block: + t0 == the first dest word, unmasked + t1 == the shifted low bits of the first source word + t6 == bytemask that is -1 in dest word bytes */ + + ldq_u t2, 8(a1) # e0 : load second src word + addq a1, 8, a1 # .. e1 : + mskql t0, a0, t0 # e0 : mask trailing garbage in dst + extqh t2, a1, t4 # e0 : + or t1, t4, t1 # e1 : first aligned src word complete + mskqh t1, a0, t1 # e0 : mask leading garbage in src + or t0, t1, t0 # e0 : first output word complete + or t0, t6, t6 # e1 : mask original data for zero test + cmpbge zero, t6, t8 # e0 : + beq a2, $u_eocfin # .. e1 : + lda t6, -1 # e0 : + bne t8, $u_final # .. e1 : + + mskql t6, a1, t6 # e0 : mask out bits already seen + nop # .. e1 : + stq_u t0, 0(a0) # e0 : store first output word + or t6, t2, t2 # .. e1 : + cmpbge zero, t2, t8 # e0 : find nulls in second partial + addq a0, 8, a0 # .. e1 : + subq a2, 1, a2 # e0 : + bne t8, $u_late_head_exit # .. e1 : + + /* Finally, we've got all the stupid leading edge cases taken care + of and we can set up to enter the main loop. */ + + extql t2, a1, t1 # e0 : position hi-bits of lo word + beq a2, $u_eoc # .. e1 : + ldq_u t2, 8(a1) # e0 : read next high-order source word + addq a1, 8, a1 # .. e1 : + extqh t2, a1, t0 # e0 : position lo-bits of hi word (stall) + cmpbge zero, t2, t8 # .. e1 : + nop # e0 : + bne t8, $u_eos # .. e1 : + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned source words. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t0 == the shifted low-order bits from the current source word + t1 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word + + We further know that t2 does not contain a null terminator. */ + + .align 3 +$u_loop: + or t0, t1, t0 # e0 : current dst word now complete + subq a2, 1, a2 # .. e1 : decrement word count + stq_u t0, 0(a0) # e0 : save the current word + addq a0, 8, a0 # .. e1 : + extql t2, a1, t1 # e0 : extract high bits for next time + beq a2, $u_eoc # .. e1 : + ldq_u t2, 8(a1) # e0 : load high word for next time + addq a1, 8, a1 # .. e1 : + nop # e0 : + cmpbge zero, t2, t8 # e1 : test new word for eos (stall) + extqh t2, a1, t0 # e0 : extract low bits for current word + beq t8, $u_loop # .. e1 : + + /* We've found a zero somewhere in the source word we just read. + If it resides in the lower half, we have one (probably partial) + word to write out, and if it resides in the upper half, we + have one full and one partial word left to write out. + + On entry to this basic block: + t0 == the shifted low-order bits from the current source word + t1 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word. */ +$u_eos: + or t0, t1, t0 # e0 : first (partial) source word complete + nop # .. e1 : + cmpbge zero, t0, t8 # e0 : is the null in this first bit? + bne t8, $u_final # .. e1 (zdb) + + stq_u t0, 0(a0) # e0 : the null was in the high-order bits + addq a0, 8, a0 # .. e1 : + subq a2, 1, a2 # e1 : + +$u_late_head_exit: + extql t2, a1, t0 # .. e0 : + cmpbge zero, t0, t8 # e0 : + or t8, t10, t6 # e1 : + cmoveq a2, t6, t8 # e0 : + nop # .. e1 : + + /* Take care of a final (probably partial) result word. + On entry to this basic block: + t0 == assembled source word + t8 == cmpbge mask that found the null. */ +$u_final: + negq t8, t6 # e0 : isolate low bit set + and t6, t8, t12 # e1 : + + and t12, 0x80, t6 # e0 : avoid dest word load if we can + bne t6, 1f # .. e1 (zdb) + + ldq_u t1, 0(a0) # e0 : + subq t12, 1, t6 # .. e1 : + or t6, t12, t8 # e0 : + zapnot t0, t8, t0 # .. e1 : kill source bytes > null + zap t1, t8, t1 # e0 : kill dest bytes <= null + or t0, t1, t0 # e1 : + +1: stq_u t0, 0(a0) # e0 : + ret (t9) # .. e1 : + + /* Got to end-of-count before end of string. + On entry to this basic block: + t1 == the shifted high-order bits from the previous source word */ +$u_eoc: + and a1, 7, t6 # e1 : + sll t10, t6, t6 # e0 : + and t6, 0xff, t6 # e0 : + bne t6, 1f # .. e1 : + + ldq_u t2, 8(a1) # e0 : load final src word + nop # .. e1 : + extqh t2, a1, t0 # e0 : extract low bits for last word + or t1, t0, t1 # e1 : + +1: cmpbge zero, t1, t8 + mov t1, t0 + +$u_eocfin: # end-of-count, final word + or t10, t8, t8 + br $u_final + + /* Unaligned copy entry point. */ + .align 3 +$unaligned: + + ldq_u t1, 0(a1) # e0 : load first source word + + and a0, 7, t4 # .. e1 : find dest misalignment + and a1, 7, t5 # e0 : find src misalignment + + /* Conditionally load the first destination word and a bytemask + with 0xff indicating that the destination byte is sacrosanct. */ + + mov zero, t0 # .. e1 : + mov zero, t6 # e0 : + beq t4, 1f # .. e1 : + ldq_u t0, 0(a0) # e0 : + lda t6, -1 # .. e1 : + mskql t6, a0, t6 # e0 : + subq a1, t4, a1 # .. e1 : sub dest misalignment from src addr + + /* If source misalignment is larger than dest misalignment, we need + extra startup checks to avoid SEGV. */ + +1: cmplt t4, t5, t12 # e1 : + extql t1, a1, t1 # .. e0 : shift src into place + lda t2, -1 # e0 : for creating masks later + beq t12, $u_head # .. e1 : + + extql t2, a1, t2 # e0 : + cmpbge zero, t1, t8 # .. e1 : is there a zero? + andnot t2, t6, t2 # e0 : dest mask for a single word copy + or t8, t10, t5 # .. e1 : test for end-of-count too + cmpbge zero, t2, t3 # e0 : + cmoveq a2, t5, t8 # .. e1 : + andnot t8, t3, t8 # e0 : + beq t8, $u_head # .. e1 (zdb) + + /* At this point we've found a zero in the first partial word of + the source. We need to isolate the valid source data and mask + it into the original destination data. (Incidentally, we know + that we'll need at least one byte of that original dest word.) */ + + ldq_u t0, 0(a0) # e0 : + negq t8, t6 # .. e1 : build bitmask of bytes <= zero + mskqh t1, t4, t1 # e0 : + and t6, t8, t12 # .. e1 : + subq t12, 1, t6 # e0 : + or t6, t12, t8 # e1 : + + zapnot t2, t8, t2 # e0 : prepare source word; mirror changes + zapnot t1, t8, t1 # .. e1 : to source validity mask + + andnot t0, t2, t0 # e0 : zero place for source to reside + or t0, t1, t0 # e1 : and put it there + stq_u t0, 0(a0) # e0 : + ret (t9) # .. e1 : + + .end __stxncpy diff --git a/arch/alpha/lib/udelay.c b/arch/alpha/lib/udelay.c new file mode 100644 index 00000000..69d52aa3 --- /dev/null +++ b/arch/alpha/lib/udelay.c @@ -0,0 +1,54 @@ +/* + * Copyright (C) 1993, 2000 Linus Torvalds + * + * Delay routines, using a pre-computed "loops_per_jiffy" value. + */ + +#include <linux/module.h> +#include <linux/sched.h> /* for udelay's use of smp_processor_id */ +#include <asm/param.h> +#include <asm/smp.h> +#include <linux/delay.h> + +/* + * Use only for very small delays (< 1 msec). + * + * The active part of our cycle counter is only 32-bits wide, and + * we're treating the difference between two marks as signed. On + * a 1GHz box, that's about 2 seconds. + */ + +void +__delay(int loops) +{ + int tmp; + __asm__ __volatile__( + " rpcc %0\n" + " addl %1,%0,%1\n" + "1: rpcc %0\n" + " subl %1,%0,%0\n" + " bgt %0,1b" + : "=&r" (tmp), "=r" (loops) : "1"(loops)); +} + +#ifdef CONFIG_SMP +#define LPJ cpu_data[smp_processor_id()].loops_per_jiffy +#else +#define LPJ loops_per_jiffy +#endif + +void +udelay(unsigned long usecs) +{ + usecs *= (((unsigned long)HZ << 32) / 1000000) * LPJ; + __delay((long)usecs >> 32); +} +EXPORT_SYMBOL(udelay); + +void +ndelay(unsigned long nsecs) +{ + nsecs *= (((unsigned long)HZ << 32) / 1000000000) * LPJ; + __delay((long)nsecs >> 32); +} +EXPORT_SYMBOL(ndelay); diff --git a/arch/alpha/math-emu/Makefile b/arch/alpha/math-emu/Makefile new file mode 100644 index 00000000..7f467199 --- /dev/null +++ b/arch/alpha/math-emu/Makefile @@ -0,0 +1,9 @@ +# +# Makefile for the FPU instruction emulation. +# + +ccflags-y := -w + +obj-$(CONFIG_MATHEMU) += math-emu.o + +math-emu-objs := math.o qrnnd.o diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c new file mode 100644 index 00000000..58c2669a --- /dev/null +++ b/arch/alpha/math-emu/math.c @@ -0,0 +1,400 @@ +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> + +#include <asm/uaccess.h> + +#include "sfp-util.h" +#include <math-emu/soft-fp.h> +#include <math-emu/single.h> +#include <math-emu/double.h> + +#define OPC_PAL 0x00 +#define OPC_INTA 0x10 +#define OPC_INTL 0x11 +#define OPC_INTS 0x12 +#define OPC_INTM 0x13 +#define OPC_FLTC 0x14 +#define OPC_FLTV 0x15 +#define OPC_FLTI 0x16 +#define OPC_FLTL 0x17 +#define OPC_MISC 0x18 +#define OPC_JSR 0x1a + +#define FOP_SRC_S 0 +#define FOP_SRC_T 2 +#define FOP_SRC_Q 3 + +#define FOP_FNC_ADDx 0 +#define FOP_FNC_CVTQL 0 +#define FOP_FNC_SUBx 1 +#define FOP_FNC_MULx 2 +#define FOP_FNC_DIVx 3 +#define FOP_FNC_CMPxUN 4 +#define FOP_FNC_CMPxEQ 5 +#define FOP_FNC_CMPxLT 6 +#define FOP_FNC_CMPxLE 7 +#define FOP_FNC_SQRTx 11 +#define FOP_FNC_CVTxS 12 +#define FOP_FNC_CVTxT 14 +#define FOP_FNC_CVTxQ 15 + +#define MISC_TRAPB 0x0000 +#define MISC_EXCB 0x0400 + +extern unsigned long alpha_read_fp_reg (unsigned long reg); +extern void alpha_write_fp_reg (unsigned long reg, unsigned long val); +extern unsigned long alpha_read_fp_reg_s (unsigned long reg); +extern void alpha_write_fp_reg_s (unsigned long reg, unsigned long val); + + +#ifdef MODULE + +MODULE_DESCRIPTION("FP Software completion module"); + +extern long (*alpha_fp_emul_imprecise)(struct pt_regs *, unsigned long); +extern long (*alpha_fp_emul) (unsigned long pc); + +static long (*save_emul_imprecise)(struct pt_regs *, unsigned long); +static long (*save_emul) (unsigned long pc); + +long do_alpha_fp_emul_imprecise(struct pt_regs *, unsigned long); +long do_alpha_fp_emul(unsigned long); + +int init_module(void) +{ + save_emul_imprecise = alpha_fp_emul_imprecise; + save_emul = alpha_fp_emul; + alpha_fp_emul_imprecise = do_alpha_fp_emul_imprecise; + alpha_fp_emul = do_alpha_fp_emul; + return 0; +} + +void cleanup_module(void) +{ + alpha_fp_emul_imprecise = save_emul_imprecise; + alpha_fp_emul = save_emul; +} + +#undef alpha_fp_emul_imprecise +#define alpha_fp_emul_imprecise do_alpha_fp_emul_imprecise +#undef alpha_fp_emul +#define alpha_fp_emul do_alpha_fp_emul + +#endif /* MODULE */ + + +/* + * Emulate the floating point instruction at address PC. Returns -1 if the + * instruction to be emulated is illegal (such as with the opDEC trap), else + * the SI_CODE for a SIGFPE signal, else 0 if everything's ok. + * + * Notice that the kernel does not and cannot use FP regs. This is good + * because it means that instead of saving/restoring all fp regs, we simply + * stick the result of the operation into the appropriate register. + */ +long +alpha_fp_emul (unsigned long pc) +{ + FP_DECL_EX; + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); + + unsigned long fa, fb, fc, func, mode, src; + unsigned long res, va, vb, vc, swcr, fpcr; + __u32 insn; + long si_code; + + get_user(insn, (__u32 __user *)pc); + fc = (insn >> 0) & 0x1f; /* destination register */ + fb = (insn >> 16) & 0x1f; + fa = (insn >> 21) & 0x1f; + func = (insn >> 5) & 0xf; + src = (insn >> 9) & 0x3; + mode = (insn >> 11) & 0x3; + + fpcr = rdfpcr(); + swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr); + + if (mode == 3) { + /* Dynamic -- get rounding mode from fpcr. */ + mode = (fpcr >> FPCR_DYN_SHIFT) & 3; + } + + switch (src) { + case FOP_SRC_S: + va = alpha_read_fp_reg_s(fa); + vb = alpha_read_fp_reg_s(fb); + + FP_UNPACK_SP(SA, &va); + FP_UNPACK_SP(SB, &vb); + + switch (func) { + case FOP_FNC_SUBx: + FP_SUB_S(SR, SA, SB); + goto pack_s; + + case FOP_FNC_ADDx: + FP_ADD_S(SR, SA, SB); + goto pack_s; + + case FOP_FNC_MULx: + FP_MUL_S(SR, SA, SB); + goto pack_s; + + case FOP_FNC_DIVx: + FP_DIV_S(SR, SA, SB); + goto pack_s; + + case FOP_FNC_SQRTx: + FP_SQRT_S(SR, SB); + goto pack_s; + } + goto bad_insn; + + case FOP_SRC_T: + va = alpha_read_fp_reg(fa); + vb = alpha_read_fp_reg(fb); + + if ((func & ~3) == FOP_FNC_CMPxUN) { + FP_UNPACK_RAW_DP(DA, &va); + FP_UNPACK_RAW_DP(DB, &vb); + if (!DA_e && !_FP_FRAC_ZEROP_1(DA)) { + FP_SET_EXCEPTION(FP_EX_DENORM); + if (FP_DENORM_ZERO) + _FP_FRAC_SET_1(DA, _FP_ZEROFRAC_1); + } + if (!DB_e && !_FP_FRAC_ZEROP_1(DB)) { + FP_SET_EXCEPTION(FP_EX_DENORM); + if (FP_DENORM_ZERO) + _FP_FRAC_SET_1(DB, _FP_ZEROFRAC_1); + } + FP_CMP_D(res, DA, DB, 3); + vc = 0x4000000000000000UL; + /* CMPTEQ, CMPTUN don't trap on QNaN, + while CMPTLT and CMPTLE do */ + if (res == 3 + && ((func & 3) >= 2 + || FP_ISSIGNAN_D(DA) + || FP_ISSIGNAN_D(DB))) { + FP_SET_EXCEPTION(FP_EX_INVALID); + } + switch (func) { + case FOP_FNC_CMPxUN: if (res != 3) vc = 0; break; + case FOP_FNC_CMPxEQ: if (res) vc = 0; break; + case FOP_FNC_CMPxLT: if (res != -1) vc = 0; break; + case FOP_FNC_CMPxLE: if ((long)res > 0) vc = 0; break; + } + goto done_d; + } + + FP_UNPACK_DP(DA, &va); + FP_UNPACK_DP(DB, &vb); + + switch (func) { + case FOP_FNC_SUBx: + FP_SUB_D(DR, DA, DB); + goto pack_d; + + case FOP_FNC_ADDx: + FP_ADD_D(DR, DA, DB); + goto pack_d; + + case FOP_FNC_MULx: + FP_MUL_D(DR, DA, DB); + goto pack_d; + + case FOP_FNC_DIVx: + FP_DIV_D(DR, DA, DB); + goto pack_d; + + case FOP_FNC_SQRTx: + FP_SQRT_D(DR, DB); + goto pack_d; + + case FOP_FNC_CVTxS: + /* It is irritating that DEC encoded CVTST with + SRC == T_floating. It is also interesting that + the bit used to tell the two apart is /U... */ + if (insn & 0x2000) { + FP_CONV(S,D,1,1,SR,DB); + goto pack_s; + } else { + vb = alpha_read_fp_reg_s(fb); + FP_UNPACK_SP(SB, &vb); + DR_c = DB_c; + DR_s = DB_s; + DR_e = DB_e + (1024 - 128); + DR_f = SB_f << (52 - 23); + goto pack_d; + } + + case FOP_FNC_CVTxQ: + if (DB_c == FP_CLS_NAN + && (_FP_FRAC_HIGH_RAW_D(DB) & _FP_QNANBIT_D)) { + /* AAHB Table B-2 says QNaN should not trigger INV */ + vc = 0; + } else + FP_TO_INT_ROUND_D(vc, DB, 64, 2); + goto done_d; + } + goto bad_insn; + + case FOP_SRC_Q: + vb = alpha_read_fp_reg(fb); + + switch (func) { + case FOP_FNC_CVTQL: + /* Notice: We can get here only due to an integer + overflow. Such overflows are reported as invalid + ops. We return the result the hw would have + computed. */ + vc = ((vb & 0xc0000000) << 32 | /* sign and msb */ + (vb & 0x3fffffff) << 29); /* rest of the int */ + FP_SET_EXCEPTION (FP_EX_INVALID); + goto done_d; + + case FOP_FNC_CVTxS: + FP_FROM_INT_S(SR, ((long)vb), 64, long); + goto pack_s; + + case FOP_FNC_CVTxT: + FP_FROM_INT_D(DR, ((long)vb), 64, long); + goto pack_d; + } + goto bad_insn; + } + goto bad_insn; + +pack_s: + FP_PACK_SP(&vc, SR); + if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) + vc = 0; + alpha_write_fp_reg_s(fc, vc); + goto done; + +pack_d: + FP_PACK_DP(&vc, DR); + if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) + vc = 0; +done_d: + alpha_write_fp_reg(fc, vc); + goto done; + + /* + * Take the appropriate action for each possible + * floating-point result: + * + * - Set the appropriate bits in the FPCR + * - If the specified exception is enabled in the FPCR, + * return. The caller (entArith) will dispatch + * the appropriate signal to the translated program. + * + * In addition, properly track the exception state in software + * as described in the Alpha Architecture Handbook section 4.7.7.3. + */ +done: + if (_fex) { + /* Record exceptions in software control word. */ + swcr |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr |= ieee_swcr_to_fpcr(swcr); + wrfpcr(fpcr); + + /* Do we generate a signal? */ + _fex = _fex & swcr & IEEE_TRAP_ENABLE_MASK; + si_code = 0; + if (_fex) { + if (_fex & IEEE_TRAP_ENABLE_DNO) si_code = FPE_FLTUND; + if (_fex & IEEE_TRAP_ENABLE_INE) si_code = FPE_FLTRES; + if (_fex & IEEE_TRAP_ENABLE_UNF) si_code = FPE_FLTUND; + if (_fex & IEEE_TRAP_ENABLE_OVF) si_code = FPE_FLTOVF; + if (_fex & IEEE_TRAP_ENABLE_DZE) si_code = FPE_FLTDIV; + if (_fex & IEEE_TRAP_ENABLE_INV) si_code = FPE_FLTINV; + } + + return si_code; + } + + /* We used to write the destination register here, but DEC FORTRAN + requires that the result *always* be written... so we do the write + immediately after the operations above. */ + + return 0; + +bad_insn: + printk(KERN_ERR "alpha_fp_emul: Invalid FP insn %#x at %#lx\n", + insn, pc); + return -1; +} + +long +alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask) +{ + unsigned long trigger_pc = regs->pc - 4; + unsigned long insn, opcode, rc, si_code = 0; + + /* + * Turn off the bits corresponding to registers that are the + * target of instructions that set bits in the exception + * summary register. We have some slack doing this because a + * register that is the target of a trapping instruction can + * be written at most once in the trap shadow. + * + * Branches, jumps, TRAPBs, EXCBs and calls to PALcode all + * bound the trap shadow, so we need not look any further than + * up to the first occurrence of such an instruction. + */ + while (write_mask) { + get_user(insn, (__u32 __user *)(trigger_pc)); + opcode = insn >> 26; + rc = insn & 0x1f; + + switch (opcode) { + case OPC_PAL: + case OPC_JSR: + case 0x30 ... 0x3f: /* branches */ + goto egress; + + case OPC_MISC: + switch (insn & 0xffff) { + case MISC_TRAPB: + case MISC_EXCB: + goto egress; + + default: + break; + } + break; + + case OPC_INTA: + case OPC_INTL: + case OPC_INTS: + case OPC_INTM: + write_mask &= ~(1UL << rc); + break; + + case OPC_FLTC: + case OPC_FLTV: + case OPC_FLTI: + case OPC_FLTL: + write_mask &= ~(1UL << (rc + 32)); + break; + } + if (!write_mask) { + /* Re-execute insns in the trap-shadow. */ + regs->pc = trigger_pc + 4; + si_code = alpha_fp_emul(trigger_pc); + goto egress; + } + trigger_pc -= 4; + } + +egress: + return si_code; +} diff --git a/arch/alpha/math-emu/qrnnd.S b/arch/alpha/math-emu/qrnnd.S new file mode 100644 index 00000000..d6373ec1 --- /dev/null +++ b/arch/alpha/math-emu/qrnnd.S @@ -0,0 +1,163 @@ + # Alpha 21064 __udiv_qrnnd + # Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc. + + # This file is part of GCC. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # In addition to the permissions in the GNU General Public License, the + # Free Software Foundation gives you unlimited permission to link the + # compiled version of this file with other programs, and to distribute + # those programs without any restriction coming from the use of this + # file. (The General Public License restrictions do apply in other + # respects; for example, they cover modification of the file, and + # distribution when not linked into another program.) + + # This file is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU General Public License + # along with GCC; see the file COPYING. If not, write to the + # Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + .set noreorder + .set noat + + .text + + .globl __udiv_qrnnd + .ent __udiv_qrnnd +__udiv_qrnnd: + .frame $30,0,$26,0 + .prologue 0 + +#define cnt $2 +#define tmp $3 +#define rem_ptr $16 +#define n1 $17 +#define n0 $18 +#define d $19 +#define qb $20 +#define AT $at + + ldiq cnt,16 + blt d,$largedivisor + +$loop1: cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,$loop1 + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +$largedivisor: + and n0,1,$4 + + srl n0,1,n0 + sll n1,63,tmp + or tmp,n0,n0 + srl n1,1,n1 + + and d,1,$6 + srl d,1,$5 + addq $5,$6,$5 + +$loop2: cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,$loop2 + + addq n1,n1,n1 + addq $4,n1,n1 + bne $6,$Odd + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +$Odd: + /* q' in n0. r' in n1 */ + addq n1,n0,n1 + + cmpult n1,n0,tmp # tmp := carry from addq + subq n1,d,AT + addq n0,tmp,n0 + cmovne tmp,AT,n1 + + cmpult n1,d,tmp + addq n0,1,AT + cmoveq tmp,AT,n0 + subq n1,d,AT + cmoveq tmp,AT,n1 + + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + + .end __udiv_qrnnd diff --git a/arch/alpha/math-emu/sfp-util.h b/arch/alpha/math-emu/sfp-util.h new file mode 100644 index 00000000..f53707f7 --- /dev/null +++ b/arch/alpha/math-emu/sfp-util.h @@ -0,0 +1,35 @@ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <asm/byteorder.h> +#include <asm/fpu.h> + +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + ((sl) = (al) + (bl), (sh) = (ah) + (bh) + ((sl) < (al))) + +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + ((sl) = (al) - (bl), (sh) = (ah) - (bh) - ((al) < (bl))) + +#define umul_ppmm(wh, wl, u, v) \ + __asm__ ("mulq %2,%3,%1; umulh %2,%3,%0" \ + : "=r" ((UDItype)(wh)), \ + "=&r" ((UDItype)(wl)) \ + : "r" ((UDItype)(u)), \ + "r" ((UDItype)(v))) + +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { unsigned long __r; \ + (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ + (r) = __r; \ + } while (0) +extern unsigned long __udiv_qrnnd (unsigned long *, unsigned long, + unsigned long , unsigned long); + +#define UDIV_NEEDS_NORMALIZATION 1 + +#define abort() goto bad_insn + +#ifndef __LITTLE_ENDIAN +#define __LITTLE_ENDIAN -1 +#endif +#define __BYTE_ORDER __LITTLE_ENDIAN diff --git a/arch/alpha/mm/Makefile b/arch/alpha/mm/Makefile new file mode 100644 index 00000000..c993d3f9 --- /dev/null +++ b/arch/alpha/mm/Makefile @@ -0,0 +1,9 @@ +# +# Makefile for the linux alpha-specific parts of the memory manager. +# + +ccflags-y := -Werror + +obj-y := init.o fault.o extable.o + +obj-$(CONFIG_DISCONTIGMEM) += numa.o diff --git a/arch/alpha/mm/extable.c b/arch/alpha/mm/extable.c new file mode 100644 index 00000000..813c9b63 --- /dev/null +++ b/arch/alpha/mm/extable.c @@ -0,0 +1,92 @@ +/* + * linux/arch/alpha/mm/extable.c + */ + +#include <linux/module.h> +#include <linux/sort.h> +#include <asm/uaccess.h> + +static inline unsigned long ex_to_addr(const struct exception_table_entry *x) +{ + return (unsigned long)&x->insn + x->insn; +} + +static void swap_ex(void *a, void *b, int size) +{ + struct exception_table_entry *ex_a = a, *ex_b = b; + unsigned long addr_a = ex_to_addr(ex_a), addr_b = ex_to_addr(ex_b); + unsigned int t = ex_a->fixup.unit; + + ex_a->fixup.unit = ex_b->fixup.unit; + ex_b->fixup.unit = t; + ex_a->insn = (int)(addr_b - (unsigned long)&ex_a->insn); + ex_b->insn = (int)(addr_a - (unsigned long)&ex_b->insn); +} + +/* + * The exception table needs to be sorted so that the binary + * search that we use to find entries in it works properly. + * This is used both for the kernel exception table and for + * the exception tables of modules that get loaded. + */ +static int cmp_ex(const void *a, const void *b) +{ + const struct exception_table_entry *x = a, *y = b; + + /* avoid overflow */ + if (ex_to_addr(x) > ex_to_addr(y)) + return 1; + if (ex_to_addr(x) < ex_to_addr(y)) + return -1; + return 0; +} + +void sort_extable(struct exception_table_entry *start, + struct exception_table_entry *finish) +{ + sort(start, finish - start, sizeof(struct exception_table_entry), + cmp_ex, swap_ex); +} + +#ifdef CONFIG_MODULES +/* + * Any entry referring to the module init will be at the beginning or + * the end. + */ +void trim_init_extable(struct module *m) +{ + /*trim the beginning*/ + while (m->num_exentries && + within_module_init(ex_to_addr(&m->extable[0]), m)) { + m->extable++; + m->num_exentries--; + } + /*trim the end*/ + while (m->num_exentries && + within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]), + m)) + m->num_exentries--; +} +#endif /* CONFIG_MODULES */ + +const struct exception_table_entry * +search_extable(const struct exception_table_entry *first, + const struct exception_table_entry *last, + unsigned long value) +{ + while (first <= last) { + const struct exception_table_entry *mid; + unsigned long mid_value; + + mid = (last - first) / 2 + first; + mid_value = ex_to_addr(mid); + if (mid_value == value) + return mid; + else if (mid_value < value) + first = mid+1; + else + last = mid-1; + } + + return NULL; +} diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c new file mode 100644 index 00000000..5eecab1a --- /dev/null +++ b/arch/alpha/mm/fault.c @@ -0,0 +1,233 @@ +/* + * linux/arch/alpha/mm/fault.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <asm/io.h> + +#define __EXTERN_INLINE inline +#include <asm/mmu_context.h> +#include <asm/tlbflush.h> +#undef __EXTERN_INLINE + +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/module.h> + +#include <asm/uaccess.h> + +extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *); + + +/* + * Force a new ASN for a task. + */ + +#ifndef CONFIG_SMP +unsigned long last_asn = ASN_FIRST_VERSION; +#endif + +void +__load_new_mm_context(struct mm_struct *next_mm) +{ + unsigned long mmc; + struct pcb_struct *pcb; + + mmc = __get_new_mm_context(next_mm, smp_processor_id()); + next_mm->context[smp_processor_id()] = mmc; + + pcb = ¤t_thread_info()->pcb; + pcb->asn = mmc & HARDWARE_ASN_MASK; + pcb->ptbr = ((unsigned long) next_mm->pgd - IDENT_ADDR) >> PAGE_SHIFT; + + __reload_thread(pcb); +} + + +/* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to handle_mm_fault(). + * + * mmcsr: + * 0 = translation not valid + * 1 = access violation + * 2 = fault-on-read + * 3 = fault-on-execute + * 4 = fault-on-write + * + * cause: + * -1 = instruction fetch + * 0 = load + * 1 = store + * + * Registers $9 through $15 are saved in a block just prior to `regs' and + * are saved and restored around the call to allow exception code to + * modify them. + */ + +/* Macro for exception fixup code to access integer registers. */ +#define dpf_reg(r) \ + (((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 : \ + (r) <= 18 ? (r)+8 : (r)-10]) + +asmlinkage void +do_page_fault(unsigned long address, unsigned long mmcsr, + long cause, struct pt_regs *regs) +{ + struct vm_area_struct * vma; + struct mm_struct *mm = current->mm; + const struct exception_table_entry *fixup; + int fault, si_code = SEGV_MAPERR; + siginfo_t info; + + /* As of EV6, a load into $31/$f31 is a prefetch, and never faults + (or is suppressed by the PALcode). Support that for older CPUs + by ignoring such an instruction. */ + if (cause == 0) { + unsigned int insn; + __get_user(insn, (unsigned int __user *)regs->pc); + if ((insn >> 21 & 0x1f) == 0x1f && + /* ldq ldl ldt lds ldg ldf ldwu ldbu */ + (1ul << (insn >> 26) & 0x30f00001400ul)) { + regs->pc += 4; + return; + } + } + + /* If we're in an interrupt context, or have no user context, + we must not take the fault. */ + if (!mm || in_atomic()) + goto no_context; + +#ifdef CONFIG_ALPHA_LARGE_VMALLOC + if (address >= TASK_SIZE) + goto vmalloc_fault; +#endif + + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (expand_stack(vma, address)) + goto bad_area; + + /* Ok, we have a good vm_area for this memory access, so + we can handle it. */ + good_area: + si_code = SEGV_ACCERR; + if (cause < 0) { + if (!(vma->vm_flags & VM_EXEC)) + goto bad_area; + } else if (!cause) { + /* Allow reads even for write-only mappings */ + if (!(vma->vm_flags & (VM_READ | VM_WRITE))) + goto bad_area; + } else { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + } + + /* If for any reason at all we couldn't handle the fault, + make sure we exit gracefully rather than endlessly redo + the fault. */ + fault = handle_mm_fault(mm, vma, address, cause > 0 ? FAULT_FLAG_WRITE : 0); + up_read(&mm->mmap_sem); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); + } + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; + return; + + /* Something tried to access memory that isn't in our memory map. + Fix it, but check if it's kernel or user first. */ + bad_area: + up_read(&mm->mmap_sem); + + if (user_mode(regs)) + goto do_sigsegv; + + no_context: + /* Are we prepared to handle this fault as an exception? */ + if ((fixup = search_exception_tables(regs->pc)) != 0) { + unsigned long newpc; + newpc = fixup_exception(dpf_reg, fixup, regs->pc); + regs->pc = newpc; + return; + } + + /* Oops. The kernel tried to access some bad page. We'll have to + terminate things with extreme prejudice. */ + printk(KERN_ALERT "Unable to handle kernel paging request at " + "virtual address %016lx\n", address); + die_if_kernel("Oops", regs, cause, (unsigned long*)regs - 16); + do_exit(SIGKILL); + + /* We ran out of memory, or some other thing happened to us that + made us unable to handle the page fault gracefully. */ + out_of_memory: + if (!user_mode(regs)) + goto no_context; + pagefault_out_of_memory(); + return; + + do_sigbus: + /* Send a sigbus, regardless of whether we were in kernel + or user mode. */ + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *) address; + force_sig_info(SIGBUS, &info, current); + if (!user_mode(regs)) + goto no_context; + return; + + do_sigsegv: + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = (void __user *) address; + force_sig_info(SIGSEGV, &info, current); + return; + +#ifdef CONFIG_ALPHA_LARGE_VMALLOC + vmalloc_fault: + if (user_mode(regs)) + goto do_sigsegv; + else { + /* Synchronize this task's top level page-table + with the "reference" page table from init. */ + long index = pgd_index(address); + pgd_t *pgd, *pgd_k; + + pgd = current->active_mm->pgd + index; + pgd_k = swapper_pg_dir + index; + if (!pgd_present(*pgd) && pgd_present(*pgd_k)) { + pgd_val(*pgd) = pgd_val(*pgd_k); + return; + } + goto no_context; + } +#endif +} diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c new file mode 100644 index 00000000..1ad6ca74 --- /dev/null +++ b/arch/alpha/mm/init.c @@ -0,0 +1,349 @@ +/* + * linux/arch/alpha/mm/init.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +/* 2.3.x zone allocator, 1999 Andrea Arcangeli <andrea@suse.de> */ + +#include <linux/pagemap.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/init.h> +#include <linux/bootmem.h> /* max_low_pfn */ +#include <linux/vmalloc.h> +#include <linux/gfp.h> + +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/hwrpb.h> +#include <asm/dma.h> +#include <asm/mmu_context.h> +#include <asm/console.h> +#include <asm/tlb.h> +#include <asm/setup.h> + +extern void die_if_kernel(char *,struct pt_regs *,long); + +static struct pcb_struct original_pcb; + +pgd_t * +pgd_alloc(struct mm_struct *mm) +{ + pgd_t *ret, *init; + + ret = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + init = pgd_offset(&init_mm, 0UL); + if (ret) { +#ifdef CONFIG_ALPHA_LARGE_VMALLOC + memcpy (ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD - 1)*sizeof(pgd_t)); +#else + pgd_val(ret[PTRS_PER_PGD-2]) = pgd_val(init[PTRS_PER_PGD-2]); +#endif + + /* The last PGD entry is the VPTB self-map. */ + pgd_val(ret[PTRS_PER_PGD-1]) + = pte_val(mk_pte(virt_to_page(ret), PAGE_KERNEL)); + } + return ret; +} + + +/* + * BAD_PAGE is the page that is used for page faults when linux + * is out-of-memory. Older versions of linux just did a + * do_exit(), but using this instead means there is less risk + * for a process dying in kernel mode, possibly leaving an inode + * unused etc.. + * + * BAD_PAGETABLE is the accompanying page-table: it is initialized + * to point to BAD_PAGE entries. + * + * ZERO_PAGE is a special page that is used for zero-initialized + * data and COW. + */ +pmd_t * +__bad_pagetable(void) +{ + memset((void *) EMPTY_PGT, 0, PAGE_SIZE); + return (pmd_t *) EMPTY_PGT; +} + +pte_t +__bad_page(void) +{ + memset((void *) EMPTY_PGE, 0, PAGE_SIZE); + return pte_mkdirty(mk_pte(virt_to_page(EMPTY_PGE), PAGE_SHARED)); +} + +static inline unsigned long +load_PCB(struct pcb_struct *pcb) +{ + register unsigned long sp __asm__("$30"); + pcb->ksp = sp; + return __reload_thread(pcb); +} + +/* Set up initial PCB, VPTB, and other such nicities. */ + +static inline void +switch_to_system_map(void) +{ + unsigned long newptbr; + unsigned long original_pcb_ptr; + + /* Initialize the kernel's page tables. Linux puts the vptb in + the last slot of the L1 page table. */ + memset(swapper_pg_dir, 0, PAGE_SIZE); + newptbr = ((unsigned long) swapper_pg_dir - PAGE_OFFSET) >> PAGE_SHIFT; + pgd_val(swapper_pg_dir[1023]) = + (newptbr << 32) | pgprot_val(PAGE_KERNEL); + + /* Set the vptb. This is often done by the bootloader, but + shouldn't be required. */ + if (hwrpb->vptb != 0xfffffffe00000000UL) { + wrvptptr(0xfffffffe00000000UL); + hwrpb->vptb = 0xfffffffe00000000UL; + hwrpb_update_checksum(hwrpb); + } + + /* Also set up the real kernel PCB while we're at it. */ + init_thread_info.pcb.ptbr = newptbr; + init_thread_info.pcb.flags = 1; /* set FEN, clear everything else */ + original_pcb_ptr = load_PCB(&init_thread_info.pcb); + tbia(); + + /* Save off the contents of the original PCB so that we can + restore the original console's page tables for a clean reboot. + + Note that the PCB is supposed to be a physical address, but + since KSEG values also happen to work, folks get confused. + Check this here. */ + + if (original_pcb_ptr < PAGE_OFFSET) { + original_pcb_ptr = (unsigned long) + phys_to_virt(original_pcb_ptr); + } + original_pcb = *(struct pcb_struct *) original_pcb_ptr; +} + +int callback_init_done; + +void * __init +callback_init(void * kernel_end) +{ + struct crb_struct * crb; + pgd_t *pgd; + pmd_t *pmd; + void *two_pages; + + /* Starting at the HWRPB, locate the CRB. */ + crb = (struct crb_struct *)((char *)hwrpb + hwrpb->crb_offset); + + if (alpha_using_srm) { + /* Tell the console whither it is to be remapped. */ + if (srm_fixup(VMALLOC_START, (unsigned long)hwrpb)) + __halt(); /* "We're boned." --Bender */ + + /* Edit the procedure descriptors for DISPATCH and FIXUP. */ + crb->dispatch_va = (struct procdesc_struct *) + (VMALLOC_START + (unsigned long)crb->dispatch_va + - crb->map[0].va); + crb->fixup_va = (struct procdesc_struct *) + (VMALLOC_START + (unsigned long)crb->fixup_va + - crb->map[0].va); + } + + switch_to_system_map(); + + /* Allocate one PGD and one PMD. In the case of SRM, we'll need + these to actually remap the console. There is an assumption + here that only one of each is needed, and this allows for 8MB. + On systems with larger consoles, additional pages will be + allocated as needed during the mapping process. + + In the case of not SRM, but not CONFIG_ALPHA_LARGE_VMALLOC, + we need to allocate the PGD we use for vmalloc before we start + forking other tasks. */ + + two_pages = (void *) + (((unsigned long)kernel_end + ~PAGE_MASK) & PAGE_MASK); + kernel_end = two_pages + 2*PAGE_SIZE; + memset(two_pages, 0, 2*PAGE_SIZE); + + pgd = pgd_offset_k(VMALLOC_START); + pgd_set(pgd, (pmd_t *)two_pages); + pmd = pmd_offset(pgd, VMALLOC_START); + pmd_set(pmd, (pte_t *)(two_pages + PAGE_SIZE)); + + if (alpha_using_srm) { + static struct vm_struct console_remap_vm; + unsigned long nr_pages = 0; + unsigned long vaddr; + unsigned long i, j; + + /* calculate needed size */ + for (i = 0; i < crb->map_entries; ++i) + nr_pages += crb->map[i].count; + + /* register the vm area */ + console_remap_vm.flags = VM_ALLOC; + console_remap_vm.size = nr_pages << PAGE_SHIFT; + vm_area_register_early(&console_remap_vm, PAGE_SIZE); + + vaddr = (unsigned long)console_remap_vm.addr; + + /* Set up the third level PTEs and update the virtual + addresses of the CRB entries. */ + for (i = 0; i < crb->map_entries; ++i) { + unsigned long pfn = crb->map[i].pa >> PAGE_SHIFT; + crb->map[i].va = vaddr; + for (j = 0; j < crb->map[i].count; ++j) { + /* Newer consoles (especially on larger + systems) may require more pages of + PTEs. Grab additional pages as needed. */ + if (pmd != pmd_offset(pgd, vaddr)) { + memset(kernel_end, 0, PAGE_SIZE); + pmd = pmd_offset(pgd, vaddr); + pmd_set(pmd, (pte_t *)kernel_end); + kernel_end += PAGE_SIZE; + } + set_pte(pte_offset_kernel(pmd, vaddr), + pfn_pte(pfn, PAGE_KERNEL)); + pfn++; + vaddr += PAGE_SIZE; + } + } + } + + callback_init_done = 1; + return kernel_end; +} + + +#ifndef CONFIG_DISCONTIGMEM +/* + * paging_init() sets up the memory map. + */ +void __init paging_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES] = {0, }; + unsigned long dma_pfn, high_pfn; + + dma_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + high_pfn = max_pfn = max_low_pfn; + + if (dma_pfn >= high_pfn) + zones_size[ZONE_DMA] = high_pfn; + else { + zones_size[ZONE_DMA] = dma_pfn; + zones_size[ZONE_NORMAL] = high_pfn - dma_pfn; + } + + /* Initialize mem_map[]. */ + free_area_init(zones_size); + + /* Initialize the kernel's ZERO_PGE. */ + memset((void *)ZERO_PGE, 0, PAGE_SIZE); +} +#endif /* CONFIG_DISCONTIGMEM */ + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SRM) +void +srm_paging_stop (void) +{ + /* Move the vptb back to where the SRM console expects it. */ + swapper_pg_dir[1] = swapper_pg_dir[1023]; + tbia(); + wrvptptr(0x200000000UL); + hwrpb->vptb = 0x200000000UL; + hwrpb_update_checksum(hwrpb); + + /* Reload the page tables that the console had in use. */ + load_PCB(&original_pcb); + tbia(); +} +#endif + +#ifndef CONFIG_DISCONTIGMEM +static void __init +printk_memory_info(void) +{ + unsigned long codesize, reservedpages, datasize, initsize, tmp; + extern int page_is_ram(unsigned long) __init; + extern char _text, _etext, _data, _edata; + extern char __init_begin, __init_end; + + /* printk all informations */ + reservedpages = 0; + for (tmp = 0; tmp < max_low_pfn; tmp++) + /* + * Only count reserved RAM pages + */ + if (page_is_ram(tmp) && PageReserved(mem_map+tmp)) + reservedpages++; + + codesize = (unsigned long) &_etext - (unsigned long) &_text; + datasize = (unsigned long) &_edata - (unsigned long) &_data; + initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; + + printk("Memory: %luk/%luk available (%luk kernel code, %luk reserved, %luk data, %luk init)\n", + nr_free_pages() << (PAGE_SHIFT-10), + max_mapnr << (PAGE_SHIFT-10), + codesize >> 10, + reservedpages << (PAGE_SHIFT-10), + datasize >> 10, + initsize >> 10); +} + +void __init +mem_init(void) +{ + max_mapnr = num_physpages = max_low_pfn; + totalram_pages += free_all_bootmem(); + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + + printk_memory_info(); +} +#endif /* CONFIG_DISCONTIGMEM */ + +void +free_reserved_mem(void *start, void *end) +{ + void *__start = start; + for (; __start < end; __start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(__start)); + init_page_count(virt_to_page(__start)); + free_page((long)__start); + totalram_pages++; + } +} + +void +free_initmem(void) +{ + extern char __init_begin, __init_end; + + free_reserved_mem(&__init_begin, &__init_end); + printk ("Freeing unused kernel memory: %ldk freed\n", + (&__init_end - &__init_begin) >> 10); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void +free_initrd_mem(unsigned long start, unsigned long end) +{ + free_reserved_mem((void *)start, (void *)end); + printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); +} +#endif diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c new file mode 100644 index 00000000..3973ae39 --- /dev/null +++ b/arch/alpha/mm/numa.c @@ -0,0 +1,362 @@ +/* + * linux/arch/alpha/mm/numa.c + * + * DISCONTIGMEM NUMA alpha support. + * + * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/bootmem.h> +#include <linux/swap.h> +#include <linux/initrd.h> +#include <linux/pfn.h> +#include <linux/module.h> + +#include <asm/hwrpb.h> +#include <asm/pgalloc.h> + +pg_data_t node_data[MAX_NUMNODES]; +EXPORT_SYMBOL(node_data); + +#undef DEBUG_DISCONTIG +#ifdef DEBUG_DISCONTIG +#define DBGDCONT(args...) printk(args) +#else +#define DBGDCONT(args...) +#endif + +#define for_each_mem_cluster(memdesc, _cluster, i) \ + for ((_cluster) = (memdesc)->cluster, (i) = 0; \ + (i) < (memdesc)->numclusters; (i)++, (_cluster)++) + +static void __init show_mem_layout(void) +{ + struct memclust_struct * cluster; + struct memdesc_struct * memdesc; + int i; + + /* Find free clusters, and init and free the bootmem accordingly. */ + memdesc = (struct memdesc_struct *) + (hwrpb->mddt_offset + (unsigned long) hwrpb); + + printk("Raw memory layout:\n"); + for_each_mem_cluster(memdesc, cluster, i) { + printk(" memcluster %2d, usage %1lx, start %8lu, end %8lu\n", + i, cluster->usage, cluster->start_pfn, + cluster->start_pfn + cluster->numpages); + } +} + +static void __init +setup_memory_node(int nid, void *kernel_end) +{ + extern unsigned long mem_size_limit; + struct memclust_struct * cluster; + struct memdesc_struct * memdesc; + unsigned long start_kernel_pfn, end_kernel_pfn; + unsigned long bootmap_size, bootmap_pages, bootmap_start; + unsigned long start, end; + unsigned long node_pfn_start, node_pfn_end; + unsigned long node_min_pfn, node_max_pfn; + int i; + unsigned long node_datasz = PFN_UP(sizeof(pg_data_t)); + int show_init = 0; + + /* Find the bounds of current node */ + node_pfn_start = (node_mem_start(nid)) >> PAGE_SHIFT; + node_pfn_end = node_pfn_start + (node_mem_size(nid) >> PAGE_SHIFT); + + /* Find free clusters, and init and free the bootmem accordingly. */ + memdesc = (struct memdesc_struct *) + (hwrpb->mddt_offset + (unsigned long) hwrpb); + + /* find the bounds of this node (node_min_pfn/node_max_pfn) */ + node_min_pfn = ~0UL; + node_max_pfn = 0UL; + for_each_mem_cluster(memdesc, cluster, i) { + /* Bit 0 is console/PALcode reserved. Bit 1 is + non-volatile memory -- we might want to mark + this for later. */ + if (cluster->usage & 3) + continue; + + start = cluster->start_pfn; + end = start + cluster->numpages; + + if (start >= node_pfn_end || end <= node_pfn_start) + continue; + + if (!show_init) { + show_init = 1; + printk("Initializing bootmem allocator on Node ID %d\n", nid); + } + printk(" memcluster %2d, usage %1lx, start %8lu, end %8lu\n", + i, cluster->usage, cluster->start_pfn, + cluster->start_pfn + cluster->numpages); + + if (start < node_pfn_start) + start = node_pfn_start; + if (end > node_pfn_end) + end = node_pfn_end; + + if (start < node_min_pfn) + node_min_pfn = start; + if (end > node_max_pfn) + node_max_pfn = end; + } + + if (mem_size_limit && node_max_pfn > mem_size_limit) { + static int msg_shown = 0; + if (!msg_shown) { + msg_shown = 1; + printk("setup: forcing memory size to %ldK (from %ldK).\n", + mem_size_limit << (PAGE_SHIFT - 10), + node_max_pfn << (PAGE_SHIFT - 10)); + } + node_max_pfn = mem_size_limit; + } + + if (node_min_pfn >= node_max_pfn) + return; + + /* Update global {min,max}_low_pfn from node information. */ + if (node_min_pfn < min_low_pfn) + min_low_pfn = node_min_pfn; + if (node_max_pfn > max_low_pfn) + max_pfn = max_low_pfn = node_max_pfn; + + num_physpages += node_max_pfn - node_min_pfn; + +#if 0 /* we'll try this one again in a little while */ + /* Cute trick to make sure our local node data is on local memory */ + node_data[nid] = (pg_data_t *)(__va(node_min_pfn << PAGE_SHIFT)); +#endif + /* Quasi-mark the pg_data_t as in-use */ + node_min_pfn += node_datasz; + if (node_min_pfn >= node_max_pfn) { + printk(" not enough mem to reserve NODE_DATA"); + return; + } + NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; + + printk(" Detected node memory: start %8lu, end %8lu\n", + node_min_pfn, node_max_pfn); + + DBGDCONT(" DISCONTIG: node_data[%d] is at 0x%p\n", nid, NODE_DATA(nid)); + DBGDCONT(" DISCONTIG: NODE_DATA(%d)->bdata is at 0x%p\n", nid, NODE_DATA(nid)->bdata); + + /* Find the bounds of kernel memory. */ + start_kernel_pfn = PFN_DOWN(KERNEL_START_PHYS); + end_kernel_pfn = PFN_UP(virt_to_phys(kernel_end)); + bootmap_start = -1; + + if (!nid && (node_max_pfn < end_kernel_pfn || node_min_pfn > start_kernel_pfn)) + panic("kernel loaded out of ram"); + + /* Zone start phys-addr must be 2^(MAX_ORDER-1) aligned. + Note that we round this down, not up - node memory + has much larger alignment than 8Mb, so it's safe. */ + node_min_pfn &= ~((1UL << (MAX_ORDER-1))-1); + + /* We need to know how many physically contiguous pages + we'll need for the bootmap. */ + bootmap_pages = bootmem_bootmap_pages(node_max_pfn-node_min_pfn); + + /* Now find a good region where to allocate the bootmap. */ + for_each_mem_cluster(memdesc, cluster, i) { + if (cluster->usage & 3) + continue; + + start = cluster->start_pfn; + end = start + cluster->numpages; + + if (start >= node_max_pfn || end <= node_min_pfn) + continue; + + if (end > node_max_pfn) + end = node_max_pfn; + if (start < node_min_pfn) + start = node_min_pfn; + + if (start < start_kernel_pfn) { + if (end > end_kernel_pfn + && end - end_kernel_pfn >= bootmap_pages) { + bootmap_start = end_kernel_pfn; + break; + } else if (end > start_kernel_pfn) + end = start_kernel_pfn; + } else if (start < end_kernel_pfn) + start = end_kernel_pfn; + if (end - start >= bootmap_pages) { + bootmap_start = start; + break; + } + } + + if (bootmap_start == -1) + panic("couldn't find a contiguous place for the bootmap"); + + /* Allocate the bootmap and mark the whole MM as reserved. */ + bootmap_size = init_bootmem_node(NODE_DATA(nid), bootmap_start, + node_min_pfn, node_max_pfn); + DBGDCONT(" bootmap_start %lu, bootmap_size %lu, bootmap_pages %lu\n", + bootmap_start, bootmap_size, bootmap_pages); + + /* Mark the free regions. */ + for_each_mem_cluster(memdesc, cluster, i) { + if (cluster->usage & 3) + continue; + + start = cluster->start_pfn; + end = cluster->start_pfn + cluster->numpages; + + if (start >= node_max_pfn || end <= node_min_pfn) + continue; + + if (end > node_max_pfn) + end = node_max_pfn; + if (start < node_min_pfn) + start = node_min_pfn; + + if (start < start_kernel_pfn) { + if (end > end_kernel_pfn) { + free_bootmem_node(NODE_DATA(nid), PFN_PHYS(start), + (PFN_PHYS(start_kernel_pfn) + - PFN_PHYS(start))); + printk(" freeing pages %ld:%ld\n", + start, start_kernel_pfn); + start = end_kernel_pfn; + } else if (end > start_kernel_pfn) + end = start_kernel_pfn; + } else if (start < end_kernel_pfn) + start = end_kernel_pfn; + if (start >= end) + continue; + + free_bootmem_node(NODE_DATA(nid), PFN_PHYS(start), PFN_PHYS(end) - PFN_PHYS(start)); + printk(" freeing pages %ld:%ld\n", start, end); + } + + /* Reserve the bootmap memory. */ + reserve_bootmem_node(NODE_DATA(nid), PFN_PHYS(bootmap_start), + bootmap_size, BOOTMEM_DEFAULT); + printk(" reserving pages %ld:%ld\n", bootmap_start, bootmap_start+PFN_UP(bootmap_size)); + + node_set_online(nid); +} + +void __init +setup_memory(void *kernel_end) +{ + int nid; + + show_mem_layout(); + + nodes_clear(node_online_map); + + min_low_pfn = ~0UL; + max_low_pfn = 0UL; + for (nid = 0; nid < MAX_NUMNODES; nid++) + setup_memory_node(nid, kernel_end); + +#ifdef CONFIG_BLK_DEV_INITRD + initrd_start = INITRD_START; + if (initrd_start) { + extern void *move_initrd(unsigned long); + + initrd_end = initrd_start+INITRD_SIZE; + printk("Initial ramdisk at: 0x%p (%lu bytes)\n", + (void *) initrd_start, INITRD_SIZE); + + if ((void *)initrd_end > phys_to_virt(PFN_PHYS(max_low_pfn))) { + if (!move_initrd(PFN_PHYS(max_low_pfn))) + printk("initrd extends beyond end of memory " + "(0x%08lx > 0x%p)\ndisabling initrd\n", + initrd_end, + phys_to_virt(PFN_PHYS(max_low_pfn))); + } else { + nid = kvaddr_to_nid(initrd_start); + reserve_bootmem_node(NODE_DATA(nid), + virt_to_phys((void *)initrd_start), + INITRD_SIZE, BOOTMEM_DEFAULT); + } + } +#endif /* CONFIG_BLK_DEV_INITRD */ +} + +void __init paging_init(void) +{ + unsigned int nid; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; + unsigned long dma_local_pfn; + + /* + * The old global MAX_DMA_ADDRESS per-arch API doesn't fit + * in the NUMA model, for now we convert it to a pfn and + * we interpret this pfn as a local per-node information. + * This issue isn't very important since none of these machines + * have legacy ISA slots anyways. + */ + dma_local_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + + for_each_online_node(nid) { + bootmem_data_t *bdata = &bootmem_node_data[nid]; + unsigned long start_pfn = bdata->node_min_pfn; + unsigned long end_pfn = bdata->node_low_pfn; + + if (dma_local_pfn >= end_pfn - start_pfn) + zones_size[ZONE_DMA] = end_pfn - start_pfn; + else { + zones_size[ZONE_DMA] = dma_local_pfn; + zones_size[ZONE_NORMAL] = (end_pfn - start_pfn) - dma_local_pfn; + } + node_set_state(nid, N_NORMAL_MEMORY); + free_area_init_node(nid, zones_size, start_pfn, NULL); + } + + /* Initialize the kernel's ZERO_PGE. */ + memset((void *)ZERO_PGE, 0, PAGE_SIZE); +} + +void __init mem_init(void) +{ + unsigned long codesize, reservedpages, datasize, initsize, pfn; + extern int page_is_ram(unsigned long) __init; + extern char _text, _etext, _data, _edata; + extern char __init_begin, __init_end; + unsigned long nid, i; + high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); + + reservedpages = 0; + for_each_online_node(nid) { + /* + * This will free up the bootmem, ie, slot 0 memory + */ + totalram_pages += free_all_bootmem_node(NODE_DATA(nid)); + + pfn = NODE_DATA(nid)->node_start_pfn; + for (i = 0; i < node_spanned_pages(nid); i++, pfn++) + if (page_is_ram(pfn) && + PageReserved(nid_page_nr(nid, i))) + reservedpages++; + } + + codesize = (unsigned long) &_etext - (unsigned long) &_text; + datasize = (unsigned long) &_edata - (unsigned long) &_data; + initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; + + printk("Memory: %luk/%luk available (%luk kernel code, %luk reserved, " + "%luk data, %luk init)\n", + nr_free_pages() << (PAGE_SHIFT-10), + num_physpages << (PAGE_SHIFT-10), + codesize >> 10, + reservedpages << (PAGE_SHIFT-10), + datasize >> 10, + initsize >> 10); +#if 0 + mem_stress(); +#endif +} diff --git a/arch/alpha/oprofile/Makefile b/arch/alpha/oprofile/Makefile new file mode 100644 index 00000000..3473de75 --- /dev/null +++ b/arch/alpha/oprofile/Makefile @@ -0,0 +1,19 @@ +ccflags-y := -Werror -Wno-sign-compare + +obj-$(CONFIG_OPROFILE) += oprofile.o + +DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ + oprof.o cpu_buffer.o buffer_sync.o \ + event_buffer.o oprofile_files.o \ + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + +oprofile-y := $(DRIVER_OBJS) common.o +oprofile-$(CONFIG_ALPHA_GENERIC) += op_model_ev4.o \ + op_model_ev5.o \ + op_model_ev6.o \ + op_model_ev67.o +oprofile-$(CONFIG_ALPHA_EV4) += op_model_ev4.o +oprofile-$(CONFIG_ALPHA_EV5) += op_model_ev5.o +oprofile-$(CONFIG_ALPHA_EV6) += op_model_ev6.o \ + op_model_ev67.o diff --git a/arch/alpha/oprofile/common.c b/arch/alpha/oprofile/common.c new file mode 100644 index 00000000..a0a5d27a --- /dev/null +++ b/arch/alpha/oprofile/common.c @@ -0,0 +1,188 @@ +/** + * @file arch/alpha/oprofile/common.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Richard Henderson <rth@twiddle.net> + */ + +#include <linux/oprofile.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/errno.h> +#include <asm/ptrace.h> + +#include "op_impl.h" + +extern struct op_axp_model op_model_ev4 __attribute__((weak)); +extern struct op_axp_model op_model_ev5 __attribute__((weak)); +extern struct op_axp_model op_model_pca56 __attribute__((weak)); +extern struct op_axp_model op_model_ev6 __attribute__((weak)); +extern struct op_axp_model op_model_ev67 __attribute__((weak)); + +static struct op_axp_model *model; + +extern void (*perf_irq)(unsigned long, struct pt_regs *); +static void (*save_perf_irq)(unsigned long, struct pt_regs *); + +static struct op_counter_config ctr[20]; +static struct op_system_config sys; +static struct op_register_config reg; + +/* Called from do_entInt to handle the performance monitor interrupt. */ + +static void +op_handle_interrupt(unsigned long which, struct pt_regs *regs) +{ + model->handle_interrupt(which, regs, ctr); + + /* If the user has selected an interrupt frequency that is + not exactly the width of the counter, write a new value + into the counter such that it'll overflow after N more + events. */ + if ((reg.need_reset >> which) & 1) + model->reset_ctr(®, which); +} + +static int +op_axp_setup(void) +{ + unsigned long i, e; + + /* Install our interrupt handler into the existing hook. */ + save_perf_irq = perf_irq; + perf_irq = op_handle_interrupt; + + /* Compute the mask of enabled counters. */ + for (i = e = 0; i < model->num_counters; ++i) + if (ctr[i].enabled) + e |= 1 << i; + reg.enable = e; + + /* Pre-compute the values to stuff in the hardware registers. */ + model->reg_setup(®, ctr, &sys); + + /* Configure the registers on all cpus. */ + (void)smp_call_function(model->cpu_setup, ®, 1); + model->cpu_setup(®); + return 0; +} + +static void +op_axp_shutdown(void) +{ + /* Remove our interrupt handler. We may be removing this module. */ + perf_irq = save_perf_irq; +} + +static void +op_axp_cpu_start(void *dummy) +{ + wrperfmon(1, reg.enable); +} + +static int +op_axp_start(void) +{ + (void)smp_call_function(op_axp_cpu_start, NULL, 1); + op_axp_cpu_start(NULL); + return 0; +} + +static inline void +op_axp_cpu_stop(void *dummy) +{ + /* Disable performance monitoring for all counters. */ + wrperfmon(0, -1); +} + +static void +op_axp_stop(void) +{ + (void)smp_call_function(op_axp_cpu_stop, NULL, 1); + op_axp_cpu_stop(NULL); +} + +static int +op_axp_create_files(struct super_block *sb, struct dentry *root) +{ + int i; + + for (i = 0; i < model->num_counters; ++i) { + struct dentry *dir; + char buf[4]; + + snprintf(buf, sizeof buf, "%d", i); + dir = oprofilefs_mkdir(sb, root, buf); + + oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled); + oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event); + oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count); + /* Dummies. */ + oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel); + oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user); + oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask); + } + + if (model->can_set_proc_mode) { + oprofilefs_create_ulong(sb, root, "enable_pal", + &sys.enable_pal); + oprofilefs_create_ulong(sb, root, "enable_kernel", + &sys.enable_kernel); + oprofilefs_create_ulong(sb, root, "enable_user", + &sys.enable_user); + } + + return 0; +} + +int __init +oprofile_arch_init(struct oprofile_operations *ops) +{ + struct op_axp_model *lmodel = NULL; + + switch (implver()) { + case IMPLVER_EV4: + lmodel = &op_model_ev4; + break; + case IMPLVER_EV5: + /* 21164PC has a slightly different set of events. + Recognize the chip by the presence of the MAX insns. */ + if (!amask(AMASK_MAX)) + lmodel = &op_model_pca56; + else + lmodel = &op_model_ev5; + break; + case IMPLVER_EV6: + /* 21264A supports ProfileMe. + Recognize the chip by the presence of the CIX insns. */ + if (!amask(AMASK_CIX)) + lmodel = &op_model_ev67; + else + lmodel = &op_model_ev6; + break; + } + + if (!lmodel) + return -ENODEV; + model = lmodel; + + ops->create_files = op_axp_create_files; + ops->setup = op_axp_setup; + ops->shutdown = op_axp_shutdown; + ops->start = op_axp_start; + ops->stop = op_axp_stop; + ops->cpu_type = lmodel->cpu_type; + + printk(KERN_INFO "oprofile: using %s performance monitoring.\n", + lmodel->cpu_type); + + return 0; +} + + +void +oprofile_arch_exit(void) +{ +} diff --git a/arch/alpha/oprofile/op_impl.h b/arch/alpha/oprofile/op_impl.h new file mode 100644 index 00000000..b2b87ae9 --- /dev/null +++ b/arch/alpha/oprofile/op_impl.h @@ -0,0 +1,55 @@ +/** + * @file arch/alpha/oprofile/op_impl.h + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Richard Henderson <rth@twiddle.net> + */ + +#ifndef OP_IMPL_H +#define OP_IMPL_H 1 + +/* Per-counter configuration as set via oprofilefs. */ +struct op_counter_config { + unsigned long enabled; + unsigned long event; + unsigned long count; + /* Dummies because I am too lazy to hack the userspace tools. */ + unsigned long kernel; + unsigned long user; + unsigned long unit_mask; +}; + +/* System-wide configuration as set via oprofilefs. */ +struct op_system_config { + unsigned long enable_pal; + unsigned long enable_kernel; + unsigned long enable_user; +}; + +/* Cached values for the various performance monitoring registers. */ +struct op_register_config { + unsigned long enable; + unsigned long mux_select; + unsigned long proc_mode; + unsigned long freq; + unsigned long reset_values; + unsigned long need_reset; +}; + +/* Per-architecture configuration and hooks. */ +struct op_axp_model { + void (*reg_setup) (struct op_register_config *, + struct op_counter_config *, + struct op_system_config *); + void (*cpu_setup) (void *); + void (*reset_ctr) (struct op_register_config *, unsigned long); + void (*handle_interrupt) (unsigned long, struct pt_regs *, + struct op_counter_config *); + char *cpu_type; + unsigned char num_counters; + unsigned char can_set_proc_mode; +}; + +#endif diff --git a/arch/alpha/oprofile/op_model_ev4.c b/arch/alpha/oprofile/op_model_ev4.c new file mode 100644 index 00000000..18aa9b4f --- /dev/null +++ b/arch/alpha/oprofile/op_model_ev4.c @@ -0,0 +1,115 @@ +/** + * @file arch/alpha/oprofile/op_model_ev4.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Richard Henderson <rth@twiddle.net> + */ + +#include <linux/oprofile.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <asm/ptrace.h> + +#include "op_impl.h" + + +/* Compute all of the registers in preparation for enabling profiling. */ + +static void +ev4_reg_setup(struct op_register_config *reg, + struct op_counter_config *ctr, + struct op_system_config *sys) +{ + unsigned long ctl = 0, count, hilo; + + /* Select desired events. We've mapped the event numbers + such that they fit directly into the event selection fields. + + Note that there is no "off" setting. In both cases we select + the EXTERNAL event source, hoping that it'll be the lowest + frequency, and set the frequency counter to LOW. The interrupts + for these "disabled" counter overflows are ignored by the + interrupt handler. + + This is most irritating, because the hardware *can* enable and + disable the interrupts for these counters independently, but the + wrperfmon interface doesn't allow it. */ + + ctl |= (ctr[0].enabled ? ctr[0].event << 8 : 14 << 8); + ctl |= (ctr[1].enabled ? (ctr[1].event - 16) << 32 : 7ul << 32); + + /* EV4 can not read or write its counter registers. The only + thing one can do at all is see if you overflow and get an + interrupt. We can set the width of the counters, to some + extent. Take the interrupt count selected by the user, + map it onto one of the possible values, and write it back. */ + + count = ctr[0].count; + if (count <= 4096) + count = 4096, hilo = 1; + else + count = 65536, hilo = 0; + ctr[0].count = count; + ctl |= (ctr[0].enabled && hilo) << 3; + + count = ctr[1].count; + if (count <= 256) + count = 256, hilo = 1; + else + count = 4096, hilo = 0; + ctr[1].count = count; + ctl |= (ctr[1].enabled && hilo); + + reg->mux_select = ctl; + + /* Select performance monitoring options. */ + /* ??? Need to come up with some mechanism to trace only + selected processes. EV4 does not have a mechanism to + select kernel or user mode only. For now, enable always. */ + reg->proc_mode = 0; + + /* Frequency is folded into mux_select for EV4. */ + reg->freq = 0; + + /* See above regarding no writes. */ + reg->reset_values = 0; + reg->need_reset = 0; + +} + +/* Program all of the registers in preparation for enabling profiling. */ + +static void +ev4_cpu_setup(void *x) +{ + struct op_register_config *reg = x; + + wrperfmon(2, reg->mux_select); + wrperfmon(3, reg->proc_mode); +} + +static void +ev4_handle_interrupt(unsigned long which, struct pt_regs *regs, + struct op_counter_config *ctr) +{ + /* EV4 can't properly disable counters individually. + Discard "disabled" events now. */ + if (!ctr[which].enabled) + return; + + /* Record the sample. */ + oprofile_add_sample(regs, which); +} + + +struct op_axp_model op_model_ev4 = { + .reg_setup = ev4_reg_setup, + .cpu_setup = ev4_cpu_setup, + .reset_ctr = NULL, + .handle_interrupt = ev4_handle_interrupt, + .cpu_type = "alpha/ev4", + .num_counters = 2, + .can_set_proc_mode = 0, +}; diff --git a/arch/alpha/oprofile/op_model_ev5.c b/arch/alpha/oprofile/op_model_ev5.c new file mode 100644 index 00000000..c32f8a0a --- /dev/null +++ b/arch/alpha/oprofile/op_model_ev5.c @@ -0,0 +1,210 @@ +/** + * @file arch/alpha/oprofile/op_model_ev5.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Richard Henderson <rth@twiddle.net> + */ + +#include <linux/oprofile.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <asm/ptrace.h> + +#include "op_impl.h" + + +/* Compute all of the registers in preparation for enabling profiling. + + The 21164 (EV5) and 21164PC (PCA65) vary in the bit placement and + meaning of the "CBOX" events. Given that we don't care about meaning + at this point, arrange for the difference in bit placement to be + handled by common code. */ + +static void +common_reg_setup(struct op_register_config *reg, + struct op_counter_config *ctr, + struct op_system_config *sys, + int cbox1_ofs, int cbox2_ofs) +{ + int i, ctl, reset, need_reset; + + /* Select desired events. The event numbers are selected such + that they map directly into the event selection fields: + + PCSEL0: 0, 1 + PCSEL1: 24-39 + CBOX1: 40-47 + PCSEL2: 48-63 + CBOX2: 64-71 + + There are two special cases, in that CYCLES can be measured + on PCSEL[02], and SCACHE_WRITE can be measured on CBOX[12]. + These event numbers are canonicalizes to their first appearance. */ + + ctl = 0; + for (i = 0; i < 3; ++i) { + unsigned long event = ctr[i].event; + if (!ctr[i].enabled) + continue; + + /* Remap the duplicate events, as described above. */ + if (i == 2) { + if (event == 0) + event = 12+48; + else if (event == 2+41) + event = 4+65; + } + + /* Convert the event numbers onto mux_select bit mask. */ + if (event < 2) + ctl |= event << 31; + else if (event < 24) + /* error */; + else if (event < 40) + ctl |= (event - 24) << 4; + else if (event < 48) + ctl |= (event - 40) << cbox1_ofs | 15 << 4; + else if (event < 64) + ctl |= event - 48; + else if (event < 72) + ctl |= (event - 64) << cbox2_ofs | 15; + } + reg->mux_select = ctl; + + /* Select processor mode. */ + /* ??? Need to come up with some mechanism to trace only selected + processes. For now select from pal, kernel and user mode. */ + ctl = 0; + ctl |= !sys->enable_pal << 9; + ctl |= !sys->enable_kernel << 8; + ctl |= !sys->enable_user << 30; + reg->proc_mode = ctl; + + /* Select interrupt frequencies. Take the interrupt count selected + by the user, and map it onto one of the possible counter widths. + If the user value is in between, compute a value to which the + counter is reset at each interrupt. */ + + ctl = reset = need_reset = 0; + for (i = 0; i < 3; ++i) { + unsigned long max, hilo, count = ctr[i].count; + if (!ctr[i].enabled) + continue; + + if (count <= 256) + count = 256, hilo = 3, max = 256; + else { + max = (i == 2 ? 16384 : 65536); + hilo = 2; + if (count > max) + count = max; + } + ctr[i].count = count; + + ctl |= hilo << (8 - i*2); + reset |= (max - count) << (48 - 16*i); + if (count != max) + need_reset |= 1 << i; + } + reg->freq = ctl; + reg->reset_values = reset; + reg->need_reset = need_reset; +} + +static void +ev5_reg_setup(struct op_register_config *reg, + struct op_counter_config *ctr, + struct op_system_config *sys) +{ + common_reg_setup(reg, ctr, sys, 19, 22); +} + +static void +pca56_reg_setup(struct op_register_config *reg, + struct op_counter_config *ctr, + struct op_system_config *sys) +{ + common_reg_setup(reg, ctr, sys, 8, 11); +} + +/* Program all of the registers in preparation for enabling profiling. */ + +static void +ev5_cpu_setup (void *x) +{ + struct op_register_config *reg = x; + + wrperfmon(2, reg->mux_select); + wrperfmon(3, reg->proc_mode); + wrperfmon(4, reg->freq); + wrperfmon(6, reg->reset_values); +} + +/* CTR is a counter for which the user has requested an interrupt count + in between one of the widths selectable in hardware. Reset the count + for CTR to the value stored in REG->RESET_VALUES. + + For EV5, this means disabling profiling, reading the current values, + masking in the value for the desired register, writing, then turning + profiling back on. + + This can be streamlined if profiling is only enabled for user mode. + In that case we know that the counters are not currently incrementing + (due to being in kernel mode). */ + +static void +ev5_reset_ctr(struct op_register_config *reg, unsigned long ctr) +{ + unsigned long values, mask, not_pk, reset_values; + + mask = (ctr == 0 ? 0xfffful << 48 + : ctr == 1 ? 0xfffful << 32 + : 0x3fff << 16); + + not_pk = 1 << 9 | 1 << 8; + + reset_values = reg->reset_values; + + if ((reg->proc_mode & not_pk) == not_pk) { + values = wrperfmon(5, 0); + values = (reset_values & mask) | (values & ~mask & -2); + wrperfmon(6, values); + } else { + wrperfmon(0, -1); + values = wrperfmon(5, 0); + values = (reset_values & mask) | (values & ~mask & -2); + wrperfmon(6, values); + wrperfmon(1, reg->enable); + } +} + +static void +ev5_handle_interrupt(unsigned long which, struct pt_regs *regs, + struct op_counter_config *ctr) +{ + /* Record the sample. */ + oprofile_add_sample(regs, which); +} + + +struct op_axp_model op_model_ev5 = { + .reg_setup = ev5_reg_setup, + .cpu_setup = ev5_cpu_setup, + .reset_ctr = ev5_reset_ctr, + .handle_interrupt = ev5_handle_interrupt, + .cpu_type = "alpha/ev5", + .num_counters = 3, + .can_set_proc_mode = 1, +}; + +struct op_axp_model op_model_pca56 = { + .reg_setup = pca56_reg_setup, + .cpu_setup = ev5_cpu_setup, + .reset_ctr = ev5_reset_ctr, + .handle_interrupt = ev5_handle_interrupt, + .cpu_type = "alpha/pca56", + .num_counters = 3, + .can_set_proc_mode = 1, +}; diff --git a/arch/alpha/oprofile/op_model_ev6.c b/arch/alpha/oprofile/op_model_ev6.c new file mode 100644 index 00000000..1c84cc25 --- /dev/null +++ b/arch/alpha/oprofile/op_model_ev6.c @@ -0,0 +1,102 @@ +/** + * @file arch/alpha/oprofile/op_model_ev6.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Richard Henderson <rth@twiddle.net> + */ + +#include <linux/oprofile.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <asm/ptrace.h> + +#include "op_impl.h" + + +/* Compute all of the registers in preparation for enabling profiling. */ + +static void +ev6_reg_setup(struct op_register_config *reg, + struct op_counter_config *ctr, + struct op_system_config *sys) +{ + unsigned long ctl, reset, need_reset, i; + + /* Select desired events. We've mapped the event numbers + such that they fit directly into the event selection fields. */ + ctl = 0; + if (ctr[0].enabled && ctr[0].event) + ctl |= (ctr[0].event & 1) << 4; + if (ctr[1].enabled) + ctl |= (ctr[1].event - 2) & 15; + reg->mux_select = ctl; + + /* Select logging options. */ + /* ??? Need to come up with some mechanism to trace only + selected processes. EV6 does not have a mechanism to + select kernel or user mode only. For now, enable always. */ + reg->proc_mode = 0; + + /* EV6 cannot change the width of the counters as with the + other implementations. But fortunately, we can write to + the counters and set the value such that it will overflow + at the right time. */ + reset = need_reset = 0; + for (i = 0; i < 2; ++i) { + unsigned long count = ctr[i].count; + if (!ctr[i].enabled) + continue; + + if (count > 0x100000) + count = 0x100000; + ctr[i].count = count; + reset |= (0x100000 - count) << (i ? 6 : 28); + if (count != 0x100000) + need_reset |= 1 << i; + } + reg->reset_values = reset; + reg->need_reset = need_reset; +} + +/* Program all of the registers in preparation for enabling profiling. */ + +static void +ev6_cpu_setup (void *x) +{ + struct op_register_config *reg = x; + + wrperfmon(2, reg->mux_select); + wrperfmon(3, reg->proc_mode); + wrperfmon(6, reg->reset_values | 3); +} + +/* CTR is a counter for which the user has requested an interrupt count + in between one of the widths selectable in hardware. Reset the count + for CTR to the value stored in REG->RESET_VALUES. */ + +static void +ev6_reset_ctr(struct op_register_config *reg, unsigned long ctr) +{ + wrperfmon(6, reg->reset_values | (1 << ctr)); +} + +static void +ev6_handle_interrupt(unsigned long which, struct pt_regs *regs, + struct op_counter_config *ctr) +{ + /* Record the sample. */ + oprofile_add_sample(regs, which); +} + + +struct op_axp_model op_model_ev6 = { + .reg_setup = ev6_reg_setup, + .cpu_setup = ev6_cpu_setup, + .reset_ctr = ev6_reset_ctr, + .handle_interrupt = ev6_handle_interrupt, + .cpu_type = "alpha/ev6", + .num_counters = 2, + .can_set_proc_mode = 0, +}; diff --git a/arch/alpha/oprofile/op_model_ev67.c b/arch/alpha/oprofile/op_model_ev67.c new file mode 100644 index 00000000..34a57a12 --- /dev/null +++ b/arch/alpha/oprofile/op_model_ev67.c @@ -0,0 +1,262 @@ +/** + * @file arch/alpha/oprofile/op_model_ev67.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Richard Henderson <rth@twiddle.net> + * @author Falk Hueffner <falk@debian.org> + */ + +#include <linux/oprofile.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <asm/ptrace.h> + +#include "op_impl.h" + + +/* Compute all of the registers in preparation for enabling profiling. */ + +static void +ev67_reg_setup(struct op_register_config *reg, + struct op_counter_config *ctr, + struct op_system_config *sys) +{ + unsigned long ctl, reset, need_reset, i; + + /* Select desired events. */ + ctl = 1UL << 4; /* Enable ProfileMe mode. */ + + /* The event numbers are chosen so we can use them directly if + PCTR1 is enabled. */ + if (ctr[1].enabled) { + ctl |= (ctr[1].event & 3) << 2; + } else { + if (ctr[0].event == 0) /* cycles */ + ctl |= 1UL << 2; + } + reg->mux_select = ctl; + + /* Select logging options. */ + /* ??? Need to come up with some mechanism to trace only + selected processes. EV67 does not have a mechanism to + select kernel or user mode only. For now, enable always. */ + reg->proc_mode = 0; + + /* EV67 cannot change the width of the counters as with the + other implementations. But fortunately, we can write to + the counters and set the value such that it will overflow + at the right time. */ + reset = need_reset = 0; + for (i = 0; i < 2; ++i) { + unsigned long count = ctr[i].count; + if (!ctr[i].enabled) + continue; + + if (count > 0x100000) + count = 0x100000; + ctr[i].count = count; + reset |= (0x100000 - count) << (i ? 6 : 28); + if (count != 0x100000) + need_reset |= 1 << i; + } + reg->reset_values = reset; + reg->need_reset = need_reset; +} + +/* Program all of the registers in preparation for enabling profiling. */ + +static void +ev67_cpu_setup (void *x) +{ + struct op_register_config *reg = x; + + wrperfmon(2, reg->mux_select); + wrperfmon(3, reg->proc_mode); + wrperfmon(6, reg->reset_values | 3); +} + +/* CTR is a counter for which the user has requested an interrupt count + in between one of the widths selectable in hardware. Reset the count + for CTR to the value stored in REG->RESET_VALUES. */ + +static void +ev67_reset_ctr(struct op_register_config *reg, unsigned long ctr) +{ + wrperfmon(6, reg->reset_values | (1 << ctr)); +} + +/* ProfileMe conditions which will show up as counters. We can also + detect the following, but it seems unlikely that anybody is + interested in counting them: + * Reset + * MT_FPCR (write to floating point control register) + * Arithmetic trap + * Dstream Fault + * Machine Check (ECC fault, etc.) + * OPCDEC (illegal opcode) + * Floating point disabled + * Differentiate between DTB single/double misses and 3 or 4 level + page tables + * Istream access violation + * Interrupt + * Icache Parity Error. + * Instruction killed (nop, trapb) + + Unfortunately, there seems to be no way to detect Dcache and Bcache + misses; the latter could be approximated by making the counter + count Bcache misses, but that is not precise. + + We model this as 20 counters: + * PCTR0 + * PCTR1 + * 9 ProfileMe events, induced by PCTR0 + * 9 ProfileMe events, induced by PCTR1 +*/ + +enum profileme_counters { + PM_STALLED, /* Stalled for at least one cycle + between the fetch and map stages */ + PM_TAKEN, /* Conditional branch taken */ + PM_MISPREDICT, /* Branch caused mispredict trap */ + PM_ITB_MISS, /* ITB miss */ + PM_DTB_MISS, /* DTB miss */ + PM_REPLAY, /* Replay trap */ + PM_LOAD_STORE, /* Load-store order trap */ + PM_ICACHE_MISS, /* Icache miss */ + PM_UNALIGNED, /* Unaligned Load/Store */ + PM_NUM_COUNTERS +}; + +static inline void +op_add_pm(unsigned long pc, int kern, unsigned long counter, + struct op_counter_config *ctr, unsigned long event) +{ + unsigned long fake_counter = 2 + event; + if (counter == 1) + fake_counter += PM_NUM_COUNTERS; + if (ctr[fake_counter].enabled) + oprofile_add_pc(pc, kern, fake_counter); +} + +static void +ev67_handle_interrupt(unsigned long which, struct pt_regs *regs, + struct op_counter_config *ctr) +{ + unsigned long pmpc, pctr_ctl; + int kern = !user_mode(regs); + int mispredict = 0; + union { + unsigned long v; + struct { + unsigned reserved: 30; /* 0-29 */ + unsigned overcount: 3; /* 30-32 */ + unsigned icache_miss: 1; /* 33 */ + unsigned trap_type: 4; /* 34-37 */ + unsigned load_store: 1; /* 38 */ + unsigned trap: 1; /* 39 */ + unsigned mispredict: 1; /* 40 */ + } fields; + } i_stat; + + enum trap_types { + TRAP_REPLAY, + TRAP_INVALID0, + TRAP_DTB_DOUBLE_MISS_3, + TRAP_DTB_DOUBLE_MISS_4, + TRAP_FP_DISABLED, + TRAP_UNALIGNED, + TRAP_DTB_SINGLE_MISS, + TRAP_DSTREAM_FAULT, + TRAP_OPCDEC, + TRAP_INVALID1, + TRAP_MACHINE_CHECK, + TRAP_INVALID2, + TRAP_ARITHMETIC, + TRAP_INVALID3, + TRAP_MT_FPCR, + TRAP_RESET + }; + + pmpc = wrperfmon(9, 0); + /* ??? Don't know how to handle physical-mode PALcode address. */ + if (pmpc & 1) + return; + pmpc &= ~2; /* clear reserved bit */ + + i_stat.v = wrperfmon(8, 0); + if (i_stat.fields.trap) { + switch (i_stat.fields.trap_type) { + case TRAP_INVALID1: + case TRAP_INVALID2: + case TRAP_INVALID3: + /* Pipeline redirection occurred. PMPC points + to PALcode. Recognize ITB miss by PALcode + offset address, and get actual PC from + EXC_ADDR. */ + oprofile_add_pc(regs->pc, kern, which); + if ((pmpc & ((1 << 15) - 1)) == 581) + op_add_pm(regs->pc, kern, which, + ctr, PM_ITB_MISS); + /* Most other bit and counter values will be + those for the first instruction in the + fault handler, so we're done. */ + return; + case TRAP_REPLAY: + op_add_pm(pmpc, kern, which, ctr, + (i_stat.fields.load_store + ? PM_LOAD_STORE : PM_REPLAY)); + break; + case TRAP_DTB_DOUBLE_MISS_3: + case TRAP_DTB_DOUBLE_MISS_4: + case TRAP_DTB_SINGLE_MISS: + op_add_pm(pmpc, kern, which, ctr, PM_DTB_MISS); + break; + case TRAP_UNALIGNED: + op_add_pm(pmpc, kern, which, ctr, PM_UNALIGNED); + break; + case TRAP_INVALID0: + case TRAP_FP_DISABLED: + case TRAP_DSTREAM_FAULT: + case TRAP_OPCDEC: + case TRAP_MACHINE_CHECK: + case TRAP_ARITHMETIC: + case TRAP_MT_FPCR: + case TRAP_RESET: + break; + } + + /* ??? JSR/JMP/RET/COR or HW_JSR/HW_JMP/HW_RET/HW_COR + mispredicts do not set this bit but can be + recognized by the presence of one of these + instructions at the PMPC location with bit 39 + set. */ + if (i_stat.fields.mispredict) { + mispredict = 1; + op_add_pm(pmpc, kern, which, ctr, PM_MISPREDICT); + } + } + + oprofile_add_pc(pmpc, kern, which); + + pctr_ctl = wrperfmon(5, 0); + if (pctr_ctl & (1UL << 27)) + op_add_pm(pmpc, kern, which, ctr, PM_STALLED); + + /* Unfortunately, TAK is undefined on mispredicted branches. + ??? It is also undefined for non-cbranch insns, should + check that. */ + if (!mispredict && pctr_ctl & (1UL << 0)) + op_add_pm(pmpc, kern, which, ctr, PM_TAKEN); +} + +struct op_axp_model op_model_ev67 = { + .reg_setup = ev67_reg_setup, + .cpu_setup = ev67_cpu_setup, + .reset_ctr = ev67_reset_ctr, + .handle_interrupt = ev67_handle_interrupt, + .cpu_type = "alpha/ev67", + .num_counters = 20, + .can_set_proc_mode = 0, +}; |